diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 4a586d7f0fd..03e8b58e16c 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -2535,3 +2535,56 @@ def sanitize_anthropic_kwargs(api_kwargs: Any, *, log_prefix: str = "") -> Any:
             sorted(leaked),
         )
     return api_kwargs
+
+
+def _is_stream_unavailable_error(exc: Exception) -> bool:
+    """Decide whether a failed stream call warrants retrying via create()."""
+    message = str(exc).lower()
+    # Generic "streaming is not supported" wording in the error text.
+    if "stream" in message and "not supported" in message:
+        return True
+    if "invokemodelwithresponsestream" not in message:
+        return False
+    from agent.bedrock_adapter import is_streaming_access_denied_error
+    return is_streaming_access_denied_error(exc)
+
+
+def create_anthropic_message(
+    client: Any,
+    api_kwargs: dict,
+    *,
+    log_prefix: str = "",
+    prefer_stream: bool = True,
+) -> Any:
+    """Send an Anthropic Messages request and return the final message.
+
+    ``api_kwargs`` is first passed through ``sanitize_anthropic_kwargs``; a
+    copy without the ``stream`` key is what actually gets sent.  When
+    ``prefer_stream`` is true and ``client.messages.stream`` is callable, the
+    request goes through ``messages.stream()`` and the aggregated result of
+    ``get_final_message()`` is returned -- this keeps SSE-only gateways, which
+    answer ``messages.create()`` with ``text/event-stream``, from handing the
+    caller raw text instead of a Message.  Only errors recognised by
+    ``_is_stream_unavailable_error`` (e.g. restricted Bedrock roles) trigger
+    the ``messages.create()`` fallback; every other exception propagates.
+    """
+    sanitize_anthropic_kwargs(api_kwargs, log_prefix=log_prefix)
+    request = dict(api_kwargs)
+    request.pop("stream", None)
+
+    messages_api = getattr(client, "messages", None)
+    open_stream = getattr(messages_api, "stream", None)
+    use_stream = prefer_stream and callable(open_stream)
+    try:
+        if use_stream:
+            with open_stream(**request) as stream:
+                return stream.get_final_message()
+    except Exception as exc:
+        if not _is_stream_unavailable_error(exc):
+            raise
+        logger.debug(
+            "%sAnthropic Messages stream unavailable; falling back to "
+            "messages.create(): %s",
+            log_prefix, exc,
+        )
+    return messages_api.create(**request)
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 86a1c765a78..f28b5f60156 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -997,7 +997,7 @@ class _AnthropicCompletionsAdapter:
         self._is_oauth = is_oauth
 
     def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs
+        from agent.anthropic_adapter import build_anthropic_kwargs, create_anthropic_message
         from agent.transports import get_transport
 
         messages = kwargs.get("messages", [])
@@ -1041,7 +1041,7 @@ class _AnthropicCompletionsAdapter:
             if not _forbids_sampling_params(model):
                 anthropic_kwargs["temperature"] = temperature
 
-        response = self._client.messages.create(**anthropic_kwargs)
+        response = create_anthropic_message(self._client, anthropic_kwargs)
         _transport = get_transport("anthropic_messages")
         _nr = _transport.normalize_response(
             response, strip_tool_prefix=self._is_oauth
diff --git a/agent/codex_runtime.py b/agent/codex_runtime.py
index 7f175fff97f..4ff67871934 100644
--- a/agent/codex_runtime.py
+++ b/agent/codex_runtime.py
@@ -290,6 +290,7 @@ def run_codex_app_server_turn(
                 original_user_message=original_user_message,
                 final_response=turn.final_text,
                 interrupted=False,
+                messages=messages,
             )
         except Exception:
             logger.debug("external memory sync raised", exc_info=True)
diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py
index ef69ac68329..0ccc9649428 100644
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -3197,15 +3197,22 @@ def run_conversation(
                     # Terminal — flush buffered context so the user sees
                     # what was tried before the abort.
                     agent._flush_status_buffer()
+                    # Summarize once: Cloudflare/proxy HTML challenge pages and
+                    # other raw provider bodies must be collapsed to a short
+                    # one-liner here, otherwise the full page leaks into the
+                    # returned ``error`` field and downstream consumers deliver
+                    # it verbatim (e.g. a cron failure notification dumped a
+                    # ~60KB Cloudflare challenge page as 31 Discord messages).
+                    _nonretryable_summary = agent._summarize_api_error(api_error)
                     if classified.reason == FailoverReason.content_policy_blocked:
                         agent._emit_status(
                             f"❌ Provider safety filter blocked this request: "
-                            f"{agent._summarize_api_error(api_error)}"
+                            f"{_nonretryable_summary}"
                         )
                     else:
                         agent._emit_status(
                             f"❌ Non-retryable error (HTTP {status_code}): "
-                            f"{agent._summarize_api_error(api_error)}"
+                            f"{_nonretryable_summary}"
                         )
                     agent._vprint(f"{agent.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True)
                     agent._vprint(f"{agent.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
@@ -3290,18 +3297,17 @@ def run_conversation(
                     else:
                         agent._persist_session(messages, conversation_history)
                     if classified.reason == FailoverReason.content_policy_blocked:
-                        _summary = agent._summarize_api_error(api_error)
                         _policy_response = (
                             "⚠️  The model provider's safety filter blocked this request "
                             "(not a Hermes/gateway failure).\n\n"
-                            f"Provider message: {_summary}\n\n"
+                            f"Provider message: {_nonretryable_summary}\n\n"
                             f"{_CONTENT_POLICY_RECOVERY_HINT}"
                         )
                         return _content_policy_blocked_result(
                             messages,
                             api_call_count,
                             final_response=_policy_response,
-                            error_detail=_summary,
+                            error_detail=_nonretryable_summary,
                         )
                     return {
                         "final_response": None,
@@ -3309,7 +3315,7 @@ def run_conversation(
                         "api_calls": api_call_count,
                         "completed": False,
                         "failed": True,
-                        "error": str(api_error),
+                        "error": _nonretryable_summary,
                     }
 
                 if retry_count >= max_retries:
diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index 04b22c76a68..b791ac4f82c 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -15,6 +15,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple
 
 from hermes_constants import OPENROUTER_BASE_URL
 from hermes_cli.config import load_env
+from agent.secret_scope import get_secret as _get_secret
 from agent.credential_persistence import (
     is_borrowed_credential_source,
     sanitize_borrowed_credential_payload,
@@ -1666,7 +1667,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
         _env_file = load_env()
 
         def _env_val(key: str) -> str:
-            return (_env_file.get(key) or os.environ.get(key) or "").strip()
+            return (_env_file.get(key) or _get_secret(key, "") or "").strip()
 
         anthropic_api_key = _env_val("ANTHROPIC_API_KEY")
         anthropic_oauth_env = (
@@ -1952,7 +1953,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
     # changes to the .env file.
     def _get_env_prefer_dotenv(key: str) -> str:
         env_file = load_env()
-        val = env_file.get(key) or os.environ.get(key) or ""
+        val = env_file.get(key) or _get_secret(key, "") or ""
         return val.strip()
 
     # Honour user suppression — `hermes auth remove <provider> <N>` for an
diff --git a/agent/message_content.py b/agent/message_content.py
new file mode 100644
index 00000000000..c42bf408550
--- /dev/null
+++ b/agent/message_content.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from typing import Any
+
+
+_NON_TEXT_PART_TYPES = {"image", "image_url", "input_image", "audio", "input_audio"}
+_TEXT_KEYS = ("text", "content", "input_text", "output_text", "summary_text")
+
+
+def _field(value: Any, key: str) -> Any:
+    """Fetch ``key`` from a mapping, or the same-named attribute otherwise."""
+    is_mapping = isinstance(value, Mapping)
+    return value.get(key) if is_mapping else getattr(value, key, None)
+
+
+def _text_from_part(part: Any) -> str:
+    """Pull the text out of one content part; an empty string when none."""
+    if isinstance(part, str):
+        return part
+    if part is None:
+        return ""
+
+    # Parts whose type is listed in _NON_TEXT_PART_TYPES yield no text.
+    kind = str(_field(part, "type") or "").strip().lower()
+    if kind in _NON_TEXT_PART_TYPES:
+        return ""
+
+    # The first text-bearing field that actually holds a string wins.
+    values = (_field(part, name) for name in _TEXT_KEYS)
+    return next((value for value in values if isinstance(value, str)), "")
+
+
+def flatten_message_text(content: Any, *, sep: str = "\n") -> str:
+    """Collapse chat/Responses message content into its visible text.
+
+    Lists are flattened part by part (empty parts dropped) and joined with
+    ``sep``; any other non-string object falls back to ``str(content)``.
+    """
+    if isinstance(content, str):
+        return content
+    if content is None:
+        return ""
+    if isinstance(content, list):
+        return sep.join(filter(None, map(_text_from_part, content)))
+    extracted = _text_from_part(content)
+    try:
+        return extracted or str(content)
+    except Exception:
+        return ""
diff --git a/agent/secret_scope.py b/agent/secret_scope.py
new file mode 100644
index 00000000000..26022ca9b0e
--- /dev/null
+++ b/agent/secret_scope.py
@@ -0,0 +1,205 @@
+"""Profile-scoped credential resolution for multi-profile gateway multiplexing.
+
+The multiplexing gateway serves many profiles from one process. Each profile
+has its own ``.env`` with its own provider keys and platform tokens, so we
+**cannot** union them into the process-global ``os.environ`` (that would leak
+profile A's keys to profile B's turns, and to every subprocess spawned with
+``env=dict(os.environ)``).
+
+This module provides a fail-closed, context-local secret scope:
+
+- ``set_secret_scope(mapping)`` installs the active profile's secrets for the
+  current task (a contextvar, so it propagates into the agent's worker thread
+  via ``copy_context()`` exactly like the HERMES_HOME override).
+- ``get_secret(name)`` reads from that scope. When multiplexing is **active**
+  and no scope is set, it RAISES rather than silently falling back to
+  ``os.environ`` — an un-migrated or newly-added call site fails loud at that
+  exact line instead of leaking another profile's value. When multiplexing is
+  **off** (the default), it transparently reads ``os.environ`` so the
+  single-profile gateway and every non-gateway caller behave exactly as before.
+
+Design rationale lives in ``docs/design/multiplexing-gateway.md`` (Workstream A).
+"""
+from __future__ import annotations
+
+import os
+from contextvars import ContextVar, Token
+from pathlib import Path
+from typing import Dict, Mapping, Optional
+
+
+# ── multiplex-active flag ────────────────────────────────────────────────
+# Process-global: set once at gateway startup when gateway.multiplex_profiles
+# is true. Governs whether get_secret() fails closed on an unscoped read.
+# A plain module global (not a contextvar): it describes the deployment mode,
+# not a per-task value.
+_MULTIPLEX_ACTIVE: bool = False
+
+
+def set_multiplex_active(active: bool) -> None:
+    """Record whether the process is running as a profile multiplexer.
+
+    Intended to be called once at gateway startup. While the flag is True,
+    ``get_secret`` raises on an unscoped read instead of using ``os.environ``.
+    """
+    global _MULTIPLEX_ACTIVE
+    _MULTIPLEX_ACTIVE = bool(active)  # store a real bool whatever was passed
+
+
+def is_multiplex_active() -> bool:
+    """Return the multiplexer flag (False until ``set_multiplex_active`` sets it)."""
+    return _MULTIPLEX_ACTIVE
+
+
+# ── the secret scope contextvar ──────────────────────────────────────────
+_SECRET_SCOPE: ContextVar[Optional[Mapping[str, str]]] = ContextVar(
+    "_SECRET_SCOPE", default=None
+)
+
+
+class UnscopedSecretError(RuntimeError):
+    """Raised by ``get_secret`` in multiplex mode when no scope is installed.
+
+    This is the fail-closed signal: a non-global credential read reached
+    ``get_secret`` without a profile scope active, which in a multiplexer would
+    otherwise leak whichever profile's value happened to be in ``os.environ``.
+    The fix is to wrap the call path in ``set_secret_scope(...)`` (the per-turn
+    / per-adapter profile scope), not to widen the global-env allowlist.
+    """
+
+
+def set_secret_scope(secrets: Optional[Mapping[str, str]]) -> Token:
+    """Install the active profile's secret mapping for the current context.
+
+    Returns the ``ContextVar`` token for ``reset_secret_scope``; ``None`` clears it.
+    """
+    return _SECRET_SCOPE.set(secrets)
+
+
+def reset_secret_scope(token: Token) -> None:
+    """Restore the scope that was active before ``token`` was issued."""
+    _SECRET_SCOPE.reset(token)
+
+
+def current_secret_scope() -> Optional[Mapping[str, str]]:
+    """Return the mapping installed for this context, or None when none is set."""
+    return _SECRET_SCOPE.get()
+
+
+# ── genuinely-global env vars (NOT per-profile secrets) ──────────────────
+# These are process/deployment-level settings, not profile credentials. They
+# legitimately live in os.environ and must keep reading from it even in
+# multiplex mode — routing them through the fail-closed path would wrongly
+# crash. Anything matching is read from os.environ regardless of scope.
+#
+# Membership test is by exact name OR prefix (see _is_global_env). Keep this
+# list tight: when in doubt a value is a profile secret, not a global.
+_GLOBAL_ENV_EXACT = frozenset({
+    # Hermes runtime / deployment
+    "HERMES_HOME", "HERMES_PROFILE", "HERMES_GATEWAY_LOCK_DIR",
+    "HERMES_MAX_ITERATIONS", "HERMES_MAX_TOKENS", "HERMES_API_TIMEOUT",
+    "HERMES_REDACT_SECRETS", "HERMES_NOUS_TIMEOUT_SECONDS",
+    "_HERMES_GATEWAY",
+    # OS / interpreter
+    "PATH", "HOME", "USER", "LANG", "LC_ALL", "TZ", "PWD", "SHELL", "TMPDIR",
+    "VIRTUAL_ENV", "PYTHONPATH", "SSL_CERT_FILE",
+    # Kanban paths (per-board, not per-profile-secret)
+    "HERMES_KANBAN_DB", "HERMES_KANBAN_WORKSPACES_ROOT", "HERMES_KANBAN_BOARD",
+})
+_GLOBAL_ENV_PREFIXES = (
+    "HERMES_KANBAN_",
+    "HERMES_TELEGRAM_",   # tuning knobs (batch delays, fallback toggles) — NOT the token
+    "TERMINAL_",          # terminal/sandbox backend settings
+)
+
+
+def _is_global_env(name: str) -> bool:
+    """Tell whether ``name`` is a process-level setting, not a profile secret."""
+    # Exact names first; otherwise ``str.startswith`` checks every prefix in
+    # the ``_GLOBAL_ENV_PREFIXES`` tuple in one call.
+    return name in _GLOBAL_ENV_EXACT or name.startswith(_GLOBAL_ENV_PREFIXES)
+
+
+def get_secret(name: str, default: Optional[str] = None) -> Optional[str]:
+    """Look up the credential stored under env-var ``name`` for the active profile.
+
+    The source consulted is chosen as follows:
+
+    * Names accepted by ``_is_global_env`` are deployment settings, so they
+      are always read from ``os.environ``.
+    * With a secret scope installed, the scope alone is consulted.  It is
+      authoritative: a missing key yields ``default`` and ``os.environ`` is
+      never used as a fallback, since in a multiplexer it may hold another
+      profile's value.
+    * With no scope and multiplexing off, ``os.environ`` is read -- the same
+      behaviour callers had with ``os.getenv``.
+    * With no scope and multiplexing on, the call fails closed.
+
+    Raises:
+        UnscopedSecretError: on an unscoped, non-global read in multiplex mode.
+    """
+    is_global = _is_global_env(name)
+    scope = None if is_global else _SECRET_SCOPE.get()
+
+    if scope is not None:
+        source = scope
+    elif is_global or not _MULTIPLEX_ACTIVE:
+        source = os.environ
+    else:
+        raise UnscopedSecretError(
+            f"get_secret({name!r}) called with no profile secret scope active "
+            f"while multiplexing is on. This credential read must run inside a "
+            f"set_secret_scope(...) block (the per-turn / per-adapter profile "
+            f"scope). Reading os.environ here would risk leaking another "
+            f"profile's value. See docs/design/multiplexing-gateway.md "
+            f"(Workstream A)."
+        )
+
+    # A ``None`` lookup result means "absent": hand back the caller's default.
+    found = source.get(name)
+    return default if found is None else found
+
+
+def load_env_file(env_path: Path) -> Dict[str, str]:
+    """Parse a ``.env`` file into a plain dict WITHOUT touching ``os.environ``.
+
+    Supports ``KEY=VALUE``, an ``export`` prefix, ``#`` comment lines, inline
+    ``#`` comments (after whitespace, or after a closing quote) and matching
+    surrounding quotes. An unreadable or undecodable file yields ``{}``.
+    """
+    import re  # local import: only needed for inline-comment stripping
+    try:
+        text = env_path.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):  # OSError covers FileNotFoundError
+        return {}
+
+    secrets: Dict[str, str] = {}
+    for raw in text.splitlines():
+        line = raw.strip()
+        if line.startswith("#"):
+            continue
+        if line.startswith("export "):
+            line = line[len("export "):].lstrip()
+        key, eq, value = (part.strip() for part in line.partition("="))
+        if not eq or not key:
+            continue
+        closing = value.find(value[0], 1) if value[:1] in ("'", '"') else -1
+        if closing > 0 and value[closing + 1:].lstrip().startswith("#"):
+            value = value[:closing + 1]  # comment after the closing quote
+        elif value[:1] not in ("'", '"'):
+            value = re.split(r"\s+#", value, maxsplit=1)[0]  # unquoted comment
+        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
+            value = value[1:-1]
+        secrets[key] = value
+    return secrets
+
+
+def build_profile_secret_scope(hermes_home: Path) -> Dict[str, str]:
+    """Load the secrets defined in the profile's ``<home>/.env`` file.
+
+    The result is a new dict containing only what that file defines; nothing
+    is copied in from ``os.environ``.
+    """
+    dotenv_path = Path(hermes_home) / ".env"
+    return load_env_file(dotenv_path)
+
diff --git a/apps/desktop/src/app/desktop-controller.tsx b/apps/desktop/src/app/desktop-controller.tsx
index 05dfbbc764f..5ca73061135 100644
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -14,6 +14,7 @@ import { useSkinCommand } from '@/themes/use-skin-command'
 import { formatRefValue } from '../components/assistant-ui/directive-text'
 import { getCronJobs, getSessionMessages, listAllProfileSessions, type SessionInfo, triggerCronJob } from '../hermes'
 import { type ChatMessage, chatMessageText, preserveLocalAssistantErrors, toChatMessages } from '../lib/chat-messages'
+import { storedSessionIdForNotification } from '../lib/session-ids'
 import {
   isMessagingSource,
   LOCAL_SESSION_SOURCE_IDS,
@@ -276,16 +277,20 @@ export function DesktopController() {
     }
   }, [])
 
-  // Notification click: the main process already focused the window; jump to its session.
+  // Notification click: the main process already focused the window; jump to its
+  // session. Notifications are tagged with the gateway *runtime* session id, but
+  // the chat route is keyed by the *stored* id — navigating with the runtime id
+  // resumes a non-existent stored session ("session not found") and strands the
+  // user. Translate runtime -> stored before navigating.
   useEffect(() => {
     const unsubscribe = window.hermesDesktop?.onFocusSession?.(sessionId => {
       if (sessionId) {
-        navigate(sessionRoute(sessionId))
+        navigate(sessionRoute(storedSessionIdForNotification(sessionId, runtimeIdByStoredSessionIdRef.current)))
       }
     })
 
     return () => unsubscribe?.()
-  }, [navigate])
+  }, [navigate, runtimeIdByStoredSessionIdRef])
 
   // Notification action button (Approve/Reject) — resolve in place, no navigation.
   useEffect(() => {
diff --git a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
index 829119f65b4..ed3f6498cd1 100644
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
@@ -32,6 +32,7 @@ import {
   clearComposerAttachments,
   type ComposerAttachment,
   setComposerAttachmentUploadState,
+  setComposerDraft,
   terminalContextBlocksFromDraft,
   updateComposerAttachment
 } from '@/store/composer'
@@ -951,8 +952,26 @@ export function usePromptActions({
             return
           }
 
+          // send / prefill carry an optional `notice` (e.g. "⊙ Goal set …")
+          // that the backend wants shown as a system line before the message
+          // is acted on. Mirrors the TUI's createSlashHandler — without it a
+          // `/goal <text>` looked like it did nothing.
+          if ((dispatch.type === 'send' || dispatch.type === 'prefill') && dispatch.notice?.trim()) {
+            renderSlashOutput(dispatch.notice.trim())
+          }
+
           const message = ('message' in dispatch ? dispatch.message : '')?.trim() ?? ''
 
+          // /undo returns a prefill directive: drop the backed-up message into
+          // the composer for editing instead of submitting it immediately.
+          if (dispatch.type === 'prefill') {
+            if (message) {
+              setComposerDraft(message)
+            }
+
+            return
+          }
+
           if (!message) {
             renderSlashOutput(
               `/${name}: ${dispatch.type === 'skill' ? 'skill payload missing message' : 'empty message'}`
diff --git a/apps/desktop/src/app/settings/providers-settings.test.tsx b/apps/desktop/src/app/settings/providers-settings.test.tsx
index 27c029b442c..1909604a07a 100644
--- a/apps/desktop/src/app/settings/providers-settings.test.tsx
+++ b/apps/desktop/src/app/settings/providers-settings.test.tsx
@@ -2,7 +2,7 @@ import { cleanup, fireEvent, render, screen, waitFor } from '@testing-library/re
 import { atom } from 'nanostores'
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
 
-import type { OAuthProvider } from '@/types/hermes'
+import type { EnvVarInfo, OAuthProvider } from '@/types/hermes'
 
 const listOAuthProviders = vi.fn()
 const disconnectOAuthProvider = vi.fn()
@@ -36,6 +36,25 @@ function provider(id: string, loggedIn: boolean, patch: Partial<OAuthProvider> =
   }
 }
 
+// One `/api/env` row (an EnvVarInfo) for the API-keys view. Mirrors the
+// `provider()` factory above: a valid base + per-test overrides, typed against
+// the real response shape so it can't drift from EnvVarInfo.
+function keyVar(patch: Partial<EnvVarInfo> = {}): EnvVarInfo {
+  const base: EnvVarInfo = {
+    advanced: false,
+    category: 'provider',
+    description: '',
+    is_password: true,
+    is_set: false,
+    provider: '',
+    provider_label: '',
+    redacted_value: null,
+    tools: [],
+    url: ''
+  }
+  return { ...base, ...patch }
+}
+
 beforeEach(() => {
   onboarding.set({ manual: false })
   getEnvVars.mockResolvedValue({})
@@ -97,4 +116,56 @@ describe('ProvidersSettings', () => {
     expect(screen.queryByRole('button', { name: 'Remove Qwen Code' })).toBeNull()
     expect(screen.getByText(/managed by its own CLI/)).toBeTruthy()
   })
+
+  it('renders a Keys card for a backend-tagged provider with no PROVIDER_GROUPS prefix', async () => {
+    // A provider the backend catalog tags (provider/provider_label) but that has
+    // no desktop PROVIDER_GROUPS prefix row must still render its own card —
+    // this is the GUI/CLI drift fix: membership comes from the backend, not
+    // from the hand-maintained prefix list.
+    getEnvVars.mockResolvedValue({
+      WIDGETAI_API_KEY: keyVar({
+        provider: 'widgetai',
+        provider_label: 'WidgetAI',
+        url: 'https://widgetai.example/keys'
+      })
+    })
+    listOAuthProviders.mockResolvedValue({ providers: [] })
+
+    const { ProvidersSettings } = await import('./providers-settings')
+    render(<ProvidersSettings onClose={vi.fn()} onViewChange={vi.fn()} view="keys" />)
+
+    expect(await screen.findByText('WidgetAI')).toBeTruthy()
+  })
+
+  it('orders API-key providers by priority then name, and filters them via search', async () => {
+    // These three providers have no curated PROVIDER_GROUPS priority, so they
+    // share the default priority and fall back to alphabetical among themselves
+    // (Acme, Middle, Zebra) — exercising the name tiebreak of the priority sort.
+    getEnvVars.mockResolvedValue({
+      ZEBRA_API_KEY: keyVar({ provider: 'zebra', provider_label: 'Zebra' }),
+      ACME_API_KEY: keyVar({ provider: 'acme', provider_label: 'Acme' }),
+      MIDDLE_API_KEY: keyVar({ provider: 'middle', provider_label: 'Middle' })
+    })
+    listOAuthProviders.mockResolvedValue({ providers: [] })
+
+    const { ProvidersSettings } = await import('./providers-settings')
+    render(<ProvidersSettings onClose={vi.fn()} onViewChange={vi.fn()} view="keys" />)
+
+    // Equal priority → alphabetical tiebreak: Acme, Middle, Zebra.
+    await screen.findByText('Acme')
+    const labels = screen.getAllByText(/Acme|Middle|Zebra/).map(el => el.textContent)
+    expect(labels).toEqual(['Acme', 'Middle', 'Zebra'])
+
+    // Typing narrows the list to matching providers only.
+    const search = screen.getByPlaceholderText('Search providers…')
+    fireEvent.change(search, { target: { value: 'mid' } })
+
+    await waitFor(() => expect(screen.queryByText('Acme')).toBeNull())
+    expect(screen.getByText('Middle')).toBeTruthy()
+    expect(screen.queryByText('Zebra')).toBeNull()
+
+    // A non-matching query shows the empty-state copy.
+    fireEvent.change(search, { target: { value: 'nonesuch-xyz' } })
+    expect(await screen.findByText('No providers match your search.')).toBeTruthy()
+  })
 })
diff --git a/apps/desktop/src/app/settings/providers-settings.tsx b/apps/desktop/src/app/settings/providers-settings.tsx
index 2585e13995d..31ced164fff 100644
--- a/apps/desktop/src/app/settings/providers-settings.tsx
+++ b/apps/desktop/src/app/settings/providers-settings.tsx
@@ -12,6 +12,7 @@ import {
   sortProviders
 } from '@/components/desktop-onboarding-overlay'
 import { Button } from '@/components/ui/button'
+import { SearchField } from '@/components/ui/search-field'
 import { disconnectOAuthProvider, listOAuthProviders } from '@/hermes'
 import { useI18n } from '@/i18n'
 import { Check, ChevronDown, ChevronRight, KeyRound, Loader2, Terminal, Trash2 } from '@/lib/icons'
@@ -45,8 +46,17 @@ export const PROVIDER_VIEWS = ['accounts', 'keys'] as const
 export type ProviderView = (typeof PROVIDER_VIEWS)[number]
 
 // Group the env catalog by provider — one ListRow per vendor plus optional
-// advanced overrides (base URL, region, etc.). Groups without a key field and
-// the "Other" bucket are skipped.
+// advanced overrides (base URL, region, etc.). Groups without a key field are
+// skipped.
+//
+// Grouping key precedence:
+//   1. Backend `provider_label` / `provider` (from the unified provider catalog
+//      in hermes_cli/provider_catalog.py) — the SAME provider identity
+//      `hermes model` uses. This is authoritative: a provider tagged by the
+//      backend always renders a card, even with no PROVIDER_GROUPS row.
+//   2. Desktop prefix match (`providerGroup`) — legacy fallback for provider
+//      env vars that predate the backend tagging.
+// Only entries that resolve to neither (the "Other" bucket) are skipped.
 function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGroup[] {
   const buckets = new Map<string, [string, EnvVarInfo][]>()
 
@@ -55,7 +65,9 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
       continue
     }
 
-    const name = providerGroup(key)
+    // Prefer the backend-supplied provider label/id so the Keys tab groups by
+    // the same identity the CLI picker uses; fall back to the prefix guess.
+    const name = info.provider_label?.trim() || info.provider?.trim() || providerGroup(key)
 
     if (name === 'Other') {
       continue
@@ -73,6 +85,9 @@ function buildProviderKeyGroups(vars: Record<string, EnvVarInfo>): ProviderKeyGr
       continue
     }
 
+    // Presentation overlay (priority, blurb, docs) is keyed by the prefix-based
+    // group name; when the backend introduced this provider it may have no
+    // overlay entry, so fall back to the backend/env metadata for display.
     const meta = providerMeta(name)
 
     groups.push({
@@ -131,6 +146,7 @@ function OAuthPicker({
   const rest = featured ? ordered.filter(p => p.id !== FEATURED_ID) : ordered
   // Keep connected accounts grouped and always visible; only the unconnected
   // providers hide behind the disclosure, so the page leads with what's set up.
+  // Both lists preserve `sortProviders` order (curated priority, then name).
   const connected = rest.filter(p => p.status?.logged_in)
   const others = rest.filter(p => !p.status?.logged_in)
   const collapsible = others.length > 0
@@ -284,6 +300,8 @@ export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSett
   const [oauthProviders, setOauthProviders] = useState<OAuthProvider[]>([])
   const [openProvider, setOpenProvider] = useState<null | string>(null)
   const [disconnecting, setDisconnecting] = useState<null | string>(null)
+  // Free-text filter for the API-keys view (provider name / env-var key / desc).
+  const [keyQuery, setKeyQuery] = useState('')
   // The onboarding overlay owns the OAuth flow. Watch its `manual` flag so we
   // re-read connection state when the user finishes (or dismisses) a sign-in
   // they launched from this page — otherwise the cards keep their stale status.
@@ -372,20 +390,49 @@ export function ProvidersSettings({ onClose, onViewChange, view }: ProvidersSett
   const keyGroups = buildProviderKeyGroups(vars)
 
   if (showApiKeys) {
+    const q = keyQuery.trim().toLowerCase()
+    const visibleGroups = q
+      ? keyGroups.filter(group => {
+          const haystack = [
+            group.name,
+            group.description ?? '',
+            group.primary[0],
+            ...group.advanced.map(([k]) => k)
+          ]
+
+          return haystack.some(s => s.toLowerCase().includes(q))
+        })
+      : keyGroups
+
     return (
       <SettingsContent>
         {keyGroups.length > 0 ? (
-          <div className="grid gap-2">
-            {keyGroups.map(group => (
-              <ProviderKeyRows
-                expanded={openProvider === group.name}
-                group={group}
-                key={group.name}
-                onExpand={() => setOpenProvider(group.name)}
-                onToggle={() => setOpenProvider(prev => (prev === group.name ? null : group.name))}
-                rowProps={rowProps}
-              />
-            ))}
+          <div className="grid gap-3">
+            <SearchField
+              aria-label={t.settings.providers.searchKeys}
+              containerClassName="w-full"
+              onChange={setKeyQuery}
+              placeholder={t.settings.providers.searchKeys}
+              value={keyQuery}
+            />
+            {visibleGroups.length > 0 ? (
+              <div className="grid gap-2">
+                {visibleGroups.map(group => (
+                  <ProviderKeyRows
+                    expanded={openProvider === group.name}
+                    group={group}
+                    key={group.name}
+                    onExpand={() => setOpenProvider(group.name)}
+                    onToggle={() => setOpenProvider(prev => (prev === group.name ? null : group.name))}
+                    rowProps={rowProps}
+                  />
+                ))}
+              </div>
+            ) : (
+              <div className="grid min-h-24 place-items-center px-4 py-6 text-center text-[length:var(--conversation-caption-font-size)] text-muted-foreground">
+                {t.settings.providers.noKeysMatch}
+              </div>
+            )}
           </div>
         ) : (
           <NoProviderKeys />
diff --git a/apps/desktop/src/app/types.ts b/apps/desktop/src/app/types.ts
index 9500468482c..1adc2bdec4e 100644
--- a/apps/desktop/src/app/types.ts
+++ b/apps/desktop/src/app/types.ts
@@ -106,6 +106,13 @@ export interface SkillCommandDispatchResponse {
 export interface SendCommandDispatchResponse {
   type: 'send'
   message: string
+  notice?: string
+}
+
+export interface PrefillCommandDispatchResponse {
+  type: 'prefill'
+  message: string
+  notice?: string
 }
 
 export type CommandDispatchResponse =
@@ -113,6 +120,7 @@ export type CommandDispatchResponse =
   | AliasCommandDispatchResponse
   | SkillCommandDispatchResponse
   | SendCommandDispatchResponse
+  | PrefillCommandDispatchResponse
 
 export type SidebarNavId = 'artifacts' | 'command-center' | 'messaging' | 'new-session' | 'settings' | 'skills'
 
diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx
index c5b20cedd3e..1ac97c200ca 100644
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -859,7 +859,10 @@ const ProcessNotificationNote: FC<{ text: string }> = ({ text }) => {
           <summary className="cursor-pointer select-none text-muted-foreground/45 hover:text-muted-foreground/70">
             output
           </summary>
-          <pre className="mt-0.5 max-h-48 overflow-auto whitespace-pre-wrap font-mono text-[0.625rem] leading-4 text-muted-foreground/55">
+          <pre
+            className="mt-0.5 max-h-48 overflow-auto whitespace-pre-wrap font-mono text-[0.625rem] leading-4 text-muted-foreground/55"
+            data-selectable-text="true"
+          >
             {detail}
           </pre>
         </details>
diff --git a/apps/desktop/src/components/chat/terminal-output.tsx b/apps/desktop/src/components/chat/terminal-output.tsx
index 946ec2386be..034f20f2a81 100644
--- a/apps/desktop/src/components/chat/terminal-output.tsx
+++ b/apps/desktop/src/components/chat/terminal-output.tsx
@@ -41,7 +41,11 @@ export function TerminalOutput({ className, text }: TerminalOutputProps) {
   }, [text])
 
   return (
-    <div className={cn('max-h-16 overflow-auto overscroll-contain', className)} ref={ref}>
+    <div
+      className={cn('max-h-16 overflow-auto overscroll-contain', className)}
+      data-selectable-text="true"
+      ref={ref}
+    >
       <pre className="w-max min-w-full font-mono text-[0.5625rem] leading-[0.85rem] whitespace-pre text-muted-foreground/70">
         {text}
       </pre>
diff --git a/apps/desktop/src/i18n/en.ts b/apps/desktop/src/i18n/en.ts
index d27741c44db..158de543c49 100644
--- a/apps/desktop/src/i18n/en.ts
+++ b/apps/desktop/src/i18n/en.ts
@@ -581,6 +581,8 @@ export const en: Translations = {
       removedMessage: provider => `${provider} was removed.`,
       failedRemove: provider => `Could not remove ${provider}`,
       noProviderKeys: 'No provider API keys available.',
+      searchKeys: 'Search providers…',
+      noKeysMatch: 'No providers match your search.',
       loading: 'Loading providers...'
     },
     sessions: {
diff --git a/apps/desktop/src/i18n/ja.ts b/apps/desktop/src/i18n/ja.ts
index 194452ed407..244fc12ca49 100644
--- a/apps/desktop/src/i18n/ja.ts
+++ b/apps/desktop/src/i18n/ja.ts
@@ -700,6 +700,8 @@ export const ja = defineLocale({
       removedMessage: provider => `${provider} を削除しました。`,
       failedRemove: provider => `${provider} を削除できませんでした`,
       noProviderKeys: '利用可能なプロバイダー API キーがありません。',
+      searchKeys: 'プロバイダーを検索…',
+      noKeysMatch: '一致するプロバイダーがありません。',
       loading: 'プロバイダーを読み込み中...'
     },
     sessions: {
diff --git a/apps/desktop/src/i18n/types.ts b/apps/desktop/src/i18n/types.ts
index 94489e5de9e..90168d28e86 100644
--- a/apps/desktop/src/i18n/types.ts
+++ b/apps/desktop/src/i18n/types.ts
@@ -462,6 +462,8 @@ export interface Translations {
       removedMessage: (provider: string) => string
       failedRemove: (provider: string) => string
       noProviderKeys: string
+      searchKeys: string
+      noKeysMatch: string
       loading: string
     }
     sessions: {
diff --git a/apps/desktop/src/i18n/zh-hant.ts b/apps/desktop/src/i18n/zh-hant.ts
index de329631098..c1eb3b8f883 100644
--- a/apps/desktop/src/i18n/zh-hant.ts
+++ b/apps/desktop/src/i18n/zh-hant.ts
@@ -677,6 +677,8 @@ export const zhHant = defineLocale({
       removedMessage: provider => `${provider} 已移除。`,
       failedRemove: provider => `無法移除 ${provider}`,
       noProviderKeys: '沒有可用的提供方 API 金鑰。',
+      searchKeys: '搜尋提供方…',
+      noKeysMatch: '沒有符合的提供方。',
       loading: '正在載入提供方...'
     },
     sessions: {
diff --git a/apps/desktop/src/i18n/zh.ts b/apps/desktop/src/i18n/zh.ts
index ac8c5c0b958..161a438b9e7 100644
--- a/apps/desktop/src/i18n/zh.ts
+++ b/apps/desktop/src/i18n/zh.ts
@@ -774,6 +774,8 @@ export const zh: Translations = {
       removedMessage: provider => `${provider} 已移除。`,
       failedRemove: provider => `无法移除 ${provider}`,
       noProviderKeys: '没有可用的提供方 API 密钥。',
+      searchKeys: '搜索提供方…',
+      noKeysMatch: '没有匹配的提供方。',
       loading: '正在加载提供方...'
     },
     sessions: {
diff --git a/apps/desktop/src/lib/chat-runtime.test.ts b/apps/desktop/src/lib/chat-runtime.test.ts
index c2a9099a1a8..1b4efb33ad5 100644
--- a/apps/desktop/src/lib/chat-runtime.test.ts
+++ b/apps/desktop/src/lib/chat-runtime.test.ts
@@ -2,7 +2,7 @@ import { describe, expect, it } from 'vitest'
 
 import type { ComposerAttachment } from '@/store/composer'
 
-import { coerceThinkingText, optimisticAttachmentRef } from './chat-runtime'
+import { coerceThinkingText, optimisticAttachmentRef, parseCommandDispatch } from './chat-runtime'
 
 const DATA_URL = 'data:image/png;base64,iVBORw0KGgoAAAANS'
 
@@ -52,3 +52,31 @@ describe('coerceThinkingText', () => {
     ).toBe('')
   })
 })
+
+describe('parseCommandDispatch', () => {
+  it('keeps the notice on a send directive (e.g. /goal set)', () => {
+    // The backend's /goal set returns {type:send, notice:"⊙ Goal set …", message}.
+    // Dropping the notice made /goal look like it did nothing in the desktop app.
+    const parsed = parseCommandDispatch({ type: 'send', notice: '⊙ Goal set', message: 'do the thing' })
+
+    expect(parsed).toEqual({ type: 'send', message: 'do the thing', notice: '⊙ Goal set' })
+  })
+
+  it('keeps message-only send directives working (no notice)', () => {
+    expect(parseCommandDispatch({ type: 'send', message: 'hi' })).toEqual({
+      type: 'send',
+      message: 'hi',
+      notice: undefined
+    })
+  })
+
+  it('parses a prefill directive with its notice (e.g. /undo)', () => {
+    const parsed = parseCommandDispatch({ type: 'prefill', notice: 'backed up 1 turn', message: 'edit me' })
+
+    expect(parsed).toEqual({ type: 'prefill', message: 'edit me', notice: 'backed up 1 turn' })
+  })
+
+  it('rejects a prefill directive missing its message', () => {
+    expect(parseCommandDispatch({ type: 'prefill', notice: 'x' })).toBeNull()
+  })
+})
diff --git a/apps/desktop/src/lib/chat-runtime.ts b/apps/desktop/src/lib/chat-runtime.ts
index ac5273a2236..c573a1e5899 100644
--- a/apps/desktop/src/lib/chat-runtime.ts
+++ b/apps/desktop/src/lib/chat-runtime.ts
@@ -238,7 +238,12 @@ export function parseCommandDispatch(raw: unknown): CommandDispatchResponse | nu
       return typeof row.name === 'string' ? { type: 'skill', name: row.name, message: str(row.message) } : null
 
     case 'send':
-      return typeof row.message === 'string' ? { type: 'send', message: row.message } : null
+      return typeof row.message === 'string' ? { type: 'send', message: row.message, notice: str(row.notice) } : null
+
+    case 'prefill':
+      return typeof row.message === 'string'
+        ? { type: 'prefill', message: row.message, notice: str(row.notice) }
+        : null
 
     default:
       return null
diff --git a/apps/desktop/src/lib/session-ids.test.ts b/apps/desktop/src/lib/session-ids.test.ts
new file mode 100644
index 00000000000..b5653c8eecd
--- /dev/null
+++ b/apps/desktop/src/lib/session-ids.test.ts
@@ -0,0 +1,44 @@
+import { describe, expect, it } from 'vitest'
+
+import { storedSessionIdForNotification } from './session-ids'
+
+describe('storedSessionIdForNotification', () => {
+  it('translates a runtime id back to its stored id', () => {
+    // The route is keyed by the stored id, but notifications carry the runtime
+    // id. Resolving runtime -> stored keeps notification-click navigation from
+    // resuming a non-existent stored session ("session not found").
+    const map = new Map([['stored-abc', 'runtime-123']])
+
+    expect(storedSessionIdForNotification('runtime-123', map)).toBe('stored-abc')
+  })
+
+  it('returns the id unchanged when no mapping is known', () => {
+    // A notification for a session this window never opened may already carry a
+    // stored id; let the resume/REST lookup handle it as-is.
+    const map = new Map([['stored-abc', 'runtime-123']])
+
+    expect(storedSessionIdForNotification('stored-xyz', map)).toBe('stored-xyz')
+  })
+
+  it('returns the id unchanged for an empty map', () => {
+    expect(storedSessionIdForNotification('runtime-123', new Map())).toBe('runtime-123')
+  })
+
+  it('resolves the correct stored id among several sessions', () => {
+    const map = new Map([
+      ['stored-1', 'runtime-1'],
+      ['stored-2', 'runtime-2'],
+      ['stored-3', 'runtime-3']
+    ])
+
+    expect(storedSessionIdForNotification('runtime-2', map)).toBe('stored-2')
+  })
+
+  it('does not treat a stored id as a runtime id (keys are not matched)', () => {
+    // The map is stored -> runtime. A value that only appears as a *key* must
+    // not be rewritten, otherwise an already-stored id could be mangled.
+    const map = new Map([['stored-1', 'runtime-1']])
+
+    expect(storedSessionIdForNotification('stored-1', map)).toBe('stored-1')
+  })
+})
diff --git a/apps/desktop/src/lib/session-ids.ts b/apps/desktop/src/lib/session-ids.ts
new file mode 100644
index 00000000000..c97cadc2628
--- /dev/null
+++ b/apps/desktop/src/lib/session-ids.ts
@@ -0,0 +1,26 @@
+// The gateway tags every event — and therefore every native notification —
+// with the *runtime* session id (the key under which the session lives in the
+// gateway's in-memory `_sessions` map). The chat route, however, is keyed by
+// the *stored* session id (`stored_session_id`), which is a different value:
+// a brand-new chat gets a runtime id immediately but its stored id is assigned
+// when the first turn persists. Navigating to a runtime id therefore tries to
+// resume a stored session that does not exist ("session not found") and
+// strands the user, who experiences it as the running session being destroyed.
+//
+// `runtimeIdByStoredSessionId` maps stored -> runtime; this resolves the
+// reverse so notification-click navigation lands on the real route. The id is
+// returned unchanged when no mapping is known — it may already be a stored id
+// (e.g. a notification for a session this window never opened), in which case
+// the normal resume/REST lookup handles it.
+export function storedSessionIdForNotification(
+  id: string,
+  runtimeIdByStoredSessionId: ReadonlyMap<string, string>
+): string {
+  // Reverse lookup: scan the stored -> runtime entries for the first one whose
+  // runtime side equals `id`. Only values are compared, so an id that merely
+  // appears as a key is never rewritten.
+  const match = Array.from(runtimeIdByStoredSessionId).find(([, runtimeId]) => runtimeId === id)
+
+  // No matching entry: the id is unknown to this map, pass it through as-is.
+  return match === undefined ? id : match[0]
+}
diff --git a/apps/desktop/src/styles.css b/apps/desktop/src/styles.css
index 03b348c9d84..2aff7a21c77 100644
--- a/apps/desktop/src/styles.css
+++ b/apps/desktop/src/styles.css
@@ -680,6 +680,7 @@ textarea,
 [contenteditable]:not([contenteditable='false']),
 [data-slot='aui_user-message-root'],
 [data-slot='aui_assistant-message-content'],
+[data-slot='aui_system-message-root'],
 [data-selectable-text='true'],
 [data-selectable-text='true'] * {
   -webkit-user-select: text;
diff --git a/apps/desktop/src/types/hermes.ts b/apps/desktop/src/types/hermes.ts
index a497e3f10a9..b67cc3041a7 100644
--- a/apps/desktop/src/types/hermes.ts
+++ b/apps/desktop/src/types/hermes.ts
@@ -108,6 +108,12 @@ export interface EnvVarInfo {
   description: string
   is_password: boolean
   is_set: boolean
+  // Backend-derived provider grouping hints (from the unified provider catalog
+  // in hermes_cli/provider_catalog.py). When present, the Keys tab groups by
+  // this provider identity — the SAME one `hermes model` uses — instead of
+  // desktop-only env-var prefix guesses. Empty for non-provider env vars.
+  provider?: string
+  provider_label?: string
   redacted_value: null | string
   tools: string[]
   url: null | string
diff --git a/cli.py b/cli.py
index f6a9393d34a..52bfe6cdb0a 100644
--- a/cli.py
+++ b/cli.py
@@ -6959,24 +6959,43 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
             self._close_model_picker()
 
     def _handle_model_switch(self, cmd_original: str):
-        """Handle /model command — switch model for this session.
+        """Handle /model command — switch model.
 
         Supports:
           /model                              — show current model + usage hints
-          /model <name>                       — switch for this session only
-          /model <name> --global              — switch and persist to config.yaml
+          /model <name>                       — switch model (persists by default)
+          /model <name> --session             — switch for this session only
+          /model <name> --global              — switch and persist (explicit)
           /model <name> --provider <provider> — switch provider + model
           /model --provider <provider>        — switch to provider, auto-detect model
+
+        Persistence defaults to on (``model.persist_switch_by_default`` in
+        config.yaml, default True). Use ``--session`` for a one-off switch.
         """
-        from hermes_cli.model_switch import switch_model, parse_model_flags
+        from hermes_cli.model_switch import (
+            switch_model,
+            parse_model_flags,
+            resolve_persist_behavior,
+        )
         from hermes_cli.providers import get_label
 
         # Parse args from the original command
         parts = cmd_original.split(None, 1)  # split off '/model'
         raw_args = parts[1].strip() if len(parts) > 1 else ""
 
-        # Parse --provider, --global, and --refresh flags
-        model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
+        # Parse --provider, --global, --session, and --refresh flags
+        (
+            model_input,
+            explicit_provider,
+            is_global_flag,
+            force_refresh,
+            is_session,
+        ) = parse_model_flags(raw_args)
+        # Resolve the effective persistence once: --session overrides the
+        # config-gated default, --global forces persist, otherwise defer to
+        # model.persist_switch_by_default (defaults to True so /model survives
+        # across sessions).
+        persist_global = resolve_persist_behavior(is_global_flag, is_session)
 
         # --refresh: wipe the on-disk picker cache before building the
         # provider list. Forces a live re-fetch of every authed provider's
@@ -7024,7 +7043,8 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
             if not providers:
                 _cprint("  No authenticated providers found.")
                 _cprint("")
-                _cprint("  /model <name>                        switch model")
+                _cprint("  /model <name>                        switch model (persists)")
+                _cprint("  /model <name> --session              switch for this session only")
                 _cprint("  /model --provider <slug>             switch provider")
                 _cprint("  /model --refresh                     re-fetch live model lists")
                 return
@@ -7144,7 +7164,7 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
             save_config_value("model.default", result.new_model)
             if result.provider_changed:
                 save_config_value("model.provider", result.target_provider)
-            _cprint("    Saved to config.yaml (--global)")
+            _cprint("    Saved to config.yaml")
         else:
             _cprint("    (session only — add --global to persist)")
 
@@ -11917,7 +11937,13 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
             # --- /model picker modal ---
             if self._model_picker_state:
                 try:
-                    self._handle_model_picker_selection()
+                    # Picker selections persist by default (same default as
+                    # /model <name>); honour model.persist_switch_by_default.
+                    from hermes_cli.model_switch import resolve_persist_behavior
+
+                    self._handle_model_picker_selection(
+                        persist_global=resolve_persist_behavior(False, False)
+                    )
                 except Exception as _exc:
                     _cprint(f"  ✗ Model selection failed: {_exc}")
                     self._close_model_picker()
@@ -13527,13 +13553,13 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
             style=style,
             full_screen=False,
             mouse_support=False,
-            # The status bar contains wall-clock read-outs (live prompt elapsed
-            # and idle-since-last-turn). Once a turn finishes there may be no
-            # further events to invalidate the app, so prompt_toolkit would keep
-            # rendering the first post-turn value (usually ``✓ 0s``) forever.
-            # A low-rate refresh keeps the clock honest without reintroducing a
-            # custom repaint thread or touching conversation state.
-            refresh_interval=1.0,
+            # Read from display.cli_refresh_interval (default 0 = disabled).
+            # When non-zero, prompt_toolkit redraws the UI on this cadence
+            # during idle, keeping wall-clock status-bar read-outs ticking.
+            # Set to 0 to suppress background redraws entirely — avoids
+            # fighting terminal auto-scroll in non-fullscreen mode (Xshell,
+            # iTerm2, Windows Terminal). See #48309.
+            refresh_interval=float(CLI_CONFIG.get("display", {}).get("cli_refresh_interval", 0)),
             # Erase the live bottom chrome (status bar, input box, separator
             # rules) on exit instead of freezing a final copy into scrollback.
             # Without this, prompt_toolkit's render_as_done teardown repaints
diff --git a/cron/scheduler.py b/cron/scheduler.py
index 4f7940db0b1..51bc4e5721e 100644
--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -15,6 +15,7 @@ import contextvars
 import json
 import logging
 import os
+import re
 import shutil
 import subprocess
 import sys
@@ -45,6 +46,59 @@ from hermes_time import now as _hermes_now
 logger = logging.getLogger(__name__)
 
 
+def _summarize_cron_failure_for_delivery(job: dict, error: str | None) -> str:
+    """Return a compact one-line failure message for chat delivery.
+
+    Full details stay in the cron output directory and the logs. Chat should
+    show the operator what broke without dumping provider JSON, retry noise, or
+    stack traces into the delivery channel.
+
+    ``job`` supplies the label (``name``, then ``id``, then a generic fallback);
+    ``error`` is the raw failure text and may be ``None``, empty or blank.
+    """
+    job_name = job.get("name") or job.get("id") or "cron job"
+    text = (error or "").strip() or "unknown error"
+    lower = text.lower()
+
+    # Provider/API failures are the common noisy path. Keep these short. 429 is
+    # matched as a whole token so ids or counts like "14290" are not throttling.
+    if re.search(r"\b429\b", text) or "rate limit" in lower or "usage limit" in lower:
+        reason = "rate limit"
+        if "weekly usage limit" in lower:
+            reason = "weekly usage limit"
+        elif "quota" in lower:
+            reason = "quota limit"
+        return (
+            f"⚠️ Cron '{job_name}' failed: provider {reason}. "
+            "Fallback chain was exhausted or unavailable. "
+            "Full details saved in cron output."
+        )
+
+    if "readtimeout" in lower or "timed out" in lower or "timeout" in lower:
+        return (
+            f"⚠️ Cron '{job_name}' failed: provider timeout. "
+            "Fallback chain was exhausted or unavailable. "
+            "Full details saved in cron output."
+        )
+
+    # Match the "authenticat"/"authoriz" stems anywhere (so "unauthorized" and
+    # "unauthenticated" hit, while a bare "auth" as in "oauth" does not) and the
+    # 401/403 status codes as whole tokens, so "4015" and similar are ignored.
+    if re.search(r"authenticat|authoriz", lower) or re.search(r"\b(401|403)\b", text):
+        return (
+            f"⚠️ Cron '{job_name}' failed: provider authentication error. "
+            "Full details saved in cron output."
+        )
+
+    # Strip common exception wrappers and collapse provider payloads. Bound the
+    # input first so a multi-KB provider blob cannot slow the substitutions.
+    cleaned = re.sub(r"^(RuntimeError|Exception|ValueError|HTTPStatusError):\s*", "", text[:2000])
+    cleaned = re.sub(r"\s+", " ", cleaned).strip()
+    if len(cleaned) > 180:
+        cleaned = cleaned[:177].rstrip() + "..."
+    return f"⚠️ Cron '{job_name}' failed: {cleaned}"
+
+
 class CronPromptInjectionBlocked(Exception):
     """Raised by _build_job_prompt when the fully-assembled prompt trips the
     injection scanner. Caught in run_job so the operator sees a clean
@@ -1992,7 +2046,7 @@ def run_one_job(job: dict, *, adapters=None, loop=None, verbose: bool = False) -
         # Deliver the final response to the origin/target chat.
         # If the agent responded with [SILENT], skip delivery (but
         # output is already saved above).  Failed jobs always deliver.
-        deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}"
+        deliver_content = final_response if success else _summarize_cron_failure_for_delivery(job, error)
         # Treat whitespace-only final responses the same as empty
         # responses: do not deliver a blank message, and let the
         # empty-response guard below mark the run as a soft failure.
diff --git a/gateway/config.py b/gateway/config.py
index 0ebf23e12d0..5b89c56b375 100644
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -545,6 +545,13 @@ class GatewayConfig:
     thread_sessions_per_user: bool = False  # When False (default), threads are shared across all participants
     max_concurrent_sessions: Optional[int] = None  # Positive int caps simultaneous active chat sessions
 
+    # Multi-profile multiplexing (opt-in; default off preserves one-gateway-per-profile).
+    # When True, the default profile's gateway serves inbound messages for every
+    # profile on the host: profiles are stamped into session keys and (in later
+    # phases) per-profile adapters/credentials are resolved. When False, the
+    # gateway behaves exactly as before — single HERMES_HOME, no profile stamping.
+    multiplex_profiles: bool = False
+
     # Unauthorized DM policy
     unauthorized_dm_behavior: str = "pair"  # "pair" or "ignore"
 
@@ -650,6 +657,7 @@ class GatewayConfig:
             "group_sessions_per_user": self.group_sessions_per_user,
             "thread_sessions_per_user": self.thread_sessions_per_user,
             "max_concurrent_sessions": self.max_concurrent_sessions,
+            "multiplex_profiles": self.multiplex_profiles,
             "unauthorized_dm_behavior": self.unauthorized_dm_behavior,
             "streaming": self.streaming.to_dict(),
             "session_store_max_age_days": self.session_store_max_age_days,
@@ -695,7 +703,12 @@ class GatewayConfig:
 
         group_sessions_per_user = data.get("group_sessions_per_user")
         thread_sessions_per_user = data.get("thread_sessions_per_user")
+        multiplex_profiles = data.get("multiplex_profiles")
         nested_gateway = data.get("gateway") if isinstance(data.get("gateway"), dict) else {}
+        if multiplex_profiles is None and isinstance(nested_gateway, dict):
+            # Also honor gateway.multiplex_profiles written by
+            # ``hermes config set gateway.multiplex_profiles true``.
+            multiplex_profiles = nested_gateway.get("multiplex_profiles")
         if "max_concurrent_sessions" in data:
             max_concurrent_raw = data.get("max_concurrent_sessions")
             max_concurrent_key = "max_concurrent_sessions"
@@ -732,6 +745,7 @@ class GatewayConfig:
             stt_enabled=_coerce_bool(stt_enabled, True),
             group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
             thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
+            multiplex_profiles=_coerce_bool(multiplex_profiles, False),
             max_concurrent_sessions=max_concurrent_sessions,
             unauthorized_dm_behavior=unauthorized_dm_behavior,
             streaming=StreamingConfig.from_dict(data.get("streaming", {})),
@@ -823,6 +837,13 @@ def load_gateway_config() -> GatewayConfig:
             if "thread_sessions_per_user" in yaml_cfg:
                 gw_data["thread_sessions_per_user"] = yaml_cfg["thread_sessions_per_user"]
 
+            # Multiplexing flag: accept both the top-level key and the nested
+            # gateway.multiplex_profiles form (from_dict resolves the nested
+            # fallback, but surface the top-level key here for parity with the
+            # other session-scope flags above).
+            if "multiplex_profiles" in yaml_cfg:
+                gw_data["multiplex_profiles"] = yaml_cfg["multiplex_profiles"]
+
             gateway_section = yaml_cfg.get("gateway")
             if isinstance(gateway_section, dict) and "max_concurrent_sessions" in gateway_section:
                 gw_data["max_concurrent_sessions"] = gateway_section["max_concurrent_sessions"]
@@ -2143,5 +2164,24 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
     except Exception as e:
         logger.debug("Plugin platform enable pass failed: %s", e)
 
+    # Relay (generic connector-fronted platform, EXPERIMENTAL). Enabled when a
+    # connector relay URL is configured via GATEWAY_RELAY_URL (env) or
+    # gateway.relay_url (config.yaml). The adapter is registered into the
+    # platform_registry at gateway startup (gateway.relay.register_relay_adapter)
+    # and dials OUT to the connector — so, like Telegram/Matrix, it has no public
+    # inbound port and just needs Platform.RELAY present+enabled in
+    # config.platforms for start_gateway()'s connect loop to bring it up. The
+    # connected-checker (Platform.RELAY in _PLATFORM_CONNECTED_CHECKERS) keys on
+    # extra["relay_url"], so mirror the URL into extra here.
+    relay_url_env = os.getenv("GATEWAY_RELAY_URL", "").strip()
+    relay_url_yaml = ""
+    existing_relay = config.platforms.get(Platform.RELAY)
+    if existing_relay is not None:
+        relay_url_yaml = str(existing_relay.extra.get("relay_url") or "").strip()
+    relay_url_val = relay_url_env or relay_url_yaml
+    if relay_url_val:
+        relay_config = _enable_from_env(Platform.RELAY)
+        relay_config.extra["relay_url"] = relay_url_val.rstrip("/")
+
     for platform_config in config.platforms.values():
         platform_config.extra.pop("_enabled_explicit", None)
diff --git a/gateway/kanban_watchers.py b/gateway/kanban_watchers.py
index 328cbd7fb5b..21753054f01 100644
--- a/gateway/kanban_watchers.py
+++ b/gateway/kanban_watchers.py
@@ -23,6 +23,58 @@ from typing import Any, Optional
 logger = logging.getLogger("gateway.run")
 
 
+def _acquire_singleton_lock(lock_path) -> "tuple[Optional[object], str]":
+    """Take an exclusive, non-blocking advisory lock for the sole dispatcher.
+
+    Only one gateway process machine-wide may run the embedded kanban
+    dispatcher: concurrent dispatchers double the reclaim frequency (each
+    runs its own ``release_stale_claims`` → promote → dispatch loop), double
+    claim-attempt events in the event log, and — with ``wal_autocheckpoint=0`` —
+    concurrent manual WAL checkpoints can corrupt index pages. The
+    ``dispatch_in_gateway`` config flag is the primary control; this lock is the
+    backstop that survives config drift and same-profile restart races.
+
+    Delegates to :func:`gateway.status._try_acquire_file_lock` (``fcntl`` on
+    POSIX, ``msvcrt`` on Windows) so the guard is cross-platform.
+
+    Returns ``(handle, "held")`` on success — the caller keeps the file handle
+    for the process lifetime and **must** release it via
+    :func:`_release_singleton_lock` when done. ``(None, "contended")`` when
+    ``_try_acquire_file_lock`` reports failure (caller must NOT dispatch).
+    ``(None, "unavailable")`` when the status.py helper cannot be imported or
+    the lock file's directory/handle cannot be created (``OSError``) — caller
+    falls back to config-only control.
+    """
+    try:
+        from gateway.status import _try_acquire_file_lock  # deferred; same package
+    except ImportError:
+        return None, "unavailable"
+    try:
+        Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
+        handle = open(str(lock_path), "a+", encoding="utf-8")  # "a+" creates the file without truncating it
+    except OSError:
+        return None, "unavailable"
+    if not _try_acquire_file_lock(handle):
+        handle.close()  # lock not obtained: do not keep the handle open
+        return None, "contended"
+    return handle, "held"
+
+
+def _release_singleton_lock(handle) -> None:
+    """Release a dispatcher singleton lock acquired via :func:`_acquire_singleton_lock`."""
+    if handle is None:
+        return  # nothing was acquired, so there is nothing to unlock or close
+    try:
+        from gateway.status import _release_file_lock
+        _release_file_lock(handle)
+    except Exception:
+        pass  # best-effort: an import/unlock failure must not prevent closing the handle
+    try:
+        handle.close()
+    except Exception:
+        pass  # best-effort: errors from close() are deliberately ignored during teardown
+
+
 class GatewayKanbanWatchersMixin:
     """Kanban watcher / notifier / dispatcher loops for GatewayRunner."""
 
@@ -606,6 +658,31 @@ class GatewayKanbanWatchersMixin:
             logger.warning("kanban dispatcher: kanban_db not importable; dispatcher disabled")
             return
 
+        # Single-dispatcher backstop. dispatch_in_gateway defaults to true, so a
+        # new profile gateway (or a same-profile restart race) can silently
+        # start a second dispatcher; concurrent dispatchers double reclaim
+        # frequency, double claim-attempt events, and — with
+        # wal_autocheckpoint=0 — concurrent manual WAL checkpoints can corrupt
+        # index pages. The lock lives at the machine-global kanban root
+        # (shared across profiles by design), so it serialises ALL gateways.
+        self._kanban_dispatcher_lock_handle = None
+        _lock_path = _kb.kanban_home() / "kanban" / ".dispatcher.lock"
+        _lock_handle, _lock_state = _acquire_singleton_lock(_lock_path)
+        if _lock_state == "contended":
+            logger.info(
+                "kanban dispatcher: another gateway already holds the dispatcher "
+                "lock (%s); this gateway will NOT dispatch.", _lock_path,
+            )
+            return
+        if _lock_state == "held":
+            self._kanban_dispatcher_lock_handle = _lock_handle  # hold for process lifetime
+            logger.info("kanban dispatcher: holding singleton dispatcher lock (%s)", _lock_path)
+        else:
+            logger.warning(
+                "kanban dispatcher: advisory lock unavailable at %s; proceeding "
+                "on config control alone.", _lock_path,
+            )
+
         try:
             interval = float(kanban_cfg.get("dispatch_interval_seconds", 60) or 60)
         except (ValueError, TypeError):
@@ -1052,6 +1129,8 @@ class GatewayKanbanWatchersMixin:
                         last_warn_at = now
             except asyncio.CancelledError:
                 logger.debug("kanban dispatcher: cancelled")
+                _release_singleton_lock(self._kanban_dispatcher_lock_handle)
+                self._kanban_dispatcher_lock_handle = None
                 raise
             except Exception:
                 logger.exception("kanban dispatcher: unexpected watcher error")
@@ -1062,3 +1141,6 @@ class GatewayKanbanWatchersMixin:
             while slept < interval and self._running:
                 await asyncio.sleep(min(1.0, interval - slept))
                 slept += 1.0
+
+        _release_singleton_lock(self._kanban_dispatcher_lock_handle)
+        self._kanban_dispatcher_lock_handle = None
diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py
index f7e1ba42f85..09d0dc227a2 100644
--- a/gateway/platforms/api_server.py
+++ b/gateway/platforms/api_server.py
@@ -1043,7 +1043,13 @@ class APIServerAdapter(BasePlatformAdapter):
         — matching the semantics of the native gateway's ``session_key``.
         """
         from run_agent import AIAgent
-        from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner
+        from gateway.run import (
+            _current_max_iterations,
+            _resolve_runtime_agent_kwargs,
+            _resolve_gateway_model,
+            _load_gateway_config,
+            GatewayRunner,
+        )
         from hermes_cli.tools_config import _get_platform_tools
 
         runtime_kwargs = _resolve_runtime_agent_kwargs()
@@ -1053,7 +1059,7 @@ class APIServerAdapter(BasePlatformAdapter):
         user_config = _load_gateway_config()
         enabled_toolsets = sorted(_get_platform_tools(user_config, "api_server"))
 
-        max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
+        max_iterations = _current_max_iterations()
 
         # Load fallback provider chain so the API server platform has the
         # same fallback behaviour as Telegram/Discord/Slack (fixes #4954).
diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py
index 222adf4c2ea..d9f98282a8d 100644
--- a/gateway/platforms/webhook.py
+++ b/gateway/platforms/webhook.py
@@ -57,6 +57,11 @@ from gateway.platforms.base import (
 
 logger = logging.getLogger(__name__)
 
+# Sentinel returned by _resolve_request_profile when a /p/<profile>/ prefix
+# names a profile this gateway does not serve (→ 404). Distinct from None
+# (no prefix / multiplexing off → handle as the default profile).
+_PROFILE_REJECTED = object()
+
 _BUILTIN_DELIVER_PLATFORMS = {
     "telegram", "discord", "slack", "signal", "sms", "whatsapp",
     "matrix", "mattermost", "homeassistant", "email", "dingtalk",
@@ -189,6 +194,14 @@ class WebhookAdapter(BasePlatformAdapter):
         app = web.Application()
         app.router.add_get("/health", self._handle_health)
         app.router.add_post("/webhooks/{route_name}", self._handle_webhook)
+        # Multi-profile multiplexing: a /p/<profile>/webhooks/<route> prefix
+        # routes the inbound event to that profile. Same handler; the profile is
+        # captured from the path and stamped onto the SessionSource so the agent
+        # turn resolves that profile's config/skills/credentials. Only honored
+        # when gateway.multiplex_profiles is on (the handler validates).
+        app.router.add_post(
+            "/p/{profile}/webhooks/{route_name}", self._handle_webhook
+        )
 
         # Port conflict detection — fail fast if port is already in use
         import socket as _socket
@@ -397,6 +410,35 @@ class WebhookAdapter(BasePlatformAdapter):
         except Exception as e:
             logger.error("[webhook] Failed to reload dynamic routes: %s", e)
 
+    def _resolve_request_profile(self, request: "web.Request"):
+        """Resolve + validate the /p/<profile>/ URL prefix on a webhook request.
+
+        Returns:
+          - ``None`` when no profile prefix is present, or multiplexing is off
+            (the prefix is ignored, request handled as the default profile).
+          - the profile name (str) when present, multiplexing is on, and the
+            profile is one this gateway serves.
+          - ``_PROFILE_REJECTED`` when the profile is unknown, or the served
+            set could not be resolved (logged; the handler returns 404).
+        """
+        profile = (request.match_info.get("profile") or "").strip()
+        if not profile:
+            return None
+        runner = self.gateway_runner
+        cfg = getattr(runner, "config", None)
+        if not getattr(cfg, "multiplex_profiles", False):
+            # Prefix supplied but multiplexing is off — ignore it, behave as
+            # the single-profile gateway (don't 404 a would-be valid route).
+            return None
+        try:
+            from hermes_cli.profiles import profiles_to_serve
+            served = {name for name, _ in profiles_to_serve(multiplex=True)}
+        except Exception:
+            # Fail closed, but leave a trace — otherwise every profile 404s silently.
+            logger.warning("[webhook] Could not resolve served profiles", exc_info=True)
+            return _PROFILE_REJECTED
+        return profile if profile in served else _PROFILE_REJECTED
+
     async def _handle_webhook(self, request: "web.Request") -> "web.Response":
         """POST /webhooks/{route_name} — receive and process a webhook event."""
         # Hot-reload dynamic subscriptions on each request (mtime-gated, cheap)
@@ -405,6 +447,13 @@ class WebhookAdapter(BasePlatformAdapter):
         route_name = request.match_info.get("route_name", "")
         route_config = self._routes.get(route_name)
 
+        # Multi-profile: resolve + validate the /p/<profile>/ prefix if present.
+        profile = self._resolve_request_profile(request)
+        if profile is _PROFILE_REJECTED:
+            return web.json_response(
+                {"error": "Unknown or unconfigured profile"}, status=404
+            )
+
         if not route_config:
             return web.json_response(
                 {"error": f"Unknown route: {route_name}"}, status=404
@@ -641,6 +690,8 @@ class WebhookAdapter(BasePlatformAdapter):
             user_id=f"webhook:{route_name}",
             user_name=route_name,
         )
+        if profile and isinstance(profile, str):
+            source.profile = profile
         event = MessageEvent(
             text=prompt,
             message_type=MessageType.TEXT,
diff --git a/gateway/relay/adapter.py b/gateway/relay/adapter.py
index fc4e5f40ee7..a1a7826f8f8 100644
--- a/gateway/relay/adapter.py
+++ b/gateway/relay/adapter.py
@@ -57,6 +57,13 @@ class RelayAdapter(BasePlatformAdapter):
         self._transport = transport
         # Capability surface read by stream_consumer (getattr(..., 4096)).
         self.MAX_MESSAGE_LENGTH = descriptor.max_message_length
+        # chat_id -> guild_id (Discord) / workspace scope, learned from inbound
+        # events. The connector's egress guard resolves the owning tenant from
+        # the OUTBOUND action's metadata.guild_id; the gateway's generic delivery
+        # path (run.py _thread_metadata_for_source) only carries thread_id, so we
+        # re-attach the scope here from what we saw inbound. Keyed by chat_id
+        # (channel) since that's what send() receives. See routedEgressGuard.ts.
+        self._scope_by_chat: Dict[str, str] = {}
         self.supports_code_blocks = descriptor.markdown_dialect not in ("", "plain")
 
     # ── capability surface (from descriptor) ─────────────────────────────
@@ -108,8 +115,35 @@ class RelayAdapter(BasePlatformAdapter):
 
     async def _on_inbound(self, event) -> None:
         """Bridge a connector-delivered MessageEvent into the normal adapter path."""
+        self._capture_scope(event)
         await self.handle_message(event)
 
+    def _capture_scope(self, event) -> None:
+        """Best-effort: remember chat_id -> guild_id from an inbound event for outbound replies."""
+        try:
+            origin = getattr(event, "source", None)
+            if not origin:
+                return
+            guild = getattr(origin, "guild_id", None)
+            channel = getattr(origin, "chat_id", None)
+            if guild and channel:
+                self._scope_by_chat[str(channel)] = str(guild)
+        except Exception:  # noqa: BLE001 - scope tracking must never break inbound
+            pass
+
+    def _with_scope(self, chat_id: str, metadata: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+        """Copy ``metadata`` and add the ``guild_id`` learned for ``chat_id``, if any.
+        The connector resolves the owning tenant from metadata.guild_id and declines
+        egress without it. A guild_id already present is kept; chats with no recorded
+        scope (e.g. DMs) pass through unchanged."""
+        enriched: Dict[str, Any] = dict(metadata) if metadata else {}
+        if enriched.get("guild_id"):
+            return enriched
+        known_scope = self._scope_by_chat.get(str(chat_id))
+        if known_scope:
+            enriched["guild_id"] = known_scope
+        return enriched
+
     async def on_interrupt(self, session_key: str, chat_id: str) -> None:
         """Bridge a connector-delivered /stop into the adapter's interrupt path.
 
@@ -140,7 +174,7 @@ class RelayAdapter(BasePlatformAdapter):
                 "chat_id": chat_id,
                 "content": content,
                 "reply_to": reply_to,
-                "metadata": metadata or {},
+                "metadata": self._with_scope(chat_id, metadata),
             }
         )
         return SendResult(
diff --git a/gateway/relay/ws_transport.py b/gateway/relay/ws_transport.py
index b2e8eda09cd..b091d44faa8 100644
--- a/gateway/relay/ws_transport.py
+++ b/gateway/relay/ws_transport.py
@@ -54,6 +54,35 @@ _HANDSHAKE_TIMEOUT_S = 30.0
 _OUTBOUND_TIMEOUT_S = 30.0
 
 
+def _ws_dial_url(url: str) -> str:
+    """Normalize a connector URL to the ``ws(s)://…/relay`` dial target.
+
+    The relay URL is configured once (``GATEWAY_RELAY_URL`` / ``gateway.relay_url``)
+    as the connector's BASE URL and is shared by the provision POST (see
+    ``_provision_url``) and the WS dial. Two normalizations, both load-bearing:
+
+      - scheme: ``https -> wss``, ``http -> ws``, matched case-insensitively
+        (``websockets.connect`` raises "scheme isn't ws or wss" on http(s)).
+      - path: ensure it ends in ``/relay`` (the connector mounts its WS server
+        there and answers HTTP 400 on an upgrade to any other path).
+
+    A ``?query`` / ``#fragment`` suffix is kept AFTER the path, and an already
+    ``ws(s)://…/relay`` URL is returned unchanged (idempotent).
+    """
+    raw = (url or "").strip()
+    # Peel off ?query / #fragment so "/relay" lands on the path, not after them.
+    cut = min((i for i in (raw.find("?"), raw.find("#")) if i >= 0), default=len(raw))
+    base, suffix = raw[:cut], raw[cut:]
+    scheme, sep, rest = base.partition("://")
+    lowered = scheme.lower()
+    if sep and lowered in ("http", "https"):
+        base = ("wss://" if lowered == "https" else "ws://") + rest
+    base = base.rstrip("/")
+    if not base.endswith("/relay"):
+        base = f"{base}/relay"
+    return base + suffix
+
+
 def _event_from_wire(raw: Dict[str, Any]) -> MessageEvent:
     """Rebuild a MessageEvent from the connector's normalized inbound payload.
 
@@ -118,7 +147,7 @@ class WebSocketRelayTransport:
                 "WebSocketRelayTransport requires the 'websockets' package "
                 "(install the messaging extra)."
             )
-        self._url = url
+        self._url = _ws_dial_url(url)
         self._platform = platform
         self._bot_id = bot_id
         self._connect_timeout_s = connect_timeout_s
diff --git a/gateway/run.py b/gateway/run.py
index b478576546b..0a594609b7f 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -195,6 +195,19 @@ def _gateway_platform_value(platform: Any) -> str:
     return str(getattr(platform, "value", platform) or "").strip().lower()
 
 
+def _non_conversational_metadata(
+    metadata: Optional[Dict[str, Any]] = None,
+    *,
+    platform: Any = None,
+) -> Optional[Dict[str, Any]]:
+    """Tag lifecycle/status sends as non-conversational, on Discord only.
+
+    Other platforms get ``metadata`` back unchanged; Discord gets a copy."""
+    if _gateway_platform_value(platform) == "discord":
+        return dict(metadata or {}, non_conversational=True)
+    return metadata
+
+
 def _is_transient_network_error(exc: BaseException) -> bool:
     """Return True for transient network errors safe to log + swallow.
 
@@ -1173,13 +1186,31 @@ def _reload_runtime_env_preserving_config_authority() -> None:
     pick up rotated API keys. config.yaml remains authoritative for agent budget
     settings such as agent.max_turns; otherwise a stale HERMES_MAX_ITERATIONS in
     .env can replace the startup bridge on later turns.
+
+    In multiplex mode this is a NO-OP for the credential reload: secrets come
+    from the per-turn ``set_secret_scope`` (installed by ``_profile_runtime_scope``)
+    which loads the routed profile's ``.env`` into an isolated mapping. Mutating
+    the process-global ``os.environ`` here would defeat that isolation and leak
+    the default profile's keys to every profile's turns and subprocesses.
     """
+    from agent.secret_scope import is_multiplex_active
+    if is_multiplex_active():
+        # Credentials are resolved from the active profile's secret scope, not
+        # os.environ. Still honor config.yaml's agent.max_turns bridge below
+        # using the scoped home, but never reload .env into global env.
+        _bridge_max_turns_from_config(_hermes_home)
+        return
+
     load_hermes_dotenv(
         hermes_home=_hermes_home,
         project_env=Path(__file__).resolve().parents[1] / '.env',
     )
+    _bridge_max_turns_from_config(_hermes_home)
 
-    config_path = _hermes_home / 'config.yaml'
+
+def _bridge_max_turns_from_config(home: "Path") -> None:
+    """Bridge config.yaml agent.max_turns into HERMES_MAX_ITERATIONS (a global)."""
+    config_path = home / 'config.yaml'
     if not config_path.exists():
         return
     try:
@@ -1196,6 +1227,80 @@ def _reload_runtime_env_preserving_config_authority() -> None:
         os.environ["HERMES_MAX_ITERATIONS"] = str(agent_cfg["max_turns"])
 
 
+def _current_max_iterations() -> int:
+    """Refresh runtime env, then return HERMES_MAX_ITERATIONS as int (90 if unset/invalid)."""
+    _reload_runtime_env_preserving_config_authority()  # may re-read .env / config.yaml on every call
+    try:
+        return int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
+    except (TypeError, ValueError):
+        return 90
+
+
+from contextlib import contextmanager as _contextmanager
+
+
+# Platforms that bind a host TCP port (HTTP/webhook listeners). In a profile
+# multiplexer the default profile owns the single shared listener and serves
+# every profile through the /p/<profile>/ URL prefix, so a SECONDARY profile
+# enabling one of these is always a misconfiguration: it would try to bind a
+# port already held by the default's listener. We hard-error on it rather than
+# silently dropping the adapter (see _start_one_profile_adapters).
+# Stored as platform .value strings since the Platform enum is imported below.
+_PORT_BINDING_PLATFORM_VALUES = frozenset({
+    "webhook",
+    "api_server",
+    "msgraph_webhook",
+    "feishu",
+    "wecom_callback",
+    "bluebubbles",
+    "sms",
+})
+
+
+class MultiplexConfigError(RuntimeError):
+    """Raised at startup when a profile-multiplexer configuration is invalid.
+
+    Unlike a transient adapter-connect failure (logged, gateway stays up and
+    retries), this cannot fix itself: the operator has to edit config.yaml,
+    so startup is aborted cleanly.
+    """
+
+
+@_contextmanager
+def _profile_runtime_scope(profile_home: "Path"):
+    """Scope config/skills/memory AND credentials to a profile for one turn.
+
+    Combines the two seams the multiplexer needs:
+      1. ``set_hermes_home_override`` — redirects ``get_hermes_home()`` (config,
+         skills, memory, SOUL, sessions) to the profile's home. Contextvar, so
+         it propagates into the agent worker thread via ``copy_context()``.
+      2. ``set_secret_scope`` — makes the profile's ``.env`` the authoritative
+         credential source, so ``get_secret`` never reads the process-global
+         ``os.environ`` (which may hold another profile's values).
+
+    Only used on the multiplexed inbound path; single-profile gateways never
+    enter it. ``build_profile_secret_scope`` returns an isolated dict rather
+    than mutating ``os.environ``, so subprocesses (MCP, kanban) don't inherit
+    cross-profile secrets. The home override is reset even if that build fails.
+    """
+    from hermes_constants import set_hermes_home_override, reset_hermes_home_override
+    from agent.secret_scope import (
+        build_profile_secret_scope,
+        set_secret_scope,
+        reset_secret_scope,
+    )
+
+    home_token = set_hermes_home_override(str(profile_home))
+    try:
+        secret_token = set_secret_scope(build_profile_secret_scope(Path(profile_home)))
+        try:
+            yield
+        finally:
+            reset_secret_scope(secret_token)
+    finally:
+        reset_hermes_home_override(home_token)
+
+
 _DOCKER_VOLUME_SPEC_RE = re.compile(r"^(?P<host>.+):(?P<container>/[^:]+?)(?::(?P<options>[^:]+))?$")
 _DOCKER_MEDIA_OUTPUT_CONTAINER_PATHS = {"/output", "/outputs"}
 
@@ -2240,7 +2345,22 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
     def __init__(self, config: Optional[GatewayConfig] = None):
         global _gateway_runner_ref
         self.config = config or load_gateway_config()
+        # Mark the process as a profile multiplexer when configured. This flips
+        # agent.secret_scope.get_secret() to fail-closed on any unscoped
+        # credential read, so a missed migration crashes loudly instead of
+        # leaking a cross-profile value (Workstream A). Inert when off.
+        try:
+            from agent.secret_scope import set_multiplex_active
+            set_multiplex_active(bool(getattr(self.config, "multiplex_profiles", False)))
+        except Exception:
+            logger.debug("could not set multiplex-active flag", exc_info=True)
         self.adapters: Dict[Platform, BasePlatformAdapter] = {}
+        # Multi-profile multiplexing: adapters for NON-default profiles live
+        # here, keyed by profile name then Platform. self.adapters stays the
+        # default/active profile's map so the ~93 existing self.adapters[...]
+        # sites are untouched when multiplexing is off (this dict is empty).
+        # Populated by _start_secondary_profile_adapters().
+        self._profile_adapters: Dict[str, Dict[Platform, BasePlatformAdapter]] = {}
         self._warn_if_docker_media_delivery_is_risky()
         _gateway_runner_ref = _weakref.ref(self)
 
@@ -2792,10 +2912,24 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             except Exception:
                 pass
         config = getattr(self, "config", None)
+        # Mirror SessionStore._resolve_profile_for_key so this fallback path
+        # produces the same namespace as the primary path: None (legacy
+        # agent:main) unless multiplexing is on, then the active profile.
+        _profile = None
+        if getattr(config, "multiplex_profiles", False):
+            if source.profile:
+                _profile = source.profile
+            else:
+                try:
+                    from hermes_cli.profiles import get_active_profile_name
+                    _profile = get_active_profile_name() or "default"
+                except Exception:
+                    _profile = None
         return build_session_key(
             source,
             group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
             thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
+            profile=_profile,
         )
 
     def _telegram_topic_mode_enabled(self, source: SessionSource) -> bool:
@@ -5335,7 +5469,30 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                     "attempts": 1,
                     "next_retry": time.monotonic() + 30,
                 }
-        
+
+        # Multi-profile multiplexing: bring up adapters for every OTHER profile
+        # this gateway serves. Each profile's adapters connect under that
+        # profile's home + credential scope and stamp their inbound events with
+        # the profile so the agent turn resolves correctly. No-op when off.
+        try:
+            _secondary_connected = await self._start_secondary_profile_adapters()
+            connected_count += _secondary_connected
+        except MultiplexConfigError as e:
+            # Invalid multiplexer config — abort startup cleanly so the operator
+            # fixes config.yaml rather than running a half-wired gateway.
+            reason = str(e)
+            logger.error("Gateway multiplexer config error: %s", reason)
+            try:
+                from gateway.status import write_runtime_status
+                write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
+            except Exception:
+                pass
+            self._request_clean_exit(reason)
+            self._startup_restore_in_progress = False
+            return True
+        except Exception as e:
+            logger.error("Secondary-profile adapter startup failed: %s", e, exc_info=True)
+
         if connected_count == 0:
             if startup_nonretryable_errors:
                 reason = "; ".join(startup_nonretryable_errors)
@@ -6342,6 +6499,22 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                         time.monotonic() - _adapter_started_at,
                         e,
                     )
+
+            # Disconnect secondary-profile adapters (multiplex mode).
+            for _prof, _amap in list(getattr(self, "_profile_adapters", {}).items()):
+                for platform, adapter in list(_amap.items()):
+                    try:
+                        await adapter.cancel_background_tasks()
+                    except Exception as e:
+                        logger.debug("✗ %s bg-cancel error (profile %s): %s", platform.value, _prof, e)
+                    try:
+                        await adapter.disconnect()
+                        logger.info("✓ %s disconnected (profile: %s)", platform.value, _prof)
+                    except Exception as e:
+                        logger.error("✗ %s disconnect error (profile %s): %s", platform.value, _prof, e)
+                _amap.clear()
+            if hasattr(self, "_profile_adapters"):
+                self._profile_adapters.clear()
             logger.info(
                 "Shutdown phase: all adapters disconnected at +%.2fs",
                 _phase_elapsed(),
@@ -6511,6 +6684,175 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
         """Wait for shutdown signal."""
         await self._shutdown_event.wait()
 
+    async def _start_secondary_profile_adapters(self) -> int:
+        """Bring up adapters for every non-active profile this gateway serves.
+
+        Returns the number of secondary adapters that connected. No-op (returns
+        0) unless ``gateway.multiplex_profiles`` is on.
+
+        Each profile's adapters are created and connected under that profile's
+        HERMES_HOME + secret scope (``_profile_runtime_scope``), stored in
+        ``self._profile_adapters[profile]``, and given a handler that stamps
+        ``source.profile`` before delegating to the shared ``_handle_message``.
+        Same-platform credential collisions (two profiles polling one bot token)
+        are refused here, the only point that sees every profile's credentials.
+        """
+        if not getattr(self.config, "multiplex_profiles", False):
+            return 0
+
+        try:
+            from hermes_cli.profiles import profiles_to_serve, get_active_profile_name
+        except Exception:
+            logger.warning(
+                "multiplex_profiles is on but hermes_cli.profiles is unavailable; "
+                "no secondary profiles will be served", exc_info=True,
+            )
+            return 0
+
+        active = get_active_profile_name() or "default"
+        connected = 0
+        # (platform, token-fingerprint) -> owning profile, seeded from the active
+        # profile's adapters: one bot credential cannot be polled by two profiles.
+        claimed: Dict[tuple, str] = {}
+        for _plat, _ad in self.adapters.items():
+            fp = self._adapter_credential_fingerprint(_ad)
+            if fp is not None:
+                claimed[(_plat, fp)] = active
+
+        for profile_name, profile_home in profiles_to_serve(multiplex=True):
+            if profile_name == active:
+                continue  # handled by the primary startup loop
+            try:
+                connected += await self._start_one_profile_adapters(
+                    profile_name, profile_home, claimed
+                )
+            except MultiplexConfigError:
+                # Config errors (e.g. a secondary profile binding a port) are not
+                # transient — propagate so startup aborts instead of limping along.
+                raise
+            except Exception as e:
+                logger.error(
+                    "Failed to start adapters for profile '%s': %s",
+                    profile_name, e, exc_info=True,
+                )
+
+        # Record served profiles in runtime status for `hermes status`.
+        try:
+            from gateway.status import write_runtime_status
+            served = [active] + sorted(self._profile_adapters)
+            write_runtime_status(served_profiles=served)
+        except Exception:
+            logger.debug("could not record served_profiles", exc_info=True)
+
+        return connected
+
+    async def _start_one_profile_adapters(
+        self, profile_name: str, profile_home: "Path", claimed: Dict[tuple, str]
+    ) -> int:
+        """Create+connect one profile's adapters; return how many connected.
+
+        Raises ``MultiplexConfigError`` for a port-binding platform. ``claimed``
+        is updated in place with each new credential fingerprint.
+        """
+        from gateway.config import load_gateway_config
+
+        with _profile_runtime_scope(profile_home):
+            profile_cfg = load_gateway_config()
+
+        profile_map = self._profile_adapters.setdefault(profile_name, {})
+        connected = 0
+        for platform, platform_config in profile_cfg.platforms.items():
+            if not platform_config.enabled:
+                continue
+            # Secondary profiles must not enable a port-binding platform: the default
+            # profile's listener serves all via /p/<profile>/, so this is a config error.
+            if platform.value in _PORT_BINDING_PLATFORM_VALUES:
+                raise MultiplexConfigError(
+                    f"Profile '{profile_name}' enables the port-binding platform "
+                    f"'{platform.value}', but gateway.multiplex_profiles is on. The "
+                    f"default profile owns the single shared HTTP listener and "
+                    f"serves every profile through the /p/{profile_name}/ URL "
+                    f"prefix — a secondary profile cannot bind its own port. "
+                    f"Remove platforms.{platform.value} from profile "
+                    f"'{profile_name}'s config.yaml (configure it only on the "
+                    f"default profile)."
+                )
+            with _profile_runtime_scope(profile_home):
+                adapter = self._create_adapter(platform, platform_config)
+            if not adapter:
+                continue
+
+            fp = self._adapter_credential_fingerprint(adapter)  # same-token conflict detection
+            if fp is not None:
+                owner = claimed.get((platform, fp))
+                if owner is not None:
+                    logger.error(
+                        "Profile '%s' and '%s' both configure %s with the same "
+                        "credential — refusing to start the duplicate (a single "
+                        "bot token cannot be polled twice). Give each profile its "
+                        "own %s credential.",
+                        owner, profile_name, platform.value, platform.value,
+                    )
+                    await self._safe_adapter_disconnect(adapter, platform)
+                    continue
+                claimed[(platform, fp)] = profile_name  # stays claimed even if the connect below fails
+
+            # Stamp inbound events with this profile so turn + session key resolve to it.
+            adapter.set_message_handler(
+                self._make_profile_message_handler(profile_name)
+            )
+            adapter.set_fatal_error_handler(self._handle_adapter_fatal_error)
+            adapter.set_session_store(self.session_store)
+            adapter.set_busy_session_handler(self._handle_active_session_busy_message)
+            adapter.set_topic_recovery_fn(self._recover_telegram_topic_thread_id)
+            adapter._busy_text_mode = self._busy_text_mode
+
+            try:
+                with _profile_runtime_scope(profile_home):
+                    success = await self._connect_adapter_with_timeout(adapter, platform)
+                if success:
+                    profile_map[platform] = adapter
+                    connected += 1
+                    logger.info("✓ %s connected (profile: %s)", platform.value, profile_name)
+                else:
+                    logger.warning("✗ %s failed to connect (profile: %s)", platform.value, profile_name)
+                    await self._safe_adapter_disconnect(adapter, platform)
+            except Exception as e:
+                logger.error("✗ %s error (profile: %s): %s", platform.value, profile_name, e)
+                await self._safe_adapter_disconnect(adapter, platform)
+        return connected
+
+    def _make_profile_message_handler(self, profile_name: str):
+        """Return an async handler that stamps source.profile, then awaits ``_handle_message``."""
+        async def _handler(event):
+            try:
+                if getattr(event, "source", None) is not None and not event.source.profile:
+                    event.source.profile = profile_name  # only fills a missing/empty profile
+            except Exception:  # best-effort: a stamping failure must not drop the event
+                pass
+            return await self._handle_message(event)
+        return _handler
+
+    @staticmethod
+    def _adapter_credential_fingerprint(adapter: Any) -> Optional[str]:
+        """Fingerprint an adapter's primary credential without exposing it.
+
+        Probes the usual token attributes in order and hashes the first
+        non-blank string (fixed prefix + SHA-256, first 16 hex chars), so two
+        profiles claiming the same bot token can be spotted from log-safe
+        values. Returns None when no credential is discoverable, which skips
+        conflict detection for that adapter.
+        """
+        probed = (
+            getattr(adapter, name, None)
+            for name in ("token", "bot_token", "_token", "api_token", "_bot_token")
+        )
+        secret = next((v.strip() for v in probed if isinstance(v, str) and v.strip()), None)
+        if not secret:
+            return None
+        import hashlib
+        return hashlib.sha256(("hermes-mux:" + secret).encode("utf-8")).hexdigest()[:16]
+
     def _create_adapter(
         self, 
         platform: Platform, 
@@ -10633,7 +10975,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             disabled_toolsets = agent_cfg.get("disabled_toolsets") or None
 
             pr = self._provider_routing
-            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
+            max_iterations = _current_max_iterations()
             reasoning_config = self._resolve_session_reasoning_config(source=source)
             self._reasoning_config = reasoning_config
             self._service_tier = self._load_service_tier()
@@ -11737,7 +12079,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             chunks = [clean[i:i + max_chunk] for i in range(0, len(clean), max_chunk)]
             for chunk in chunks:
                 try:
-                    await adapter.send(chat_id, f"```\n{chunk}\n```", metadata=metadata)
+                    await adapter.send(
+                        chat_id,
+                        f"```\n{chunk}\n```",
+                        metadata=_non_conversational_metadata(metadata, platform=platform),
+                    )
                 except Exception as e:
                     logger.debug("Update stream send failed: %s", e)
 
@@ -11760,12 +12106,16 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                     exit_code_raw = exit_code_path.read_text().strip() or "1"
                     exit_code = int(exit_code_raw)
                     if exit_code == 0:
-                        await adapter.send(chat_id, "✅ Hermes update finished.", metadata=metadata)
+                        await adapter.send(
+                            chat_id,
+                            "✅ Hermes update finished.",
+                            metadata=_non_conversational_metadata(metadata, platform=platform),
+                        )
                     else:
                         await adapter.send(
                             chat_id,
                             "❌ Hermes update failed (exit code {}).".format(exit_code),
-                            metadata=metadata,
+                            metadata=_non_conversational_metadata(metadata, platform=platform),
                         )
                     logger.info("Update finished (exit=%s), notified %s", exit_code, session_key)
                 except Exception as e:
@@ -11816,7 +12166,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                                     prompt=prompt_text,
                                     default=default,
                                     session_key=session_key,
-                                    metadata=metadata,
+                                    metadata=_non_conversational_metadata(metadata, platform=platform),
                                 )
                                 sent_buttons = True
                             except Exception as btn_err:
@@ -11830,7 +12180,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                                 f"{prompt_text}{default_hint}\n\n"
                                 f"Reply `{_p}approve` (yes) or `{_p}deny` (no), "
                                 f"or type your answer directly.",
-                                metadata=metadata,
+                                metadata=_non_conversational_metadata(metadata, platform=platform),
                             )
                         # Keep the prompt marker on disk until the user
                         # answers. If the gateway restarts mid-prompt, the
@@ -11854,7 +12204,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                 await adapter.send(
                     chat_id,
                     "❌ Hermes update timed out after 30 minutes.",
-                    metadata=metadata,
+                    metadata=_non_conversational_metadata(metadata, platform=platform),
                 )
             except Exception:
                 pass
@@ -11960,7 +12310,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                     msg = "✅ Hermes update finished successfully."
                 else:
                     msg = "❌ Hermes update failed. Check the gateway logs or run `hermes update` manually for details."
-                await adapter.send(chat_id, msg, metadata=metadata)
+                await adapter.send(
+                    chat_id,
+                    msg,
+                    metadata=_non_conversational_metadata(metadata, platform=platform),
+                )
                 logger.info(
                     "Sent post-update notification to %s:%s (exit=%s)",
                     platform_str,
@@ -12023,7 +12377,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             result = await adapter.send(
                 str(chat_id),
                 "♻ Gateway restarted successfully. Your session continues.",
-                metadata=metadata,
+                metadata=_non_conversational_metadata(metadata, platform=platform),
             )
             # adapter.send() catches provider errors (e.g. "Chat not found")
             # and returns SendResult(success=False) rather than raising, so
@@ -12090,9 +12444,21 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                     adapter=adapter,
                 )
                 if metadata:
-                    result = await adapter.send(str(home.chat_id), message, metadata=metadata)
+                    result = await adapter.send(
+                        str(home.chat_id),
+                        message,
+                        metadata=_non_conversational_metadata(metadata, platform=platform),
+                    )
                 else:
-                    result = await adapter.send(str(home.chat_id), message)
+                    _startup_meta = _non_conversational_metadata(platform=platform)
+                    if _startup_meta:
+                        result = await adapter.send(
+                            str(home.chat_id),
+                            message,
+                            metadata=_startup_meta,
+                        )
+                    else:
+                        result = await adapter.send(str(home.chat_id), message)
                 if result is not None and getattr(result, "success", True) is False:
                     logger.warning(
                         "Home-channel startup notification failed for %s:%s: %s",
@@ -12733,7 +13099,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                     if adapter and chat_id:
                         try:
                             send_meta = {"thread_id": thread_id} if thread_id else None
-                            await adapter.send(chat_id, message_text, metadata=send_meta)
+                            await adapter.send(
+                                chat_id,
+                                message_text,
+                                metadata=_non_conversational_metadata(send_meta, platform=platform_name),
+                            )
                         except Exception as e:
                             logger.error("Watcher delivery error: %s", e)
                 break
@@ -12754,7 +13124,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                 if adapter and chat_id:
                     try:
                         send_meta = {"thread_id": thread_id} if thread_id else None
-                        await adapter.send(chat_id, message_text, metadata=send_meta)
+                        await adapter.send(
+                            chat_id,
+                            message_text,
+                            metadata=_non_conversational_metadata(send_meta, platform=platform_name),
+                        )
                     except Exception as e:
                         logger.error("Watcher delivery error: %s", e)
 
@@ -13740,6 +14114,64 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
         channel_prompt: Optional[str] = None,
         persist_user_message: Optional[str] = None,
         persist_user_timestamp: Optional[float] = None,
+    ) -> Dict[str, Any]:
+        """Profile-scoping wrapper around the agent run.
+
+        When multiplexing is active, resolve the inbound source's profile and
+        run the whole turn inside ``_profile_runtime_scope`` so config/skills/
+        memory resolve to that profile's home AND credentials resolve from that
+        profile's secret scope (never the process-global ``os.environ``). When
+        multiplexing is off this is a transparent pass-through — zero behavior
+        change for single-profile gateways.
+        """
+        if not getattr(getattr(self, "config", None), "multiplex_profiles", False):
+            return await self._run_agent_inner(
+                message, context_prompt, history, source, session_id,
+                session_key=session_key, run_generation=run_generation,
+                _interrupt_depth=_interrupt_depth, event_message_id=event_message_id,
+                channel_prompt=channel_prompt, persist_user_message=persist_user_message,
+                persist_user_timestamp=persist_user_timestamp,
+            )
+
+        profile_home = self._resolve_profile_home_for_source(source)
+        with _profile_runtime_scope(profile_home):
+            return await self._run_agent_inner(
+                message, context_prompt, history, source, session_id,
+                session_key=session_key, run_generation=run_generation,
+                _interrupt_depth=_interrupt_depth, event_message_id=event_message_id,
+                channel_prompt=channel_prompt, persist_user_message=persist_user_message,
+                persist_user_timestamp=persist_user_timestamp,
+            )
+
+    def _resolve_profile_home_for_source(self, source: SessionSource) -> "Path":
+        """Pick the profile directory (HERMES_HOME) that serves *source*.
+
+        Uses the stripped ``source.profile`` when non-empty, else the active
+        profile name, else ``"default"``, and maps it through ``get_profile_dir``.
+        If any of that raises, returns ``get_hermes_home()`` instead.
+        """
+        from hermes_cli.profiles import get_active_profile_name, get_profile_dir
+        try:
+            routed = (source.profile or "").strip()
+            return get_profile_dir(routed or get_active_profile_name() or "default")
+        except Exception:
+            from hermes_constants import get_hermes_home
+            return get_hermes_home()
+
+    async def _run_agent_inner(
+        self,
+        message: str,
+        context_prompt: str,
+        history: List[Dict[str, Any]],
+        source: SessionSource,
+        session_id: str,
+        session_key: str = None,
+        run_generation: Optional[int] = None,
+        _interrupt_depth: int = 0,
+        event_message_id: Optional[str] = None,
+        channel_prompt: Optional[str] = None,
+        persist_user_message: Optional[str] = None,
+        persist_user_timestamp: Optional[float] = None,
     ) -> Dict[str, Any]:
         """
         Run the agent with the given message and context.
@@ -14135,6 +14567,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             if _progress_thread_id == source.thread_id
             else {"thread_id": _progress_thread_id}
         ) if _progress_thread_id else None
+        _progress_metadata = _non_conversational_metadata(_progress_metadata, platform=source.platform)
         _progress_reply_to = (
             event_message_id
             if source.platform in (Platform.FEISHU, Platform.MATTERMOST) and source.thread_id and event_message_id
@@ -14581,9 +15014,6 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             # session_key is now set via contextvars in _set_session_env()
             # (concurrency-safe). Keep os.environ as fallback for CLI/cron.
             os.environ["HERMES_SESSION_KEY"] = session_key or ""
-
-            # Read from env var or use default (same as CLI)
-            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
             
             # Map platform enum to the platform hint key the agent understands.
             # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
@@ -14598,10 +15028,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             if self._ephemeral_system_prompt:
                 combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip()
 
-            # Re-read .env and config for fresh credentials (gateway is long-lived,
-            # keys may change without restart). Keep config.yaml authoritative for
-            # runtime budget settings bridged into env vars.
-            _reload_runtime_env_preserving_config_authority()
+            max_iterations = _current_max_iterations()
 
             try:
                 model, runtime_kwargs = self._resolve_session_agent_runtime(
@@ -14799,6 +15226,9 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                                 except KeyError:
                                     pass
                             self._init_cached_agent_for_turn(agent, _interrupt_depth)
+                            # Refresh agent max_iterations from current config
+                            # (cached agent may have been created with old config)
+                            agent.max_iterations = max_iterations
                             logger.debug("Reusing cached agent for session %s", session_key)
 
             if agent is None:
@@ -14900,7 +15330,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                     _status_adapter.send(
                         _status_chat_id,
                         message,
-                        metadata=_status_thread_metadata,
+                        metadata=_non_conversational_metadata(_status_thread_metadata, platform=source.platform),
                     ),
                     _loop_for_step,
                     logger=logger,
@@ -15742,7 +16172,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                         _notify_res = await _notify_adapter.send(
                             source.chat_id,
                             _heartbeat_text,
-                            metadata=_status_thread_metadata,
+                            metadata=_non_conversational_metadata(_status_thread_metadata, platform=source.platform),
                         )
                         if getattr(_notify_res, "success", False) and getattr(
                             _notify_res, "message_id", None
diff --git a/gateway/session.py b/gateway/session.py
index f48b83fed0c..d07c65ec29f 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -92,6 +92,11 @@ class SessionSource:
     parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
     message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
     role_authorized: bool = False  # True when adapter granted access via role (not user ID)
+    # Profile this inbound message is routed to in a multiplexing gateway
+    # (from the /p/<profile>/ URL prefix or per-credential adapter ownership).
+    # None => the gateway's active/default profile. Drives both session-key
+    # namespacing and the per-turn config/credential scope.
+    profile: Optional[str] = None
     
     @property
     def description(self) -> str:
@@ -135,6 +140,8 @@ class SessionSource:
             d["parent_chat_id"] = self.parent_chat_id
         if self.message_id:
             d["message_id"] = self.message_id
+        if self.profile:
+            d["profile"] = self.profile
         return d
 
     @classmethod
@@ -153,6 +160,7 @@ class SessionSource:
             guild_id=data.get("guild_id"),
             parent_chat_id=data.get("parent_chat_id"),
             message_id=data.get("message_id"),
+            profile=data.get("profile"),
         )
     
 
@@ -615,15 +623,41 @@ def is_shared_multi_user_session(
     return not group_sessions_per_user
 
 
+def _session_key_namespace(profile: Optional[str]) -> str:
+    """Map a profile name to the leading ``agent:<ns>`` segment of a session key.
+
+    Session keys have the shape ``agent:main:<platform>:<chat_type>:...``; the
+    ``main`` slot is a fixed namespace literal (it is NOT a branch name —
+    branching is keyed off ``session_id``). Profile multiplexing repurposes
+    that slot to hold the profile:
+
+    - ``None``, ``""`` or ``"default"`` → ``agent:main``. This matches the
+      legacy prefix exactly, so existing sessions and positional parsers
+      (``parts[2]`` == platform, etc.) keep working.
+    - any other name, e.g. ``coder`` → ``agent:coder``. The positional layout
+      is unchanged; only the namespace differs, so two profiles on the same
+      platform/chat get distinct keys.
+    """
+    if profile and profile != "default":
+        return f"agent:{profile}"
+    return "agent:main"
+
+
 def build_session_key(
     source: SessionSource,
     group_sessions_per_user: bool = True,
     thread_sessions_per_user: bool = False,
+    profile: Optional[str] = None,
 ) -> str:
     """Build a deterministic session key from a message source.
 
     This is the single source of truth for session key construction.
 
+    ``profile`` selects the key namespace (see :func:`_session_key_namespace`).
+    It defaults to ``None`` ⇒ the legacy ``agent:main`` namespace, so callers
+    that don't multiplex produce byte-identical keys to before. Only the
+    multiplexing gateway passes a non-default profile.
+
     DM rules:
       - DMs include chat_id when present, so each private conversation is isolated.
       - thread_id further differentiates threaded DMs within the same DM chat.
@@ -643,6 +677,7 @@ def build_session_key(
         shared session per chat.
       - Without identifiers, messages fall back to one session per platform/chat_type.
     """
+    ns = _session_key_namespace(profile)
     platform = source.platform.value
     if source.chat_type == "dm":
         dm_chat_id = source.chat_id
@@ -651,12 +686,12 @@ def build_session_key(
 
         if dm_chat_id:
             if source.thread_id:
-                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{dm_chat_id}"
+                return f"{ns}:{platform}:dm:{dm_chat_id}:{source.thread_id}"
+            return f"{ns}:{platform}:dm:{dm_chat_id}"
         # No chat_id — fall back to the sender's own identifier before the
         # bare per-platform sink.  Without this, every DM from every user that
         # arrives without a chat_id (non-standard adapters / synthetic sources)
-        # collapses into one shared "agent:main:<platform>:dm" session, and a
+        # collapses into one shared "<ns>:<platform>:dm" session, and a
         # single cached agent ends up serving multiple people's conversations —
         # cross-user history bleed.  participant_id keeps DMs isolated per user.
         dm_participant_id = source.user_id_alt or source.user_id
@@ -667,11 +702,11 @@ def build_session_key(
             )
         if dm_participant_id:
             if source.thread_id:
-                return f"agent:main:{platform}:dm:{dm_participant_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{dm_participant_id}"
+                return f"{ns}:{platform}:dm:{dm_participant_id}:{source.thread_id}"
+            return f"{ns}:{platform}:dm:{dm_participant_id}"
         if source.thread_id:
-            return f"agent:main:{platform}:dm:{source.thread_id}"
-        return f"agent:main:{platform}:dm"
+            return f"{ns}:{platform}:dm:{source.thread_id}"
+        return f"{ns}:{platform}:dm"
 
     participant_id = source.user_id_alt or source.user_id
     if participant_id and source.platform == Platform.WHATSAPP:
@@ -679,7 +714,7 @@ def build_session_key(
         # single group member gets two isolated per-user sessions when the
         # bridge reshuffles alias forms.
         participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
-    key_parts = ["agent:main", platform, source.chat_type]
+    key_parts = [ns, platform, source.chat_type]
 
     if source.chat_id:
         key_parts.append(source.chat_id)
@@ -775,12 +810,32 @@ class SessionStore:
                 logger.debug("Could not remove temp file %s: %s", tmp_path, e)
             raise
     
+    def _resolve_profile_for_key(self, source: Optional[SessionSource] = None) -> Optional[str]:
+        """Return the profile namespace for session keys, or None when off.
+
+        When ``multiplex_profiles`` is disabled (default), returns ``None`` so
+        keys stay in the legacy ``agent:main`` namespace. When enabled, prefers
+        the profile the inbound source was routed to (``source.profile``,
+        whitespace-stripped; a blank value counts as unset), falling back to
+        the active profile name, or ``None`` if that lookup raises.
+        """
+        if not getattr(self.config, "multiplex_profiles", False):
+            return None
+        if source is not None and (source.profile or "").strip():
+            return source.profile.strip()  # padded names must map to one namespace
+        try:
+            from hermes_cli.profiles import get_active_profile_name
+            return get_active_profile_name() or "default"
+        except Exception:
+            return None
+
     def _generate_session_key(self, source: SessionSource) -> str:
         """Generate a session key from a source."""
         return build_session_key(
             source,
             group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
             thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
+            profile=self._resolve_profile_for_key(source),
         )
     
     def _is_session_expired(self, entry: SessionEntry) -> bool:
diff --git a/gateway/slash_commands.py b/gateway/slash_commands.py
index 04c3f4ca89f..4b25d96fdbf 100644
--- a/gateway/slash_commands.py
+++ b/gateway/slash_commands.py
@@ -1030,12 +1030,13 @@ class GatewaySlashCommandsMixin:
         )
 
     async def _handle_model_command(self, event: MessageEvent) -> Optional[str]:
-        """Handle /model command — switch model for this session.
+        """Handle /model command — switch model.
 
         Supports:
           /model                              — interactive picker (Telegram/Discord) or text list
-          /model <name>                       — switch for this session only
-          /model <name> --global              — switch and persist to config.yaml
+          /model <name>                       — switch model (persists by default)
+          /model <name> --session             — switch for this session only
+          /model <name> --global              — switch and persist (explicit)
           /model <name> --provider <provider> — switch provider + model
           /model --provider <provider>        — switch to provider, auto-detect model
         """
@@ -1043,6 +1044,7 @@ class GatewaySlashCommandsMixin:
         import yaml
         from hermes_cli.model_switch import (
             switch_model as _switch_model, parse_model_flags,
+            resolve_persist_behavior,
             list_authenticated_providers,
             list_picker_providers,
         )
@@ -1050,8 +1052,15 @@ class GatewaySlashCommandsMixin:
 
         raw_args = event.get_command_args().strip()
 
-        # Parse --provider, --global, and --refresh flags
-        model_input, explicit_provider, persist_global, force_refresh = parse_model_flags(raw_args)
+        # Parse --provider, --global, --session, and --refresh flags
+        (
+            model_input,
+            explicit_provider,
+            is_global_flag,
+            force_refresh,
+            is_session,
+        ) = parse_model_flags(raw_args)
+        persist_global = resolve_persist_behavior(is_global_flag, is_session)
 
         # --refresh: bust the disk cache so the picker shows live data.
         if force_refresh:
@@ -1362,7 +1371,7 @@ class GatewaySlashCommandsMixin:
             # override rather than relying on cache signature mismatch detection.
             self._evict_cached_agent(session_key)
 
-            # Persist to config if --global
+            # Persist to config (default) unless --session opted out
             if persist_global:
                 try:
                     if config_path.exists():
diff --git a/gateway/status.py b/gateway/status.py
index 367ac33c4d7..b4bee42fdad 100644
--- a/gateway/status.py
+++ b/gateway/status.py
@@ -14,6 +14,7 @@ concurrently under distinct configurations).
 import hashlib
 import json
 import os
+import shlex
 import signal
 import subprocess
 import sys
@@ -164,20 +165,86 @@ def _read_process_cmdline(pid: int) -> Optional[str]:
     return None
 
 
+def looks_like_gateway_command_line(command: str | None) -> bool:
+    """Decide whether *command* is the command line of a ``gateway run`` process.
+
+    Lifecycle decisions (is the gateway up? did restart relaunch it?) need an
+    exact answer, so loose substring matching is avoided: testing for
+    ``"... gateway" in cmdline`` also accepts ``hermes_cli.main gateway status``
+    and unrelated processes such as ``python -m tui_gateway``.
+
+    A command line is accepted when either:
+    - some token is a gateway-dedicated entrypoint (``gateway/run.py`` or a
+      ``hermes-gateway`` / ``hermes-gateway.exe`` executable), or
+    - it runs through a Hermes entrypoint (``hermes_cli.main``,
+      ``hermes_cli/main.py``, ``hermes`` / ``hermes.exe``) and the first
+      ``gateway`` token is last (bare ``hermes gateway`` defaults to ``run``)
+      or is immediately followed by ``run``.
+
+    Tokenizing is quote-aware (``shlex``), then each token is unquoted,
+    lower-cased and switched to forward slashes, so quoted Windows paths with
+    spaces (``"C:\\Program Files\\...\\hermes-gateway.exe"``) survive.
+    ``--profile``/``-p`` selectors are dropped from anywhere in argv, value
+    included -- Hermes's ``_apply_profile_override`` removes them before
+    argparse, so they (and a profile literally named ``gateway``) can legally
+    appear on either side of the ``gateway`` subcommand.
+    """
+    if not command:
+        return False
+
+    # Non-POSIX mode leaves quotes and backslashes in the tokens.
+    try:
+        pieces = shlex.split(command, posix=False)
+    except ValueError:
+        # shlex could not tokenize the string; split on whitespace instead.
+        pieces = command.split()
+    # Strip surrounding quotes, normalize slashes + case per token.
+    argv = [p.strip("\"'").replace("\\", "/").lower() for p in pieces]
+    if not argv:
+        return False
+
+    # Gateway-dedicated entrypoints are conclusive without a subcommand.
+    for arg in argv:
+        if arg == "gateway/run.py" or arg.endswith("/gateway/run.py"):
+            return True
+        if arg.rsplit("/", 1)[-1] in ("hermes-gateway", "hermes-gateway.exe"):
+            return True
+
+    # Otherwise the command must go through a Hermes CLI entrypoint.
+    flat = " ".join(argv)
+    via_hermes = "hermes_cli.main" in flat or "hermes_cli/main.py" in flat
+    if not via_hermes:
+        via_hermes = any(a.rsplit("/", 1)[-1] in ("hermes", "hermes.exe") for a in argv)
+    if not via_hermes:
+        return False
+
+    # Drop profile selectors anywhere: --profile X / -p X / --profile=X / -p=X.
+    # A profile VALUE of "gateway" is consumed here too, so the token found
+    # below is the real subcommand.
+    remaining: list[str] = []
+    drop_value = False
+    for arg in argv:
+        if drop_value:
+            drop_value = False
+        elif arg in ("--profile", "-p"):
+            drop_value = True
+        elif not arg.startswith(("--profile=", "-p=")):
+            remaining.append(arg)
+
+    # Only the first `gateway` token is judged, by the token right after it.
+    if "gateway" not in remaining:
+        return False
+    after = remaining.index("gateway") + 1
+    # bare `hermes gateway` defaults to `run`
+    return after >= len(remaining) or remaining[after] == "run"
+
+
 def _looks_like_gateway_process(pid: int) -> bool:
     """Return True when the live PID still looks like the Hermes gateway."""
     cmdline = _read_process_cmdline(pid)
     if not cmdline:
         return False
-
-    patterns = (
-        "hermes_cli.main gateway",
-        "hermes_cli/main.py gateway",
-        "hermes gateway",
-        "hermes-gateway",
-        "gateway/run.py",
-    )
-    return any(pattern in cmdline for pattern in patterns)
+    return looks_like_gateway_command_line(cmdline)
 
 
 def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
@@ -189,15 +256,8 @@ def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
     if not isinstance(argv, list) or not argv:
         return False
 
-    # Normalize Windows backslashes so patterns match cross-platform.
-    cmdline = " ".join(str(part) for part in argv).replace("\\", "/")
-    patterns = (
-        "hermes_cli.main gateway",
-        "hermes_cli/main.py gateway",
-        "hermes gateway",
-        "gateway/run.py",
-    )
-    return any(pattern in cmdline for pattern in patterns)
+    cmdline = " ".join(str(part) for part in argv)
+    return looks_like_gateway_command_line(cmdline)
 
 
 def _build_pid_record() -> dict:
@@ -515,6 +575,7 @@ def write_runtime_status(
     platform_state: Any = _UNSET,
     error_code: Any = _UNSET,
     error_message: Any = _UNSET,
+    served_profiles: Any = _UNSET,
 ) -> None:
     """Persist gateway runtime health information for diagnostics/status."""
     path = _get_runtime_status_path()
@@ -535,6 +596,11 @@ def write_runtime_status(
         payload["restart_requested"] = bool(restart_requested)
     if active_agents is not _UNSET:
         payload["active_agents"] = max(0, int(active_agents))
+    if served_profiles is not _UNSET:
+        # Profiles this gateway multiplexes (multi-profile mode). Absent/empty
+        # for a single-profile gateway. Lets `hermes status` show per-profile
+        # coverage without a second probe.
+        payload["served_profiles"] = list(served_profiles or [])
 
     if platform is not _UNSET:
         platform_payload = payload["platforms"].get(platform, {})
diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py
index 0064881c43f..770a8de4569 100644
--- a/hermes_cli/backup.py
+++ b/hermes_cli/backup.py
@@ -34,14 +34,38 @@ logger = logging.getLogger(__name__)
 # ``hermes-agent`` is special-cased to root level only in ``_should_exclude``
 # so that skill directories like ``skills/autonomous-ai-agents/hermes-agent/``
 # are not accidentally excluded.
+#
+# The dependency/cache entries below matter for more than tidiness: without
+# them a single plugin venv, MCP-server install, or pip/uv cache living under
+# HERMES_HOME gets walked file-by-file, ballooning a backup to hundreds of
+# thousands of entries that crawl for hours — the exact "backup stuck for
+# days / 426543 files" symptom users hit. The dependency/test-env names mostly
+# mirror ``agent.skill_utils.EXCLUDED_SKILL_DIRS`` (the project's canonical
+# "regeneratable dir" set); ``.cache`` is an additional backup-only entry, as
+# it names a broad regeneratable cache convention (pip/uv/etc.) that the skill
+# scanner doesn't need to prune but a backup walk does. We deliberately do NOT
+# exclude ``.archive`` here because the curator's ``skills/.archive/`` holds
+# restorable user skills that must survive a backup.
 _EXCLUDED_DIRS = {
     "hermes-agent",     # the codebase repo — re-clone instead
     "__pycache__",      # bytecode caches — regenerated on import
     ".git",             # nested git dirs (profiles shouldn't have these, but safety)
-    "node_modules",     # js deps if website/ somehow leaks in
+    "node_modules",     # js deps — reinstalled on demand
     "backups",          # prior auto-backups — don't nest backups exponentially
     "checkpoints",      # session-local trajectory caches — regenerated per-session,
                         # session-hash-keyed so they don't port to another machine anyway
+    # Python dependency trees (plugin / MCP-server venvs under HERMES_HOME) —
+    # regenerated by reinstalling; never irreplaceable state.
+    ".venv",
+    "venv",
+    "site-packages",
+    # Tool / build caches — all regeneratable.
+    ".cache",
+    ".tox",
+    ".nox",
+    ".pytest_cache",
+    ".mypy_cache",
+    ".ruff_cache",
 }
 
 # File-name suffixes to skip
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 514e7f659b3..42e51f29909 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -123,8 +123,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
     # Configuration
     CommandDef("config", "Show current configuration", "Configuration",
                cli_only=True),
-    CommandDef("model", "Switch model for this session", "Configuration",
-               args_hint="[model] [--provider name] [--global] [--refresh]"),
+    CommandDef("model", "Switch model (persists by default)", "Configuration",
+               args_hint="[model] [--provider name] [--global|--session] [--refresh]"),
     CommandDef("codex-runtime", "Toggle codex app-server runtime for OpenAI/Codex models",
                "Configuration", aliases=("codex_runtime",),
                args_hint="[auto|codex_app_server]"),
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index bf9dc532630..cb574345d35 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1581,6 +1581,14 @@ DEFAULT_CONFIG = {
         # TUI busy indicator style: kaomoji (default), emoji, unicode (braille
         # spinner), or ascii.  Live-swappable via `/indicator <style>`.
         "tui_status_indicator": "kaomoji",
+        # Seconds between prompt_toolkit redraws in the classic CLI when idle.
+        # Default 1.0 keeps the wall-clock status-bar read-outs (idle-since-
+        # last-turn) ticking and keeps the bottom chrome alive during idle —
+        # without it prompt_toolkit stops repainting the status bar after a
+        # turn and it can go stale/disappear (#45592).
+        # Set 0 to disable the background refresh if it fights terminal
+        # auto-scroll in non-fullscreen mode on some emulators (#48309).
+        "cli_refresh_interval": 1.0,
         "user_message_preview": {  # CLI: how many submitted user-message lines to echo back in scrollback
             "first_lines": 2,
             "last_lines": 2,
@@ -3453,6 +3461,7 @@ OPTIONAL_ENV_VARS = {
                        "Required scopes: chat:write, app_mentions:read, channels:history, groups:history, "
                        "im:history, im:read, im:write, users:read, files:read, files:write",
         "prompt": "Slack Bot Token (xoxb-...)",
+        "help": "In your Slack app, add the required bot scopes, install the app to the workspace, then copy OAuth & Permissions > Bot User OAuth Token.",
         "url": "https://api.slack.com/apps",
         "password": True,
         "category": "messaging",
@@ -3462,10 +3471,19 @@ OPTIONAL_ENV_VARS = {
                        "App-Level Tokens. Also ensure Event Subscriptions include: message.im, "
                        "message.channels, message.groups, app_mention",
         "prompt": "Slack App Token (xapp-...)",
+        "help": "In your Slack app, enable Socket Mode, then create Basic Information > App-Level Tokens with the connections:write scope.",
         "url": "https://api.slack.com/apps",
         "password": True,
         "category": "messaging",
     },
+    "SLACK_ALLOWED_USERS": {
+        "description": "Comma-separated Slack member IDs allowed to use Hermes, e.g. U01ABC2DEF3. Without this, Slack may connect but deny messages by default.",
+        "prompt": "Allowed Slack member IDs",
+        "help": "In Slack, open your profile, choose More or the three-dot menu, then Copy member ID. Add multiple IDs comma-separated.",
+        "url": "https://api.slack.com/apps",
+        "password": False,
+        "category": "messaging",
+    },
     "MATTERMOST_URL": {
         "description": "Mattermost server URL (e.g. https://mm.example.com)",
         "prompt": "Mattermost server URL",
diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py
index 717c1e97658..86f8e6b09e2 100644
--- a/hermes_cli/cron.py
+++ b/hermes_cli/cron.py
@@ -25,7 +25,7 @@ _GATEWAY_LIFECYCLE_PATTERNS = re.compile(
     r"(?i)"
     r"(hermes\s+gateway\s+(restart|stop|start))"
     r"|(launchctl\s+(kickstart|unload|load|stop|restart)\s+.*hermes)"
-    r"|(systemctl\s+(restart|stop|start)\s+.*hermes)"
+    r"|(systemctl\s+(-\S+\s+)*(restart|stop|start)\s+.*hermes)"
     r"|(p?kill\s+.*hermes.*gateway)"
 )
 
diff --git a/hermes_cli/debug.py b/hermes_cli/debug.py
index 809676d1fc8..e5627f24bf5 100644
--- a/hermes_cli/debug.py
+++ b/hermes_cli/debug.py
@@ -191,10 +191,10 @@ _PRIVACY_NOTICE = """\
 ⚠️  This will upload the following to a public paste service:
   • System info (OS, Python version, Hermes version, provider, which API keys
     are configured — NOT the actual keys)
-  • Recent log lines (agent.log, errors.log, gateway.log, desktop.log — may
-    contain conversation fragments and file paths)
-  • Full agent.log, gateway.log, and desktop.log (up to 512 KB each — likely
-    contains conversation content, tool outputs, and file paths)
+  • Recent log lines (agent.log, errors.log, gateway.log, gui.log, desktop.log
+    — may contain conversation fragments and file paths)
+  • Full agent.log, gateway.log, gui.log, and desktop.log (up to 512 KB each —
+    likely contains conversation content, tool outputs, and file paths)
 
 Pastes auto-delete after 6 hours.
 """
@@ -503,6 +503,9 @@ def _capture_default_log_snapshots(
         "gateway": _capture_log_snapshot(
             "gateway", tail_lines=errors_lines, redact=redact
         ),
+        "gui": _capture_log_snapshot(
+            "gui", tail_lines=errors_lines, redact=redact
+        ),
         "desktop": _capture_log_snapshot(
             "desktop", tail_lines=errors_lines, redact=redact
         ),
@@ -574,6 +577,10 @@ def collect_debug_report(
     buf.write(log_snapshots["gateway"].tail_text)
     buf.write("\n\n")
 
+    buf.write(f"--- gui.log (last {errors_lines} lines) ---\n")
+    buf.write(log_snapshots["gui"].tail_text)
+    buf.write("\n\n")
+
     buf.write(f"--- desktop.log (last {errors_lines} lines) ---\n")
     buf.write(log_snapshots["desktop"].tail_text)
     buf.write("\n")
@@ -639,6 +646,7 @@ def build_debug_share(
     )
     agent_log = log_snapshots["agent"].full_text
     gateway_log = log_snapshots["gateway"].full_text
+    gui_log = log_snapshots["gui"].full_text
     desktop_log = log_snapshots["desktop"].full_text
 
     # Prepend dump header to each full log so every paste is self-contained.
@@ -646,6 +654,8 @@ def build_debug_share(
         agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log
     if gateway_log:
         gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
+    if gui_log:
+        gui_log = dump_text + "\n\n--- full gui.log ---\n" + gui_log
     if desktop_log:
         desktop_log = dump_text + "\n\n--- full desktop.log ---\n" + desktop_log
 
@@ -657,6 +667,8 @@ def build_debug_share(
             agent_log = _REDACTION_BANNER + agent_log
         if gateway_log:
             gateway_log = _REDACTION_BANNER + gateway_log
+        if gui_log:
+            gui_log = _REDACTION_BANNER + gui_log
         if desktop_log:
             desktop_log = _REDACTION_BANNER + desktop_log
 
@@ -670,6 +682,7 @@ def build_debug_share(
     for label, content in (
         ("agent.log", agent_log),
         ("gateway.log", gateway_log),
+        ("gui.log", gui_log),
         ("desktop.log", desktop_log),
     ):
         if not content:
@@ -712,11 +725,14 @@ def run_debug_share(args):
         )
         agent_log = log_snapshots["agent"].full_text
         gateway_log = log_snapshots["gateway"].full_text
+        gui_log = log_snapshots["gui"].full_text
         desktop_log = log_snapshots["desktop"].full_text
         if agent_log:
             agent_log = dump_text + "\n\n--- full agent.log ---\n" + agent_log
         if gateway_log:
             gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log
+        if gui_log:
+            gui_log = dump_text + "\n\n--- full gui.log ---\n" + gui_log
         if desktop_log:
             desktop_log = dump_text + "\n\n--- full desktop.log ---\n" + desktop_log
         if redact:
@@ -725,12 +741,15 @@ def run_debug_share(args):
                 agent_log = _REDACTION_BANNER + agent_log
             if gateway_log:
                 gateway_log = _REDACTION_BANNER + gateway_log
+            if gui_log:
+                gui_log = _REDACTION_BANNER + gui_log
             if desktop_log:
                 desktop_log = _REDACTION_BANNER + desktop_log
         print(report)
         for title, body in (
             ("FULL agent.log", agent_log),
             ("FULL gateway.log", gateway_log),
+            ("FULL gui.log", gui_log),
             ("FULL desktop.log", desktop_log),
         ):
             if body:
diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py
index 7e5406a11dd..f1dddd087f4 100644
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -319,23 +319,12 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
     # gateway.  See #13242.
     exclude_pids = exclude_pids | _get_ancestor_pids()
     pids: list[int] = []
-    patterns = [
-        "hermes_cli.main gateway",
-        "hermes_cli.main --profile",
-        "hermes_cli.main -p",
-        "hermes_cli/main.py gateway",
-        "hermes_cli/main.py --profile",
-        "hermes_cli/main.py -p",
-        "hermes gateway",
-        # Windows: only match invocations that actually carry the ``gateway``
-        # subcommand or the gateway-dedicated console-script shim. Bare
-        # ``hermes.exe --profile`` / ``hermes.exe -p`` would also match
-        # ``hermes.exe --profile foo dashboard`` and other CLI subcommands,
-        # producing false-positive gateway PIDs (Copilot review).
-        "hermes.exe gateway",
-        "hermes-gateway.exe",
-        "gateway/run.py",
-    ]
+    # Strict command-line matcher shared with gateway.status: requires the
+    # actual ``gateway run`` subcommand (or the dedicated entrypoints), so this
+    # scan no longer false-matches ``gateway status``/``dashboard`` siblings or
+    # unrelated processes like ``python -m tui_gateway``. Lazy import mirrors the
+    # circular-import avoidance used elsewhere in this module.
+    from gateway.status import looks_like_gateway_command_line
     current_home = str(get_hermes_home().resolve())
     current_home_lc = current_home.lower()
     current_profile_arg = _profile_arg(current_home)
@@ -430,8 +419,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
                     current_cmd = line[len("CommandLine=") :]
                 elif line.startswith("ProcessId="):
                     pid_str = line[len("ProcessId=") :]
-                    current_cmd_lc = current_cmd.lower()
-                    if any(p in current_cmd_lc for p in patterns) and (
+                    if looks_like_gateway_command_line(current_cmd) and (
                         all_profiles or _matches_current_profile(current_cmd)
                     ):
                         try:
@@ -456,8 +444,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
                             with open(f"/proc/{pid}/cmdline", "rb") as _f:
                                 cmdline = _f.read().decode("utf-8", errors="replace")
                             cmdline = cmdline.replace("\x00", " ")
-                            cmdline_lc = cmdline.lower()
-                            if any(p in cmdline_lc for p in patterns) and (
+                            if looks_like_gateway_command_line(cmdline) and (
                                 all_profiles or _matches_current_profile(cmdline)
                             ):
                                 _append_unique_pid(pids, pid, exclude_pids)
@@ -500,8 +487,7 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
 
                     if pid is None:
                         continue
-                    command_lc = command.lower()
-                    if any(pattern in command_lc for pattern in patterns) and (
+                    if looks_like_gateway_command_line(command) and (
                         all_profiles or _matches_current_profile(command)
                     ):
                         _append_unique_pid(pids, pid, exclude_pids)
@@ -3865,6 +3851,86 @@ def _running_under_gateway_supervisor() -> bool:
     return False
 
 
+def _guard_named_profile_under_multiplexer(force: bool = False) -> None:
+    """Refuse a named-profile gateway when a multiplexer is already serving it.
+
+    When the default profile's gateway runs with gateway.multiplex_profiles=on,
+    it is the sole inbound process for EVERY profile on the host. Starting a
+    separate gateway for a named profile would double-bind that profile's
+    platforms (two pollers on one bot token, port fights). In that mode a
+    named-profile ``hermes gateway run`` is always a misconfiguration, so we
+    hard-error with a pointer to the multiplexer. ``--force`` overrides.
+
+    Inert unless ALL of: (a) this invocation is a named profile, (b) a default-
+    profile gateway is running, (c) that gateway's config has multiplexing on.
+    Probe failures are debug-logged and treated as "no conflict".
+
+    Args:
+        force: Skip the guard entirely (the ``--force`` override).
+
+    Raises:
+        SystemExit: With status 1 when all three conditions hold.
+    """
+    if force:
+        return
+    # (a) Are we a named profile? Default/custom-hash homes return "".
+    try:
+        suffix = _profile_suffix()
+    except Exception:
+        logger.debug("Profile suffix lookup failed", exc_info=True)
+        return
+    if not suffix:
+        return  # default profile (or unrecognized) — this guard doesn't apply
+
+    try:
+        import yaml as _yaml
+        from gateway.status import _pid_exists, _pid_from_record, _read_pid_record
+        from hermes_constants import get_default_hermes_root
+
+        default_root = get_default_hermes_root()
+
+        # (b) default gateway PID file present + alive
+        rec = _read_pid_record(default_root / "gateway.pid")
+        pid = _pid_from_record(rec) if rec else None
+        if not pid or not _pid_exists(pid):
+            return
+
+        # (c) default config has multiplexing on (top-level key or under "gateway")
+        cfg_path = default_root / "config.yaml"
+        if not cfg_path.exists():
+            return
+        with open(cfg_path, encoding="utf-8") as f:
+            cfg = _yaml.safe_load(f) or {}
+        if not isinstance(cfg, dict):
+            return  # malformed config — cannot establish that multiplexing is on
+        gateway_cfg = cfg.get("gateway")
+        if not isinstance(gateway_cfg, dict):
+            gateway_cfg = {}
+        if not (cfg.get("multiplex_profiles") or gateway_cfg.get("multiplex_profiles")):
+            return
+    except Exception:
+        logger.debug("Multiplexer-conflict probe failed", exc_info=True)
+        return
+
+    print_error(
+        f"The default gateway is running as a profile multiplexer and already "
+        f"serves profile '{suffix}'."
+    )
+    print(
+        "  When gateway.multiplex_profiles is on, the default gateway is the\n"
+        "  single inbound process for every profile. Starting a separate\n"
+        "  gateway for this profile would double-bind its platforms (two\n"
+        "  pollers on one bot token, port conflicts).\n"
+    )
+    print("  Manage the multiplexer instead (from the default profile):")
+    print()
+    print("    hermes gateway restart")
+    print()
+    print("  Pass --force to start a separate profile gateway anyway (not")
+    print("  recommended while the multiplexer is running).")
+    sys.exit(1)
+
+
 def _guard_supervised_gateway_conflict(force: bool = False) -> None:
     """Refuse a foreground gateway when a service manager already supervises one.
 
@@ -3977,6 +4043,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False, fo
                systemd/launchd service is already supervising this profile.
     """
     _guard_official_docker_root_gateway()
+    _guard_named_profile_under_multiplexer(force=force)
     _guard_supervised_gateway_conflict(force=force)
     _guard_existing_gateway_process_conflict(replace=replace)
     sys.path.insert(0, str(PROJECT_ROOT))
diff --git a/hermes_cli/gateway_windows.py b/hermes_cli/gateway_windows.py
index 08c7d8c019c..466031bfaa7 100644
--- a/hermes_cli/gateway_windows.py
+++ b/hermes_cli/gateway_windows.py
@@ -1302,10 +1302,54 @@ def stop() -> None:
         print("✗ No gateway was running")
 
 
+def _wait_for_gateway_absent(timeout_s: float = 30.0, interval_s: float = 0.5) -> bool:
+    """Poll until the gateway is gone; return ``True`` once absent, else ``False``.
+
+    ``stop()`` may return while the old gateway is still draining, so "absent"
+    needs both ``get_running_pid() is None`` and an empty ``_gateway_pids()``.
+    """
+    from gateway.status import get_running_pid
+
+    def _absent() -> bool:
+        return get_running_pid() is None and not _gateway_pids()
+
+    give_up_at = time.monotonic() + max(timeout_s, interval_s)
+    while time.monotonic() < give_up_at:
+        if _absent():
+            return True
+        time.sleep(interval_s)
+    return _absent()
+
+
 def restart() -> None:
-    """Stop the gateway then start it again."""
+    """Stop the gateway, wait until it is really gone, then start it again.
+
+    Relaunching early is unsafe: ``start()``'s "already running" guard sees the
+    still-draining old process and no-ops, and when that process later exits
+    nothing replaces it (a silent outage).
+    Raises ``RuntimeError`` when the old gateway is still detected after a
+    forced kill, or when the relaunch does not produce a running gateway.
+    """
     _assert_windows()
+    from hermes_cli.gateway import kill_gateway_processes
+
     stop()
+    # Poll until the old process is gone; escalate to a forced kill on timeout.
+    if not _wait_for_gateway_absent(timeout_s=30.0):
+        print("⚠ Gateway still present after stop; forcing termination before restart...")
+        kill_gateway_processes(all_profiles=False, force=True)
+        if not _wait_for_gateway_absent(timeout_s=10.0):
+            raise RuntimeError(
+                "Gateway process still detected after force kill; refusing to "
+                "start a duplicate. Investigate stray PIDs before retrying."
+            )
+
     # Give Windows a moment to release the listening port.
     time.sleep(1.0)
     start()
+    # Fail loudly unless the relaunched gateway is reported ready within 15 s.
+    if not _wait_for_gateway_ready(timeout_s=15.0):
+        raise RuntimeError(
+            "Gateway restart did not produce a running gateway process. "
+            "Check logs/gateway.log and run `hermes gateway status`."
+        )
diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py
index b684450e6bb..c82d762d592 100644
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@@ -121,6 +121,16 @@ DEFAULT_CLAIM_TTL_SECONDS = 15 * 60
 # effect of normal API traffic.
 DEFAULT_CLAIM_HEARTBEAT_MAX_STALE_SECONDS = 60 * 60
 
+# Grace period (seconds) added to a claim when a reclaim is deferred because
+# the previous host-local worker is still alive after a termination attempt.
+# Releasing the claim then would spawn a duplicate next to the survivor — the
+# runaway seen when a cgroup memory.high throttle parks a worker in
+# uninterruptible (D) state, where a pending SIGKILL cannot be delivered until
+# the throttle lifts. Holding the claim and retrying next tick breaks the loop:
+# with no duplicate spawned the pressure eases, the signal lands, and the
+# following tick reclaims cleanly.
+RECLAIM_DEFER_GRACE_SECONDS = 120
+
 
 def _resolve_claim_ttl_seconds(ttl_seconds: Optional[int] = None) -> int:
     """Return the effective claim TTL, honoring the kanban env override.
@@ -3286,6 +3296,14 @@ def release_stale_claims(
         termination = _terminate_reclaimed_worker(
             row["worker_pid"], row["claim_lock"], signal_fn=signal_fn,
         )
+        # Never release a claim while our own worker is still alive: that would
+        # spawn a duplicate beside it. Hold the claim and retry next tick.
+        if _worker_survived_termination(termination):
+            _defer_reclaim_for_live_worker(
+                conn, row["id"], row["claim_lock"], now, termination,
+                reason="ttl_expired_worker_alive",
+            )
+            continue
         with write_txn(conn):
             cur = conn.execute(
                 "UPDATE tasks SET status = 'ready', claim_lock = NULL, "
@@ -5113,7 +5131,13 @@ def _terminate_reclaimed_worker(
     info["termination_attempted"] = True
     try:
         kill(int(pid), signal.SIGTERM)
-    except (ProcessLookupError, OSError):
+    except ProcessLookupError:
+        # Process is already gone — that's a successful termination, not a
+        # survival. Leaving terminated=False here would make the reclaim guard
+        # misread a dead worker as still-alive and defer forever.
+        info["terminated"] = True
+        return info
+    except OSError:
         return info
 
     for _ in range(10):
@@ -5136,6 +5160,63 @@ def _terminate_reclaimed_worker(
     return info
 
 
+def _worker_survived_termination(termination: dict) -> bool:
+    """Report whether a host-local worker we signalled is still alive.
+
+    Releasing the claim while that worker runs would let the dispatcher spawn
+    a duplicate beside it. Non-local claim locks and attempts that never
+    signalled anything return ``False`` so they take the normal release path.
+    """
+    attempted = termination.get("termination_attempted")
+    if not attempted:
+        return False
+    if not termination.get("host_local"):
+        return False
+    # Survived == signalled locally, yet never observed to terminate.
+    return not termination.get("terminated")
+
+
+def _defer_reclaim_for_live_worker(
+    conn: sqlite3.Connection,
+    task_id: str,
+    claim_lock: Optional[str],
+    now: int,
+    termination: dict,
+    *,
+    reason: str,
+) -> None:
+    """Keep a claim alive when its worker outlived our termination attempt.
+
+    Moves ``claim_expires`` to ``now + RECLAIM_DEFER_GRACE_SECONDS`` on the task
+    row and, when a current run exists, on that ``task_runs`` row too, then
+    appends a ``reclaim_deferred`` event holding ``reason``, the claim lock,
+    the new expiry and every key of ``termination``. No-op unless exactly one
+    ``running`` task row with this ``claim_lock`` matched the update.
+    """
+    new_expiry = now + RECLAIM_DEFER_GRACE_SECONDS
+    with write_txn(conn):
+        updated = conn.execute(
+            "UPDATE tasks SET claim_expires = ? "
+            "WHERE id = ? AND status = 'running' AND claim_lock IS ?",
+            (new_expiry, task_id, claim_lock),
+        ).rowcount
+        if updated != 1:
+            return  # row no longer matches — leave it untouched
+        run_id = _current_run_id(conn, task_id)
+        if run_id is not None:
+            conn.execute(
+                "UPDATE task_runs SET claim_expires = ? WHERE id = ?",
+                (new_expiry, run_id),
+            )
+        event = {
+            "reason": reason,
+            "claim_lock": claim_lock,
+            "claim_expires_now": new_expiry,
+            **termination,
+        }
+        _append_event(conn, task_id, "reclaim_deferred", event, run_id=run_id)
+
+
 def heartbeat_worker(
     conn: sqlite3.Connection,
     task_id: str,
@@ -5374,6 +5455,15 @@ def detect_stale_running(
             pid, lock, signal_fn=signal_fn,
         )
 
+        # Never release a claim while our own worker is still alive: that would
+        # spawn a duplicate beside it. Hold the claim and retry next tick.
+        if _worker_survived_termination(termination):
+            _defer_reclaim_for_live_worker(
+                conn, tid, lock, now, termination,
+                reason="heartbeat_stale_worker_alive",
+            )
+            continue
+
         with write_txn(conn):
             cur = conn.execute(
                 "UPDATE tasks SET status = 'ready', claim_lock = NULL, "
diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py
index 2ed5b14790c..7f6fe70d90a 100644
--- a/hermes_cli/model_switch.py
+++ b/hermes_cli/model_switch.py
@@ -299,34 +299,46 @@ class ModelSwitchResult:
 # Flag parsing
 # ---------------------------------------------------------------------------
 
-def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
-    """Parse --provider, --global, and --refresh flags from /model command args.
+def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool, bool]:
+    """Parse --provider, --global, --session, and --refresh flags from /model command args.
 
-    Returns (model_input, explicit_provider, is_global, force_refresh).
+    Returns ``(model_input, explicit_provider, is_global, force_refresh, is_session)``.
+
+    ``is_global`` and ``is_session`` are independent flag presences; the
+    *effective* persistence decision is resolved by
+    :func:`resolve_persist_behavior` so the config-gated default
+    (``model.persist_switch_by_default``) is applied in one place.
 
     Examples::
 
-        "sonnet"                         -> ("sonnet", "", False, False)
-        "sonnet --global"                -> ("sonnet", "", True, False)
-        "sonnet --provider anthropic"    -> ("sonnet", "anthropic", False, False)
-        "--provider my-ollama"           -> ("", "my-ollama", False, False)
-        "--refresh"                      -> ("", "", False, True)
-        "sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False)
+        "sonnet"                         -> ("sonnet", "", False, False, False)
+        "sonnet --global"                -> ("sonnet", "", True, False, False)
+        "sonnet --session"               -> ("sonnet", "", False, False, True)
+        "sonnet --provider anthropic"    -> ("sonnet", "anthropic", False, False, False)
+        "--provider my-ollama"           -> ("", "my-ollama", False, False, False)
+        "--refresh"                      -> ("", "", False, True, False)
+        "sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True, False, False)
     """
     is_global = False
     explicit_provider = ""
     force_refresh = False
+    is_session = False
 
     # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
     # A single Unicode dash before a flag keyword becomes "--"
     import re as _re
-    raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|refresh)', r'--\1', raw_args)
+    raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global|session|refresh)', r'--\1', raw_args)
 
     # Extract --global
     if "--global" in raw_args:
         is_global = True
         raw_args = raw_args.replace("--global", "").strip()
 
+    # Extract --session (explicit session-only; overrides the persist default)
+    if "--session" in raw_args:
+        is_session = True
+        raw_args = raw_args.replace("--session", "").strip()
+
     # Extract --refresh (bust the model picker disk cache before listing)
     if "--refresh" in raw_args:
         force_refresh = True
@@ -345,7 +357,37 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool, bool]:
             i += 1
 
     model_input = " ".join(filtered).strip()
-    return (model_input, explicit_provider, is_global, force_refresh)
+    return (model_input, explicit_provider, is_global, force_refresh, is_session)
+
+
+def resolve_persist_behavior(is_global: bool, is_session: bool) -> bool:
+    """Return whether a ``/model`` switch should be written to ``config.yaml``.
+
+    Precedence, highest first:
+
+    1. ``--session`` given → ``False``: the switch lasts for this session only.
+    2. ``--global`` given → ``True``: the switch is persisted.
+    3. Neither → the ``model.persist_switch_by_default`` config value coerced
+       to ``bool``, or ``True`` when that key is absent.
+
+    ``True`` is also returned whenever the config cannot be consulted: the
+    import or ``load_config()`` raises, or the ``model`` entry is not a dict
+    (on a fresh install ``model`` may be a flat string rather than a dict).
+    Such failures are swallowed on purpose — the lookup is best-effort.
+    """
+    if is_session or is_global:
+        # --session is checked first, so it wins when both flags are present.
+        return not is_session
+    persist = True  # built-in default when config is missing or unreadable
+    try:
+        from hermes_cli.config import load_config
+
+        model_section = load_config().get("model")
+        if isinstance(model_section, dict):
+            persist = bool(model_section.get("persist_switch_by_default", True))
+    except Exception:
+        pass
+    return persist
 
 
 # ---------------------------------------------------------------------------
diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py
index 881dd481445..490077884e5 100644
--- a/hermes_cli/profiles.py
+++ b/hermes_cli/profiles.py
@@ -29,7 +29,7 @@ import subprocess
 import sys
 from dataclasses import dataclass
 from pathlib import Path, PurePosixPath, PureWindowsPath
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 from agent.skill_utils import is_excluded_skill_path
 
@@ -781,6 +781,47 @@ def list_profiles() -> List[ProfileInfo]:
     return profiles
 
 
+def profiles_to_serve(multiplex: bool) -> List[Tuple[str, Path]]:
+    """List the ``(profile_name, hermes_home)`` pairs the gateway should serve.
+
+    Args:
+        multiplex: ``False`` serves only the active profile; ``True`` serves
+            the default profile plus every valid named profile.
+
+    Returns:
+        With ``multiplex=False``: a one-element list holding the active
+        profile name (``"default"`` when ``get_active_profile_name()`` yields
+        nothing) and ``get_profile_dir()`` for that name.
+
+        With ``multiplex=True``: ``("default", <default home>)`` first, then
+        one ``(name, directory)`` pair per sub-directory of the profiles root,
+        in sorted path order. Entries that are not directories, are named
+        ``"default"``, or fail ``_PROFILE_ID_RE`` are skipped.
+
+    Only a directory listing and name validation happen here (no config
+    reads), which keeps the call cheap enough for gateway startup.
+    """
+    active_name = get_active_profile_name() or "default"
+    if not multiplex:
+        return [(active_name, get_profile_dir(active_name))]
+
+    # Default profile always comes first; named profiles follow in sorted order.
+    default_entry = ("default", _get_default_hermes_home())
+    profiles_root = _get_profiles_root()
+    if not profiles_root.is_dir():
+        return [default_entry]
+
+    def _servable(candidate: Path) -> bool:
+        # "default" is already listed above; other names must be valid ids.
+        if not candidate.is_dir() or candidate.name == "default":
+            return False
+        return bool(_PROFILE_ID_RE.match(candidate.name))
+
+    # One pass over the profiles root; nothing is read from inside each profile.
+    named = [(p.name, p) for p in sorted(profiles_root.iterdir()) if _servable(p)]
+    return [default_entry, *named]
+
+
 def create_profile(
     name: str,
     clone_from: Optional[str] = None,
diff --git a/hermes_cli/provider_catalog.py b/hermes_cli/provider_catalog.py
new file mode 100644
index 00000000000..6dba5d8842f
--- /dev/null
+++ b/hermes_cli/provider_catalog.py
@@ -0,0 +1,170 @@
+"""Unified provider catalog — one source of truth for the provider universe.
+
+The provider list shown by ``hermes model`` (CLI/TUI) and the desktop Settings
+→ Providers tabs (Accounts + API keys) **must be the same set**.  Historically
+they were not: the CLI picker read :data:`hermes_cli.models.CANONICAL_PROVIDERS`
+(which auto-extends from ``plugins/model-providers/<name>/``), while the desktop
+tabs read separate hand-maintained lists (``_OAUTH_PROVIDER_CATALOG``,
+``OPTIONAL_ENV_VARS`` + ``PROVIDER_GROUPS``) that nobody kept in sync.  Every
+provider added after those lists were written silently went missing from the
+GUI — e.g. GitHub Copilot showing up only under "tools", or ``openai-api`` being
+configurable from the CLI but not the desktop app.
+
+This module fixes that at the root: it derives ONE descriptor per provider from
+the same universe ``hermes model`` renders (``CANONICAL_PROVIDERS``), joining:
+
+* ``auth_type`` / ``api_key_env_vars`` / ``base_url_env_var`` from
+  :data:`hermes_cli.auth.PROVIDER_REGISTRY` (credential truth), and
+* ``display_name`` / ``description`` / ``signup_url`` from the provider's
+  :class:`providers.base.ProviderProfile` when one exists, falling back to the
+  ``CANONICAL_PROVIDERS`` entry's ``label`` / ``tui_desc`` and the
+  ``OPTIONAL_ENV_VARS`` signup URL otherwise (many profiles leave these blank,
+  and four canonical providers have no profile at all — lmstudio, openai-api,
+  tencent-tokenhub, xai-oauth — so the fallbacks are load-bearing).
+
+Each descriptor is tagged with the ``tab`` it belongs on (``keys`` vs
+``accounts``) based purely on how the provider authenticates.  The desktop
+``/api/env`` and ``/api/providers/oauth`` endpoints derive their MEMBERSHIP from
+this catalog; the old hand lists are demoted to presentation/override overlays
+(bespoke OAuth flow + status resolvers, richer copy, icons, ordering) and no
+longer decide which providers exist.
+
+Parity contract (locked by tests): the union of the two tabs equals the
+``CANONICAL_PROVIDERS`` universe, i.e. exactly what ``hermes model`` shows.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+# Auth types that authenticate via an account / sign-in flow rather than a
+# pasted API key.  These route to the desktop "Accounts" tab; everything else
+# (api_key, and aws_sdk which is configured via AWS_REGION/AWS_PROFILE) routes
+# to the "API keys" tab.  Mirrors the auth_type strings used in
+# hermes_cli.auth.PROVIDER_REGISTRY and providers.base.ProviderProfile.
+_ACCOUNTS_AUTH_TYPES: frozenset[str] = frozenset(
+    {
+        "oauth_device_code",
+        "oauth_external",
+        "oauth_minimax",
+        "external_process",  # copilot-acp: spawns `copilot --acp --stdio`
+        "copilot",           # GitHub Copilot token / gh auth
+    }
+)
+
+
+@dataclass(frozen=True)
+class ProviderDescriptor:
+    """One provider, as seen by every surface (CLI picker + both GUI tabs)."""
+
+    slug: str                      # canonical id, e.g. "google-gemini-cli"
+    label: str                     # human display name
+    description: str               # one-line description
+    auth_type: str                 # api_key | oauth_* | external_process | copilot | aws_sdk
+    tab: str                       # "keys" | "accounts"
+    api_key_env_vars: tuple[str, ...]  # credential env vars (may be empty)
+    base_url_env_var: str          # base-URL override env var (may be "")
+    signup_url: str                # signup / console URL (may be "")
+    order: int                     # CANONICAL_PROVIDERS index — mirrors `hermes model`
+
+
+def tab_for_auth_type(auth_type: str) -> str:
+    """Return the desktop tab ("keys"|"accounts") a provider's auth maps to."""
+    return "accounts" if auth_type in _ACCOUNTS_AUTH_TYPES else "keys"
+
+
+def _split_env_vars(env_vars: tuple[str, ...]) -> tuple[tuple[str, ...], str]:
+    """Split a profile's ``env_vars`` into (non-URL vars, first ``*_URL`` var or ``""``)."""
+    keys = tuple(v for v in env_vars if not (v.endswith("_BASE_URL") or v.endswith("_URL")))
+    base = next((v for v in env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), "")
+    return keys, base
+
+
+def provider_catalog() -> list[ProviderDescriptor]:
+    """Return one descriptor per provider in the ``hermes model`` universe.
+
+    Membership is :data:`CANONICAL_PROVIDERS` (the list the CLI/TUI picker
+    renders, which auto-extends from provider plugins).  Auth + env come from
+    ``PROVIDER_REGISTRY``; display metadata from ``ProviderProfile`` with
+    canonical/env fallbacks so providers without a profile (or with blank
+    profile metadata) still resolve sensibly.
+    """
+    from hermes_cli.models import CANONICAL_PROVIDERS
+
+    # PROVIDER_REGISTRY / list_providers are imported lazily and defensively:
+    # this module is on the import path of the web server and the CLI, and we
+    # never want a provider-plugin import error to blank the whole catalog.
+    try:
+        from hermes_cli.auth import PROVIDER_REGISTRY
+    except Exception:
+        PROVIDER_REGISTRY = {}
+
+    try:
+        from providers import list_providers
+
+        profiles = {p.name: p for p in list_providers()}
+    except Exception:
+        profiles = {}
+
+    try:
+        from hermes_cli.config import OPTIONAL_ENV_VARS
+    except Exception:
+        OPTIONAL_ENV_VARS = {}
+
+    out: list[ProviderDescriptor] = []
+    for order, entry in enumerate(CANONICAL_PROVIDERS):
+        slug = entry.slug
+        cfg = PROVIDER_REGISTRY.get(slug)
+        prof = profiles.get(slug)
+
+        # auth_type: registry is authoritative; fall back to profile, then api_key.
+        auth_type = (
+            (getattr(cfg, "auth_type", "") if cfg else "")
+            or (getattr(prof, "auth_type", "") if prof else "")
+            or "api_key"
+        )
+
+        # Credential env vars: registry first (it already normalizes these),
+        # else derive from the profile's env_vars tuple.
+        if cfg and getattr(cfg, "api_key_env_vars", ()):
+            api_key_vars = tuple(cfg.api_key_env_vars)
+            base_url_var = getattr(cfg, "base_url_env_var", "") or ""
+        elif prof and getattr(prof, "env_vars", ()):
+            api_key_vars, base_url_var = _split_env_vars(tuple(prof.env_vars))
+        else:
+            api_key_vars, base_url_var = (), ""
+
+        label = (
+            (getattr(prof, "display_name", "") if prof else "")
+            or entry.label
+            or slug
+        )
+        description = (
+            (getattr(prof, "description", "") if prof else "")
+            or entry.tui_desc
+            or label
+        )
+        signup_url = (getattr(prof, "signup_url", "") if prof else "") or ""
+        if not signup_url and api_key_vars:
+            info = OPTIONAL_ENV_VARS.get(api_key_vars[0]) or {}
+            signup_url = info.get("url") or ""
+
+        out.append(
+            ProviderDescriptor(
+                slug=slug,
+                label=label,
+                description=description,
+                auth_type=auth_type,
+                tab=tab_for_auth_type(auth_type),
+                api_key_env_vars=api_key_vars,
+                base_url_env_var=base_url_var,
+                signup_url=signup_url,
+                order=order,
+            )
+        )
+    return out
+
+
+def provider_catalog_by_slug() -> dict[str, ProviderDescriptor]:
+    """Convenience: the catalog keyed by slug."""
+    return {d.slug: d for d in provider_catalog()}
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 78b92dcbad9..68919eaac62 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -12,6 +12,7 @@ logger = logging.getLogger(__name__)
 
 from hermes_cli import auth as auth_mod
 from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool
+from agent.secret_scope import get_secret as _get_secret
 from hermes_cli.auth import (
     AuthError,
     DEFAULT_CODEX_BASE_URL,
@@ -35,6 +36,19 @@ from hermes_constants import OPENROUTER_BASE_URL
 from utils import base_url_host_matches, base_url_hostname, env_int
 
 
+def _getenv(name: str, default: str = "") -> str:
+    """Profile-scoped replacement for ``os.getenv`` on credential/provider reads.
+
+    Routes through the secret scope (Workstream A): identical to ``os.getenv``
+    when multiplexing is off, scope-aware (and fail-closed on an unscoped read)
+    when on. Genuinely-global vars are handled inside ``get_secret`` and still
+    read ``os.environ``. Keeps the ``(name, default) -> str`` contract every
+    call site here already relies on.
+    """
+    val = _get_secret(name, default)
+    return val if val is not None else default
+
+
 def _normalize_custom_provider_name(value: str) -> str:
     return value.strip().lower().replace(" ", "-")
 
@@ -156,7 +170,7 @@ def _host_derived_api_key(base_url: str) -> str:
     if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"):
         return ""
     env_name = f"{sanitized}_API_KEY"
-    return (os.getenv(env_name, "") or "").strip()
+    return (_getenv(env_name, "") or "").strip()
 
 
 def _auto_detect_local_model(base_url: str) -> str:
@@ -437,7 +451,7 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str:
 
     # Prefer the persisted config selection over any stale shell/.env
     # provider override so chat uses the endpoint the user last saved.
-    env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+    env_provider = _getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
     if env_provider:
         return env_provider
 
@@ -542,7 +556,7 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
             name_norm = _normalize_custom_provider_name(ep_name)
             # Resolve the API key from the env var name stored in key_env
             key_env = str(entry.get("key_env", "") or "").strip()
-            resolved_api_key = os.getenv(key_env, "").strip() if key_env else ""
+            resolved_api_key = _getenv(key_env, "").strip() if key_env else ""
             # Fall back to inline api_key when key_env is absent or unresolvable
             if not resolved_api_key:
                 resolved_api_key = str(entry.get("api_key", "") or "").strip()
@@ -824,8 +838,8 @@ def _resolve_named_custom_runtime(
         api_key_candidates = [
             (explicit_api_key or "").strip(),
             # Gate env key fallbacks on authoritative hosts (#28660)
-            (os.getenv("OPENAI_API_KEY", "").strip()     if _da_is_openai_url else ""),
-            (os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter  else ""),
+            (_getenv("OPENAI_API_KEY", "").strip()     if _da_is_openai_url else ""),
+            (_getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter  else ""),
             # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
             # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
             # intuitive match without configuring `custom_providers` first.
@@ -878,11 +892,11 @@ def _resolve_named_custom_runtime(
     api_key_candidates = [
         (explicit_api_key or "").strip(),
         str(custom_provider.get("api_key", "") or "").strip(),
-        os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
+        _getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
         # Gate provider env keys on their authoritative hosts — sending
         # OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
-        (os.getenv("OPENAI_API_KEY", "").strip()     if _cp_is_openai_url  else ""),
-        (os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter  else ""),
+        (_getenv("OPENAI_API_KEY", "").strip()     if _cp_is_openai_url  else ""),
+        (_getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter  else ""),
         # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
         # fallback when key_env wasn't set explicitly.
         _host_derived_api_key(base_url),
@@ -941,8 +955,8 @@ def _resolve_openrouter_runtime(
         except Exception:
             pass
 
-    env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
-    env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip()
+    env_openrouter_base_url = _getenv("OPENROUTER_BASE_URL", "").strip()
+    env_custom_base_url = _getenv("CUSTOM_BASE_URL", "").strip()
 
     # Use config base_url when available and the provider context matches.
     # OPENAI_BASE_URL env var is no longer consulted — config.yaml is
@@ -982,8 +996,8 @@ def _resolve_openrouter_runtime(
     if _is_openrouter_context:
         api_key_candidates = [
             explicit_api_key,
-            os.getenv("OPENROUTER_API_KEY"),
-            os.getenv("OPENAI_API_KEY"),
+            _getenv("OPENROUTER_API_KEY"),
+            _getenv("OPENAI_API_KEY"),
         ]
     else:
         # Custom endpoint: use api_key from config when using config base_url (#1760).
@@ -1003,9 +1017,9 @@ def _resolve_openrouter_runtime(
         api_key_candidates = [
             explicit_api_key,
             (cfg_api_key if use_config_base_url else ""),
-            (os.getenv("OLLAMA_API_KEY")     if _is_ollama_url                       else ""),
-            (os.getenv("OPENAI_API_KEY")     if (_is_openai_url or _is_openai_azure) else ""),
-            (os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url                   else ""),
+            (_getenv("OLLAMA_API_KEY")     if _is_ollama_url                       else ""),
+            (_getenv("OPENAI_API_KEY")     if (_is_openai_url or _is_openai_azure) else ""),
+            (_getenv("OPENROUTER_API_KEY") if _is_openrouter_url                   else ""),
             # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
             # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
             # intuitive match. Helper returns "" for IPs/loopback and for env
@@ -1108,7 +1122,7 @@ def _resolve_azure_foundry_runtime(
         if inferred:
             cfg_api_mode = inferred
 
-    env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
+    env_base_url = _getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
     base_url = explicit_base_url_clean or cfg_base_url or env_base_url
     if not base_url:
         raise AuthError(
@@ -1197,7 +1211,7 @@ def _resolve_azure_foundry_runtime(
         except Exception:
             api_key = ""
     if not api_key:
-        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
+        api_key = _getenv("AZURE_FOUNDRY_API_KEY", "").strip()
     if not api_key:
         raise AuthError(
             "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
@@ -1297,7 +1311,7 @@ def _resolve_explicit_runtime(
         expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
         if not api_key:
             creds = resolve_nous_runtime_credentials(
-                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+                timeout_seconds=float(_getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
             )
             api_key = creds.get("api_key", "")
             expires_at = creds.get("expires_at")
@@ -1326,7 +1340,7 @@ def _resolve_explicit_runtime(
     if pconfig and pconfig.auth_type == "api_key":
         env_url = ""
         if pconfig.base_url_env_var:
-            env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+            env_url = _getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
 
         base_url = explicit_base_url
         if not base_url:
@@ -1398,8 +1412,8 @@ def resolve_runtime_provider(
     if requested_provider == "anthropic" and "azure.com" in _eff_base:
         _azure_key = (
             (explicit_api_key or "").strip()
-            or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
-            or os.getenv("ANTHROPIC_API_KEY", "").strip()
+            or _getenv("AZURE_ANTHROPIC_KEY", "").strip()
+            or _getenv("ANTHROPIC_API_KEY", "").strip()
         )
         return {
             "provider": "anthropic",
@@ -1454,8 +1468,8 @@ def resolve_runtime_provider(
     if provider == "openrouter":
         cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
         cfg_base_url = str(model_cfg.get("base_url") or "").strip()
-        env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
-        env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
+        env_openai_base_url = _getenv("OPENAI_BASE_URL", "").strip()
+        env_openrouter_base_url = _getenv("OPENROUTER_BASE_URL", "").strip()
         has_custom_endpoint = bool(
             explicit_base_url
             or env_openai_base_url
@@ -1511,7 +1525,7 @@ def resolve_runtime_provider(
     if provider == "nous":
         try:
             creds = resolve_nous_runtime_credentials(
-                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+                timeout_seconds=float(_getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
             )
             return {
                 "provider": "nous",
@@ -1664,7 +1678,7 @@ def resolve_runtime_provider(
             for hint_key in ("key_env", "api_key_env"):
                 env_var = str(model_cfg.get(hint_key) or "").strip()
                 if env_var:
-                    token = os.getenv(env_var, "").strip()
+                    token = _getenv(env_var, "").strip()
                     if token:
                         break
             # Next: an inline api_key on the model config (useful in multi-profile
@@ -1674,8 +1688,8 @@ def resolve_runtime_provider(
             # Finally fall back to the historical fixed names.
             if not token:
                 token = (
-                    os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
-                    or os.getenv("ANTHROPIC_API_KEY", "").strip()
+                    _getenv("AZURE_ANTHROPIC_KEY", "").strip()
+                    or _getenv("ANTHROPIC_API_KEY", "").strip()
                 )
             if not token:
                 raise AuthError(
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 4fa3c7c7c5f..6701d67394f 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -1554,6 +1554,7 @@ async def upload_managed_file_stream(
     )
     tmp_path = Path(tmp_name)
     total = 0
+    renamed = False
     try:
         with os.fdopen(tmp_fd, "wb") as out:
             while True:
@@ -1565,16 +1566,21 @@ async def upload_managed_file_stream(
                     raise HTTPException(status_code=413, detail="File is too large")
                 out.write(chunk)
         os.replace(tmp_path, target)
+        renamed = True
     except HTTPException:
-        tmp_path.unlink(missing_ok=True)
         raise
     except PermissionError:
-        tmp_path.unlink(missing_ok=True)
         raise HTTPException(status_code=403, detail="File is not writable")
     except OSError as exc:
-        tmp_path.unlink(missing_ok=True)
         raise HTTPException(status_code=500, detail=f"Could not write file: {exc}")
     finally:
+        # Clean up the temp file on every non-success exit, including
+        # BaseException paths the `except` clauses above don't catch — most
+        # importantly asyncio.CancelledError when a browser aborts a large
+        # upload mid-stream (the exact NS-501 scenario). os.replace clears
+        # tmp_path on success, so only unlink when the rename didn't happen.
+        if not renamed:
+            tmp_path.unlink(missing_ok=True)
         await file.close()
 
     return {
@@ -2316,6 +2322,43 @@ def _gateway_display_command(profile: Optional[str], verb: str) -> str:
     return " ".join(["hermes", *_gateway_subcommand(profile, verb)])
 
 
+# Slack member IDs (users U..., Enterprise Grid W...). Kept in sync with the
+# frontend SLACK_MEMBER_ID_RE in web/src/pages/ChannelsPage.tsx.
+_SLACK_MEMBER_ID_RE = re.compile(r"[UW][A-Z0-9]{2,}")
+
+
+def _validate_messaging_env_value(platform_id: str, key: str, value: str) -> None:
+    """Reject Slack credentials clearly in the wrong field; other platforms and empty values pass through."""
+    if platform_id != "slack" or not value:
+        return
+
+    if key == "SLACK_BOT_TOKEN" and not value.startswith("xoxb-"):
+        raise HTTPException(
+            status_code=400,
+            detail="Slack Bot Token must start with xoxb-. Paste the bot token from OAuth & Permissions.",
+        )
+    if key == "SLACK_APP_TOKEN" and not value.startswith("xapp-"):
+        raise HTTPException(
+            status_code=400,
+            detail="Slack App Token must start with xapp-. Paste the app-level token from Basic Information > App-Level Tokens.",
+        )
+    if key == "SLACK_ALLOWED_USERS":
+        # Mirror the gateway's parse (gateway/platforms/slack.py): split on comma,
+        # strip, and drop empty entries so a trailing/interior comma isn't rejected
+        # here when the runtime would accept it. "*" is the allow-all wildcard.
+        user_ids = [part.strip() for part in value.split(",") if part.strip()]
+        invalid = [
+            user_id
+            for user_id in user_ids
+            if user_id != "*" and not _SLACK_MEMBER_ID_RE.fullmatch(user_id)
+        ]
+        if invalid:
+            raise HTTPException(
+                status_code=400,
+                detail="Slack allowed user IDs must be comma-separated member IDs like U01ABC2DEF3.",
+            )
+
+
 def _spawn_gateway_restart(profile: Optional[str] = None) -> Tuple[subprocess.Popen, bool]:
     """Spawn ``hermes gateway restart``, reusing an in-flight restart.
 
@@ -3925,28 +3968,135 @@ async def update_config(body: ConfigUpdate, profile: Optional[str] = None):
         raise HTTPException(status_code=500, detail="Internal server error")
 
 
+def _catalog_provider_env_metadata() -> dict:
+    """Map provider env vars → desktop card metadata, derived from the catalog.
+
+    Returns ``{env_var: {provider, provider_label, description, url, is_password,
+    advanced, category}}`` for every API-key provider in the unified ``provider_catalog()``
+    (i.e. the ``hermes model`` universe). This is what lets the desktop Keys tab
+    render a card for a provider even when its env var was never hand-added to
+    ``OPTIONAL_ENV_VARS`` — closing the drift where CLI-configurable providers
+    (openai-api, kilocode, novita, tencent-tokenhub, copilot, …) were missing
+    from the GUI.
+
+    Hand ``OPTIONAL_ENV_VARS`` prose is layered ON TOP of this in the endpoint;
+    this only supplies membership + grouping + sensible fallbacks.
+    """
+    try:
+        from hermes_cli.provider_catalog import provider_catalog
+    except Exception:
+        return {}
+
+    # Env vars already declared with a NON-provider category (e.g. the shared
+    # GITHUB_TOKEN, which is a Skills-Hub "tool" credential) must not be
+    # promoted into a provider card. Copilot lists GITHUB_TOKEN among its auth
+    # aliases, but its provider card uses the provider-owned COPILOT_GITHUB_TOKEN.
+    try:
+        from hermes_cli.config import OPTIONAL_ENV_VARS as _OPT
+    except Exception:
+        _OPT = {}
+    _non_provider_keys = {
+        k for k, v in _OPT.items()
+        if (v or {}).get("category") and (v or {}).get("category") != "provider"
+    }
+
+    meta: dict = {}
+    for d in provider_catalog():
+        if d.tab != "keys":
+            continue
+        # API-key vars: the first is the primary (password) field; any aliases
+        # are kept as additional password fields so users can clear them too.
+        for env_var in d.api_key_env_vars:
+            if env_var in _non_provider_keys:
+                continue  # don't hijack a shared tool/messaging credential
+            meta.setdefault(
+                env_var,
+                {
+                    "provider": d.slug,
+                    "provider_label": d.label,
+                    "description": d.description,
+                    "url": d.signup_url or None,
+                    "is_password": True,
+                    "advanced": False,
+                    "category": "provider",
+                },
+            )
+        # Base-URL override is an advanced, non-secret field for the same card.
+        if d.base_url_env_var:
+            meta.setdefault(
+                d.base_url_env_var,
+                {
+                    "provider": d.slug,
+                    "provider_label": d.label,
+                    "description": f"{d.label} base URL override",
+                    "url": None,
+                    "is_password": False,
+                    "advanced": True,
+                    "category": "provider",
+                },
+            )
+
+        # AWS-SDK providers (Bedrock) authenticate via the AWS credential chain
+        # rather than a pasted API key, so they have no api_key_env_vars. Tag
+        # their AWS_* settings to the provider card so they still appear on the
+        # Keys tab (otherwise Bedrock — a `hermes model` provider — would be
+        # invisible in the desktop app).
+        if d.auth_type == "aws_sdk":
+            for aws_var in ("AWS_REGION", "AWS_PROFILE"):
+                existing = meta.get(aws_var, {})
+                meta[aws_var] = {
+                    "provider": d.slug,
+                    "provider_label": d.label,
+                    "description": existing.get("description") or f"{d.label} ({aws_var})",
+                    "url": existing.get("url"),
+                    "is_password": False,
+                    "advanced": existing.get("advanced", True),
+                    "category": "provider",
+                }
+    return meta
+
+
 @app.get("/api/env")
 async def get_env_vars(profile: Optional[str] = None):
     with _profile_scope(profile):
         env_on_disk = load_env()
     channel_keys = _channel_managed_env_keys()
-    result = {}
-    for var_name, info in OPTIONAL_ENV_VARS.items():
+    catalog_meta = _catalog_provider_env_metadata()
+
+    def _row(var_name: str, info: dict) -> dict:
         value = env_on_disk.get(var_name)
-        result[var_name] = {
+        cat_meta = catalog_meta.get(var_name) or {}
+        # Hand OPTIONAL_ENV_VARS prose wins where present; the catalog fills any
+        # gaps (description/url) and always supplies provider grouping hints.
+        return {
             "is_set": bool(value),
             "redacted_value": redact_key(value) if value else None,
-            "description": info.get("description", ""),
-            "url": info.get("url"),
-            "category": info.get("category", ""),
-            "is_password": info.get("password", False),
+            "description": info.get("description") or cat_meta.get("description", ""),
+            "url": info.get("url") if info.get("url") is not None else cat_meta.get("url"),
+            "category": info.get("category") or cat_meta.get("category", ""),
+            "is_password": info.get("password", cat_meta.get("is_password", False)),
             "tools": info.get("tools", []),
-            "advanced": info.get("advanced", False),
+            "advanced": info.get("advanced", cat_meta.get("advanced", False)),
             # True when this var is a messaging-platform credential owned by a
             # Channels page card. The Keys/Env page uses this to hide it and
             # avoid duplicating the (richer) Channels configuration UI.
             "channel_managed": var_name in channel_keys,
+            # Provider grouping hints derived from the unified provider catalog
+            # so the desktop Keys tab groups by the SAME provider identity the
+            # CLI `hermes model` picker uses (not desktop-only prefix guesses).
+            "provider": cat_meta.get("provider", ""),
+            "provider_label": cat_meta.get("provider_label", ""),
         }
+
+    result = {}
+    for var_name, info in OPTIONAL_ENV_VARS.items():
+        result[var_name] = _row(var_name, info)
+    # Synthesize rows for catalog provider env vars that have no hand entry in
+    # OPTIONAL_ENV_VARS — these are the providers that were CLI-configurable but
+    # invisible in the desktop app until now.
+    for var_name in catalog_meta:
+        if var_name not in result:
+            result[var_name] = _row(var_name, {})
     return result
 
 
@@ -4146,9 +4296,9 @@ _PLATFORM_OVERRIDES: dict[str, dict[str, Any]] = {
     },
     "slack": {
         "name": "Slack",
-        "description": "Use Hermes from Slack via Socket Mode.",
+        "description": "Use Hermes from Slack via Socket Mode. Add allowed Slack member IDs so connected bots can respond.",
         "docs_url": "https://api.slack.com/apps",
-        "env_vars": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"),
+        "env_vars": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"),
         "required_env": ("SLACK_BOT_TOKEN", "SLACK_APP_TOKEN"),
     },
     "mattermost": {
@@ -4633,6 +4783,7 @@ def _messaging_env_info(key: str) -> dict[str, Any]:
     return {
         "description": info.get("description", ""),
         "prompt": info.get("prompt", key),
+        "help": info.get("help", ""),
         "url": info.get("url"),
         "is_password": info.get("password", False),
         "advanced": info.get("advanced", False),
@@ -5212,6 +5363,7 @@ async def update_messaging_platform(
                     )
                 trimmed = value.strip()
                 if trimmed:
+                    _validate_messaging_env_value(platform_id, key, trimmed)
                     save_env_value(key, trimmed)
 
             if body.enabled is not None:
@@ -5413,13 +5565,53 @@ def _claude_code_only_status() -> Dict[str, Any]:
     return {"logged_in": False, "source": None}
 
 
-# Provider catalog. The order matters — it's how we render the UI list.
-# ``cli_command`` is what the dashboard surfaces as the copy-to-clipboard
-# fallback while Phase 2 (in-browser flows) isn't built yet.
-# ``flow`` describes the OAuth shape so the future modal can pick the
-# right UI: ``pkce`` = open URL + paste callback code, ``device_code`` =
-# show code + verification URL + poll, ``external`` = read-only (delegated
-# to a third-party CLI like Claude Code or Qwen).
+def _gemini_cli_status() -> Dict[str, Any]:
+    """Status for the google-gemini-cli OAuth provider (Code Assist login)."""
+    try:
+        from hermes_cli import auth as hauth
+        raw = hauth.get_gemini_oauth_auth_status()
+    except Exception as e:
+        return {"logged_in": False, "error": str(e)}
+    return {
+        "logged_in": bool(raw.get("logged_in")),
+        "source": raw.get("source") or "google_oauth",
+        "source_label": raw.get("email") or raw.get("auth_file") or "Google Code Assist",
+        "token_preview": _truncate_token(raw.get("api_key")),
+        "expires_at": None,
+        "has_refresh_token": True,
+    }
+
+
+def _copilot_acp_status() -> Dict[str, Any]:
+    """Status for copilot-acp — credentials are owned by the Copilot CLI.
+
+    There is no cheap programmatic credential probe for the ACP subprocess, so
+    this is a read-only "managed by the Copilot CLI" card (like claude-code):
+    Hermes never claims a login state it can't verify.
+    """
+    return {
+        "logged_in": False,
+        "source": "copilot_cli",
+        "source_label": "Managed by the GitHub Copilot CLI",
+        "token_preview": None,
+        "expires_at": None,
+        "has_refresh_token": False,
+    }
+
+
+# Explicit, hand-tuned OAuth/account provider cards. These carry the bits that
+# can't be derived from the unified provider catalog: the OAuth ``flow`` shape,
+# the per-provider ``status_fn``, the ``cli_command`` fallback, and curated
+# display order. They are the OVERRIDE BASE for ``_build_oauth_catalog()``,
+# which unions them with every accounts-tab provider in ``provider_catalog()``
+# so newly-added OAuth/external providers appear automatically (no hand edit).
+# This tuple also still includes two entries that are NOT catalog providers but
+# must show on the Accounts tab: the api-key Anthropic PKCE card and the
+# synthetic ``claude-code`` subscription row.
+# ``flow`` describes the OAuth shape so the modal can pick the right UI:
+# ``pkce`` = open URL + paste callback code, ``device_code`` = show code +
+# verification URL + poll, ``external`` = read-only (delegated to a third-party
+# CLI like Claude Code or Qwen), ``loopback`` = 127.0.0.1 callback listener.
 _OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
     {
         "id": "nous",
@@ -5469,6 +5661,22 @@ _OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
         "docs_url": "https://hermes-agent.nousresearch.com/docs/guides/xai-grok-oauth",
         "status_fn": None,  # dispatched via auth.get_xai_oauth_auth_status
     },
+    {
+        "id": "google-gemini-cli",
+        "name": "Google Gemini (OAuth + Code Assist)",
+        "flow": "external",
+        "cli_command": "hermes auth add google-gemini-cli",
+        "docs_url": "https://ai.google.dev/gemini-api/docs",
+        "status_fn": _gemini_cli_status,
+    },
+    {
+        "id": "copilot-acp",
+        "name": "GitHub Copilot (ACP)",
+        "flow": "external",
+        "cli_command": "copilot /login",
+        "docs_url": "https://docs.github.com/en/copilot",
+        "status_fn": _copilot_acp_status,
+    },
     # ── Anthropic / Claude entries sit at the bottom: the API-key path
     # first, then the subscription OAuth path (which only works with extra
     # usage credits on top of a Claude Max plan — see disclaimer in name).
@@ -5555,6 +5763,31 @@ def _resolve_provider_status(provider_id: str, status_fn) -> Dict[str, Any]:
                 "has_refresh_token": True,
                 "last_refresh": raw.get("last_refresh"),
             }
+        # No hand-written branch for this provider id: fall through to the
+        # canonical slug-driven dispatcher so accounts-tab providers derived
+        # from the unified catalog (which carry status_fn=None) still reflect
+        # real login state instead of rendering permanently logged-out. This
+        # closes the membership-auto-extends-but-status-doesn't gap: add an
+        # OAuth/account provider plugin and its card shows the right state.
+        raw = hauth.get_auth_status(provider_id)
+        if isinstance(raw, dict) and "logged_in" in raw:
+            return {
+                "logged_in": bool(raw.get("logged_in")),
+                "source": raw.get("source") or raw.get("provider") or provider_id,
+                "source_label": (
+                    raw.get("source_label")
+                    or raw.get("auth_store")
+                    or raw.get("auth_store_path")
+                    or raw.get("base_url")
+                    or raw.get("name")
+                    or ""
+                ),
+                "token_preview": _truncate_token(
+                    raw.get("access_token") or raw.get("api_key")
+                ),
+                "expires_at": raw.get("expires_at") or raw.get("access_expires_at"),
+                "has_refresh_token": bool(raw.get("has_refresh_token")),
+            }
     except Exception as e:
         return {"logged_in": False, "error": str(e)}
     return {"logged_in": False}
@@ -5598,6 +5831,56 @@ def _oauth_provider_disconnect_hint(provider: Dict[str, Any], status: Dict[str,
     return None
 
 
+def _build_oauth_catalog() -> list[Dict[str, Any]]:
+    """Build the Accounts-tab provider list.
+
+    MEMBERSHIP is the union of:
+      1. ``_OAUTH_PROVIDER_CATALOG`` — the explicit, hand-tuned cards that carry
+         bespoke flow / status_fn / cli_command (including the api-key Anthropic
+         PKCE card and the synthetic claude-code subscription row, which are not
+         catalog providers), and
+      2. every accounts-tab provider in the unified ``provider_catalog()`` (the
+         ``hermes model`` universe) — so any OAuth/external provider added as a
+         plugin appears automatically, with sensible defaults, even if no
+         explicit card was written for it.
+
+    The explicit catalog wins on metadata; the unified catalog adds providers
+    the CLI picker offers, best-effort: any exception there ends the scan and
+    the rows gathered so far are returned. Order: explicit cards first (curated
+    order), then catalog-only providers in ``provider_catalog()`` order.
+    """
+    rows: list[Dict[str, Any]] = []
+    seen: set[str] = set()
+
+    # 1. Explicit hand-tuned cards (authoritative metadata + curated order).
+    for entry in _OAUTH_PROVIDER_CATALOG:
+        if entry["id"] in seen:
+            continue
+        seen.add(entry["id"])
+        rows.append(dict(entry))  # shallow copy of the module-level card
+
+    # 2. Catalog accounts-providers not already covered — keeps the Accounts tab
+    #    in lockstep with the `hermes model` universe (zero-edit for new plugins).
+    try:
+        from hermes_cli.provider_catalog import provider_catalog
+        for d in provider_catalog():
+            if d.tab != "accounts" or d.slug in seen:
+                continue
+            seen.add(d.slug)
+            rows.append({
+                "id": d.slug,
+                "name": d.label,
+                "flow": "external",  # default flow for catalog-only rows
+                "cli_command": f"hermes auth add {d.slug}",
+                "docs_url": d.signup_url or "",
+                "status_fn": None,  # no bespoke status probe for these rows
+            })
+    except Exception:  # import or catalog failure: keep the rows built so far
+        pass  # NOTE(review): swallowed silently — consider a debug log
+
+    return rows
+
+
 @app.get("/api/providers/oauth")
 async def list_oauth_providers(profile: Optional[str] = None):
     """Enumerate every OAuth-capable LLM provider with current status.
@@ -5617,10 +5900,14 @@ async def list_oauth_providers(profile: Optional[str] = None):
           token_preview    last N chars of the token, never the full token
           expires_at       ISO timestamp string or null
           has_refresh_token bool
+
+    Membership comes from _build_oauth_catalog(): the explicit
+    _OAUTH_PROVIDER_CATALOG cards (flow/status/cli metadata) plus any
+    accounts-tab providers from the unified provider_catalog().
     """
     with _profile_scope(profile):
         providers = []
-        for p in _OAUTH_PROVIDER_CATALOG:
+        for p in _build_oauth_catalog():
             status = _resolve_provider_status(p["id"], p.get("status_fn"))
             disconnect_hint = _oauth_provider_disconnect_hint(p, status)
             providers.append({
@@ -5647,7 +5934,7 @@ async def disconnect_oauth_provider(
     _require_token(request)
 
     with _profile_scope(profile):
-        catalog_by_id = {p["id"]: p for p in _OAUTH_PROVIDER_CATALOG}
+        catalog_by_id = {p["id"]: p for p in _build_oauth_catalog()}
         provider = catalog_by_id.get(provider_id)
         if provider is None:
             raise HTTPException(
@@ -10914,6 +11201,7 @@ def _resolve_chat_argv(
     # the dashboard PTY path.
     env.setdefault("HERMES_TUI_DISABLE_MOUSE", "1")
     env.setdefault("HERMES_TUI_INLINE", "1")
+    env["HERMES_TUI_DASHBOARD"] = "1"
 
     if profile_dir is not None:
         env["HERMES_HOME"] = str(profile_dir)
diff --git a/hermes_state.py b/hermes_state.py
index 36e5c91fe8a..8847593d47c 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -1836,6 +1836,43 @@ class SessionDB:
 
         return cleaned
 
+    def _is_compression_ancestor(
+        self, conn, *, ancestor_id: str, descendant_id: str
+    ) -> bool:
+        """Return True if *ancestor_id* is a compression predecessor of
+        *descendant_id* (walking parent links up the continuation chain).
+
+        The continuation edge is the canonical one shared with
+        :func:`_ephemeral_child_sql` / :meth:`set_session_archived`
+        (``_COMPRESSION_CHILD_SQL``): a parent → child edge counts only when the
+        parent ended with ``end_reason = 'compression'`` and the child started
+        at or after the parent's ``ended_at``, which distinguishes continuations
+        from delegate subagents / branch children that also carry a
+        ``parent_session_id``. Expressed as a single recursive CTE rather than a
+        per-hop Python walk so the edge definition lives in exactly one place.
+        """
+        if not ancestor_id or not descendant_id or ancestor_id == descendant_id:
+            return False  # empty ids, or a session compared with itself
+        # Seed the CTE with the descendant, then repeatedly step to the parent
+        # across compression edges only; a hit means ancestor_id was reached.
+        edge = _COMPRESSION_CHILD_SQL.format(a="child")  # {a} -> "child" alias
+        row = conn.execute(
+            f"""
+            WITH RECURSIVE ancestors(id) AS (
+                SELECT ?
+                UNION
+                SELECT parent.id
+                FROM ancestors a
+                JOIN sessions child ON child.id = a.id
+                JOIN sessions parent ON parent.id = child.parent_session_id
+                WHERE {edge}
+            )
+            SELECT 1 FROM ancestors WHERE id = ? AND id != ? LIMIT 1
+            """,
+            (descendant_id, ancestor_id, descendant_id),  # seed, target, not-self
+        ).fetchone()
+        return row is not None
+
     def set_session_title(self, session_id: str, title: str) -> bool:
         """Set or update a session's title.
 
@@ -1854,9 +1891,29 @@ class SessionDB:
                 )
                 conflict = cursor.fetchone()
                 if conflict:
-                    raise ValueError(
-                        f"Title '{title}' is already in use by session {conflict['id']}"
-                    )
+                    conflict_id = conflict["id"]
+                    # A compression continuation is the live, projected-forward
+                    # head of its conversation; its compressed predecessors are
+                    # ended and hidden from the session list (list_sessions_rich
+                    # projects roots → tip). When the title that "conflicts" is
+                    # held by such a hidden ancestor, the user has no way to free
+                    # it — renaming the visible tip back to the base name would
+                    # dead-end with "already in use by <session they can't see>".
+                    # Treat this as a transfer: move the title off the ancestor
+                    # onto the continuation. Uniqueness is preserved (still only
+                    # one session carries the exact title) and the parent-link
+                    # lineage is untouched.
+                    if self._is_compression_ancestor(
+                        conn, ancestor_id=conflict_id, descendant_id=session_id
+                    ):
+                        conn.execute(
+                            "UPDATE sessions SET title = NULL WHERE id = ?",
+                            (conflict_id,),
+                        )
+                    else:
+                        raise ValueError(
+                            f"Title '{title}' is already in use by session {conflict_id}"
+                        )
             cursor = conn.execute(
                 "UPDATE sessions SET title = ? WHERE id = ?",
                 (title, session_id),
diff --git a/nix/devShell.nix b/nix/devShell.nix
index 2670c579541..c131bbb5ba7 100644
--- a/nix/devShell.nix
+++ b/nix/devShell.nix
@@ -12,7 +12,6 @@
     let
       packages = builtins.attrValues self'.packages;
       hermesNpmLib = self'.packages.default.passthru.hermesNpmLib;
-      fixLockfilesExe = pkgs.lib.getExe self'.packages.fix-lockfiles;
 
       # Collect all packageJsonPath values from npm workspace packages.
       npmPackageJsonPaths = builtins.filter (p: p != null) (
@@ -33,7 +32,7 @@
         shellHook = ''
           echo "Hermes Agent dev shell"
           ${combinedNonNpm}
-          ${hermesNpmLib.mkNpmDevShellHook npmPackageJsonPaths fixLockfilesExe}
+          ${hermesNpmLib.mkNpmDevShellHook npmPackageJsonPaths}
           echo "Ready. Run 'hermes' to start."
         '';
       };
diff --git a/nix/lib.nix b/nix/lib.nix
index 180f00f2ee0..a7a6eab7c5b 100644
--- a/nix/lib.nix
+++ b/nix/lib.nix
@@ -2,8 +2,7 @@
 #
 # All npm packages in this repo are workspace members sharing a single
 # root package-lock.json.  mkNpmPassthru provides the shared src, npmDeps,
-# npmRoot, and npmDepsFetcherVersion so individual .nix files don't
-# duplicate them.  One hash to rule them all.
+# npmRoot, and npmConfigHook so individual .nix files don't duplicate them.
 #
 # mkNpmPassthru returns packageJsonPath (e.g. "ui-tui/package.json")
 # instead of a per-package devShellHook.  The root devshell hook
@@ -19,28 +18,19 @@ let
   # The workspace root — where the single package-lock.json lives.
   src = ../.;
 
-  # Single npm deps fetch from the workspace root lockfile.
-  # All workspace packages share this derivation.
-  npmDepsHash = "sha256-kbjJksq7limRIYqP3DwI+GNgCXkG96tXcsQqmuEedxo=";
-
-  npmDeps = pkgs.fetchNpmDeps {
-    inherit src;
-    fetcherVersion = 2;
-    hash = npmDepsHash;
-  };
+  # npm dependencies for the workspace, shared by all members. importNpmLock
+  # resolves each package from the lockfile's own `integrity` hashes, so the
+  # lockfile is the single source of truth — no separate dependency hash to
+  # keep in sync with it.
+  npmDeps = pkgs.importNpmLock.importNpmLock { npmRoot = src; };
 in
 {
   # Returns a buildNpmPackage-compatible attrs set that provides:
-  #   src, npmDeps, npmRoot, npmDepsFetcherVersion
-  #   patchPhase             — ensures root lockfile has exactly one trailing newline
-  #   nativeBuildInputs      — [ updateLockfileScript ] (list, prepend with ++ for more)
-  #   passthru.packageJsonPath — relative path to this workspace's package.json
-  #   nodejs                 — fixed nodejs version for all packages we use in the repo
-  #
-  # NOTE: npmConfigHook runs `diff` between the source lockfile and the
-  # npm-deps cache lockfile. fetchNpmDeps preserves whatever trailing
-  # newlines the lockfile has. The patchPhase normalizes to exactly one
-  # trailing newline so both sides always match.
+  #   src, npmDeps, npmRoot      — workspace source + importNpmLock dep set
+  #   npmConfigHook              — importNpmLock's offline `npm install` hook
+  #   nativeBuildInputs          — [ updateLockfileScript ] (list, prepend with ++ for more)
+  #   passthru.packageJsonPath   — relative path to this workspace's package.json
+  #   nodejs                     — fixed nodejs version for all packages we use in the repo
   #
   # Usage:
   #   npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
@@ -62,35 +52,15 @@ in
     in
     {
       inherit src npmDeps nodejs;
+      # importNpmLock's hook installs the rewritten lockfile (every `resolved`
+      # rewritten to a /nix/store file: path) into the unpacked workspace and
+      # runs `npm install` offline, so every workspace member's dependencies
+      # resolve without network access.
+      npmConfigHook = pkgs.importNpmLock.npmConfigHook;
       npmRoot = ".";
-      npmDepsFetcherVersion = 2;
 
       ELECTRON_SKIP_BINARY_DOWNLOAD = 1;
 
-      patchPhase = ''
-        runHook prePatch
-        # Normalize trailing newlines on the root lockfile so source and
-        # npm-deps always match, regardless of what fetchNpmDeps preserves.
-        sed -i -z 's/\\n*$/\\n/' package-lock.json
-
-        # Make npmConfigHook's byte-for-byte diff newline-agnostic by
-        # replacing its hardcoded /nix/store/.../diff with a wrapper that
-        # normalizes trailing newlines on both sides before comparing.
-        mkdir -p "$TMPDIR/bin"
-        cat > "$TMPDIR/bin/diff" << DIFFWRAP
-        #!/bin/sh
-        f1=\\$(mktemp) && sed -z 's/\\n*$/\\n/' "\\$1" > "\\$f1"
-        f2=\\$(mktemp) && sed -z 's/\\n*$/\\n/' "\\$2" > "\\$f2"
-        ${pkgs.diffutils}/bin/diff "\\$f1" "\\$f2" && rc=0 || rc=\\$?
-        rm -f "\\$f1" "\\$f2"
-        exit \\$rc
-        DIFFWRAP
-        chmod +x "$TMPDIR/bin/diff"
-        export PATH="$TMPDIR/bin:$PATH"
-
-        runHook postPatch
-      '';
-
       nativeBuildInputs = [
         (pkgs.writeShellScriptBin "update_${attr}_lockfile" ''
           set -euox pipefail
@@ -104,7 +74,6 @@ in
           CI=true ${pkgs.lib.getExe' nodejs "npm"} install --workspaces
           ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
 
-          # Hash lives in lib.nix — just rebuild to verify.
           nix build .#${attr}
           echo "Lockfile updated and build verified for .#${attr}"
         '')
@@ -120,12 +89,9 @@ in
   # Takes a list of package.json relative paths (from mkNpmPassthru .passthru.packageJsonPath),
   # stamps all of them, and if any changed:
   #   1. Runs `npm i --package-lock-only` from root to update the lockfile
-  #   2. If the lockfile changed, runs `npm ci` + fix-lockfiles
-  #
-  # fixLockfilesExe: absolute path to the fix-lockfiles binary
-  # (from pkgs.lib.getExe self'.packages.fix-lockfiles in devShell.nix).
+  #   2. If the lockfile changed, runs `npm ci`
   mkNpmDevShellHook =
-    packageJsonPaths: fixLockfilesExe:
+    packageJsonPaths:
     pkgs.writeShellScript "npm-dev-hook" ''
       REPO_ROOT=$(git rev-parse --show-toplevel)
 
@@ -158,172 +124,4 @@ in
         echo "$LOCK_STAMP_VALUE" > "$LOCK_STAMP"
       fi
     '';
-
-  # Build `fix-lockfiles` bin that checks/updates the single npmDepsHash
-  #   fix-lockfiles --check   # exit 1 if any hash is stale
-  #   fix-lockfiles --apply   # rewrite stale hashes in place
-  #   fix-lockfiles           # alias of --apply
-  # Writes machine-readable fields (stale, changed, report) to $GITHUB_OUTPUT
-  # when set, so CI workflows can post a sticky PR comment directly.
-  mkFixLockfiles =
-    {
-      attr, # flake package attr for fallback verification build, e.g. "tui"
-    }:
-    pkgs.writeShellScriptBin "fix-lockfiles" ''
-      set -uox pipefail
-      MODE="''${1:---apply}"
-      case "$MODE" in
-        --check|--apply) ;;
-        -h|--help)
-          echo "usage: fix-lockfiles [--check|--apply]"
-          exit 0 ;;
-        *)
-          echo "usage: fix-lockfiles [--check|--apply]" >&2
-          exit 2 ;;
-      esac
-
-      REPO_ROOT="$(git rev-parse --show-toplevel)"
-      cd "$REPO_ROOT"
-
-      # When running in GH Actions, emit Markdown links in the report pointing
-      # at the offending line of the nix file (and the lockfile) at the exact
-      # commit that was checked. LINK_SHA should be set by the workflow to the
-      # PR head SHA; falls back to GITHUB_SHA (which on pull_request is the
-      # test-merge commit, still browseable).
-      LINK_SERVER="''${GITHUB_SERVER_URL:-https://github.com}"
-      LINK_REPO="''${GITHUB_REPOSITORY:-}"
-      LINK_SHA="''${LINK_SHA:-''${GITHUB_SHA:-}}"
-
-      STALE=0
-      FIXED=0
-      REPORT=""
-
-      # All workspace packages share the root package-lock.json, so
-      # we only need to check the hash once.
-      LOCK_FILE="package-lock.json"
-      LIB_FILE="nix/lib.nix"
-      NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "$LOCK_FILE" 2>/dev/null)
-      if [ -z "$NEW_HASH" ]; then
-        echo "prefetch-npm-deps failed, falling back to nix build" >&2
-        OUTPUT=$(nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>&1)
-        STATUS=$?
-        if [ "$STATUS" -eq 0 ]; then
-          echo "ok (via nix build)"
-          exit 0
-        fi
-        NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
-        if [ -z "$NEW_HASH" ]; then
-          if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
-            echo "skipped (transient cache failure — see primary nix build for real status)" >&2
-            echo "$OUTPUT" | tail -8 >&2
-            exit 0
-          fi
-          echo "build failed with no hash mismatch:" >&2
-          echo "$OUTPUT" | tail -40 >&2
-          exit 1
-        fi
-      fi
-
-      OLD_HASH=$(grep -oE 'npmDepsHash = "sha256-[^"]+"' "$LIB_FILE" | head -1 \
-        | sed -E 's/npmDepsHash = "(.*)"/\1/')
-
-      # prefetch-npm-deps says the hash already matches — but it only hashes the
-      # lockfile *contents* and can disagree with fetchNpmDeps + npmConfigHook,
-      # which validate the full source lockfile against the realized deps cache.
-      # Trusting prefetch alone produced false "ok" results while the actual
-      # build was broken (e.g. lockfile engines/os/cpu fields the pinned nixpkgs
-      # strips from the deps cache, tripping npmConfigHook). So when prefetch
-      # claims the hash is current, confirm with a real consumer build before
-      # believing it.
-      if [ "$NEW_HASH" = "$OLD_HASH" ]; then
-        if VERIFY_OUT=$(nix build ".#${attr}" --no-link --print-build-logs 2>&1); then
-          echo "ok"
-          if [ -n "''${GITHUB_OUTPUT:-}" ]; then
-            { echo "stale=false"; echo "changed=false"; } >> "$GITHUB_OUTPUT"
-          fi
-          exit 0
-        fi
-        # Build failed despite a matching hash. A fixed-output 'got:' means
-        # prefetch genuinely disagreed with fetchNpmDeps — adopt the real hash
-        # and fall through to the stale-handling path below.
-        CORRECT_HASH=$(echo "$VERIFY_OUT" | awk '/got:/ {print $2; exit}')
-        if [ -n "$CORRECT_HASH" ]; then
-          echo "prefetch-npm-deps reported current ($OLD_HASH) but fetchNpmDeps wants $CORRECT_HASH" >&2
-          NEW_HASH="$CORRECT_HASH"
-        elif echo "$VERIFY_OUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
-          echo "skipped (transient cache failure — see primary nix build for real status)" >&2
-          echo "$VERIFY_OUT" | tail -8 >&2
-          exit 0
-        else
-          # Not a stale-hash problem — surface it honestly instead of "ok".
-          echo "::error::nix build .#${attr} failed and it is NOT a stale npmDepsHash (no 'got:' hash in output)." >&2
-          echo "The committed lockfile may be incompatible with the pinned nixpkgs" >&2
-          echo "(e.g. engines/os/cpu fields that prefetch-npm-deps strips from the" >&2
-          echo "deps cache, tripping npmConfigHook). fix-lockfiles cannot repair this." >&2
-          echo "$VERIFY_OUT" | tail -40 >&2
-          if [ -n "''${GITHUB_OUTPUT:-}" ]; then
-            { echo "stale=false"; echo "changed=false"; } >> "$GITHUB_OUTPUT"
-          fi
-          exit 1
-        fi
-      fi
-
-      HASH_LINE=$(grep -n 'npmDepsHash = "sha256-' "$LIB_FILE" | head -1 | cut -d: -f1)
-      echo "stale: $LIB_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
-      STALE=1
-
-      if [ -n "$LINK_REPO" ] && [ -n "$LINK_SHA" ]; then
-        LIB_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LIB_FILE#L$HASH_LINE"
-        LOCK_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LOCK_FILE"
-        REPORT="- [\`$LIB_FILE:$HASH_LINE\`]($LIB_URL): \`$OLD_HASH\` → \`$NEW_HASH\` — lockfile: [\`$LOCK_FILE\`]($LOCK_URL)"$'\\n'
-      else
-        REPORT="- \`$LIB_FILE:$HASH_LINE\`: \`$OLD_HASH\` → \`$NEW_HASH\`"$'\\n'
-      fi
-
-      if [ "$MODE" = "--apply" ]; then
-        sed -i -E "s|npmDepsHash = \"sha256-[^\"]+\";|npmDepsHash = \"$NEW_HASH\";|" "$LIB_FILE"
-        if ! nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>/dev/null; then
-          # prefetch-npm-deps may disagree with fetchNpmDeps (it hashes
-          # the lockfile contents, not the full source tree).  Extract the
-          # correct hash from the nix build error and retry.
-          RETRY_OUTPUT=$(nix build ".#${attr}.npmDeps" --no-link --print-build-logs 2>&1)
-          CORRECT_HASH=$(echo "$RETRY_OUTPUT" | awk '/got:/ {print $2; exit}')
-          if [ -n "$CORRECT_HASH" ]; then
-            echo "prefetch-npm-deps gave $NEW_HASH but nix wants $CORRECT_HASH — retrying" >&2
-            sed -i -E "s|npmDepsHash = \"sha256-[^\"]+\";|npmDepsHash = \"$CORRECT_HASH\";|" "$LIB_FILE"
-            if ! nix build ".#${attr}.npmDeps" --no-link --print-build-logs; then
-              echo "verification build failed after hash retry" >&2
-              exit 1
-            fi
-            NEW_HASH="$CORRECT_HASH"
-          else
-            echo "verification build failed after hash update" >&2
-            exit 1
-          fi
-        fi
-        FIXED=1
-        echo "fixed"
-      fi
-
-      if [ -n "''${GITHUB_OUTPUT:-}" ]; then
-        {
-          [ "$STALE" -eq 1 ] && echo "stale=true" || echo "stale=false"
-          [ "$FIXED" -eq 1 ] && echo "changed=true" || echo "changed=false"
-          if [ -n "$REPORT" ]; then
-            echo "report<<REPORT_EOF"
-            printf "%s" "$REPORT"
-            echo "REPORT_EOF"
-          fi
-        } >> "$GITHUB_OUTPUT"
-      fi
-
-      if [ "$STALE" -eq 1 ] && [ "$MODE" = "--check" ]; then
-        echo
-        echo "Stale lockfile hash detected. Run:"
-        echo "  nix run .#fix-lockfiles"
-        exit 1
-      fi
-
-      exit 0
-    '';
 }
diff --git a/nix/packages.nix b/nix/packages.nix
index d585beec6b4..131444fb3fd 100644
--- a/nix/packages.nix
+++ b/nix/packages.nix
@@ -50,8 +50,6 @@
         tui = hermesAgent.hermesTui;
         web = hermesAgent.hermesWeb;
         desktop = hermesAgent.hermesDesktop;
-
-        fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles { attr = "tui"; };
       };
     };
 }
diff --git a/plugins/memory/hindsight/README.md b/plugins/memory/hindsight/README.md
index d8f96a45e1e..be2e24528bb 100644
--- a/plugins/memory/hindsight/README.md
+++ b/plugins/memory/hindsight/README.md
@@ -144,4 +144,4 @@ Available in `hybrid` and `tools` memory modes:
 
 ## Client Version
 
-Requires `hindsight-client >= 0.4.22`. The plugin auto-upgrades on session start if an older version is detected.
+Requires `hindsight-client >= 0.6.1`. The plugin auto-upgrades on session start if an older version is detected.
diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py
index 03ebda28eca..dbe4ecd06c0 100644
--- a/plugins/memory/hindsight/__init__.py
+++ b/plugins/memory/hindsight/__init__.py
@@ -50,7 +50,8 @@ logger = logging.getLogger(__name__)
 
 _DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
 _DEFAULT_LOCAL_URL = "http://localhost:8888"
-_MIN_CLIENT_VERSION = "0.4.22"
+# Keep in sync with tools/lazy_deps.py ("memory.hindsight") and plugin.yaml.
+_MIN_CLIENT_VERSION = "0.6.1"
 _DEFAULT_TIMEOUT = 120  # seconds — cloud API can take 30-40s per request
 _DEFAULT_IDLE_TIMEOUT = 300  # seconds — Hindsight embedded daemon default
 # Mirrors hindsight-integrations/openclaw — Hindsight 0.5.0 added
@@ -100,6 +101,17 @@ def _check_local_runtime() -> tuple[bool, str | None]:
         return False, str(exc)
 
 
+def _ensure_cloud_client_dependency() -> None:
+    """Install the Hindsight cloud client lazily before importing it."""
+    try:
+        from tools.lazy_deps import ensure as _lazy_ensure
+        _lazy_ensure("memory.hindsight", prompt=False)
+    except ImportError:  # ImportError from either line above is ignored
+        pass
+    except Exception as exc:  # any other failure is re-raised as ImportError
+        raise ImportError(str(exc)) from exc
+
+
 # ---------------------------------------------------------------------------
 # Hindsight API capability probe — mirrors hindsight-integrations/openclaw.
 # ---------------------------------------------------------------------------
@@ -730,7 +742,6 @@ class HindsightMemoryProvider(MemoryProvider):
         env_writes: dict = {}
 
         # Step 2: Install/upgrade deps for selected mode
-        _MIN_CLIENT_VERSION = "0.4.22"
         cloud_dep = f"hindsight-client>={_MIN_CLIENT_VERSION}"
         local_dep = "hindsight-all"
         if mode == "local_embedded":
@@ -990,6 +1001,7 @@ class HindsightMemoryProvider(MemoryProvider):
                 kwargs["idle_timeout"] = idle_timeout
                 self._client = HindsightEmbedded(**kwargs)
             else:
+                _ensure_cloud_client_dependency()
                 from hindsight_client import Hindsight
                 timeout = self._timeout or _DEFAULT_TIMEOUT
                 kwargs = {"base_url": self._api_url, "timeout": float(timeout)}
diff --git a/plugins/memory/hindsight/plugin.yaml b/plugins/memory/hindsight/plugin.yaml
index b12c09142bb..9dfa763af7f 100644
--- a/plugins/memory/hindsight/plugin.yaml
+++ b/plugins/memory/hindsight/plugin.yaml
@@ -2,7 +2,7 @@ name: hindsight
 version: 1.0.0
 description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
 pip_dependencies:
-  - "hindsight-client>=0.4.22"
+  - "hindsight-client>=0.6.1"
 requires_env: []
 hooks:
   - on_session_end
diff --git a/plugins/memory/openviking/__init__.py b/plugins/memory/openviking/__init__.py
index 7ebe6869a46..b4d44be88af 100644
--- a/plugins/memory/openviking/__init__.py
+++ b/plugins/memory/openviking/__init__.py
@@ -45,10 +45,11 @@ from typing import Any, Callable, Dict, List, Optional, Set
 from urllib.parse import urlparse
 from urllib.request import url2pathname
 
+from agent.message_content import flatten_message_text
 from agent.memory_provider import MemoryProvider
 from agent.skill_commands import extract_user_instruction_from_skill_message
 from tools.registry import tool_error
-from utils import atomic_json_write
+from utils import atomic_json_write, env_var_enabled
 
 logger = logging.getLogger(__name__)
 
@@ -70,6 +71,7 @@ _TIMEOUT = 30.0
 _SESSION_DRAIN_TIMEOUT = 10.0
 _DEFERRED_COMMIT_TIMEOUT = (_TIMEOUT * 2) + 5.0
 _REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")
+_SYNC_TRACE_ENV = "HERMES_OPENVIKING_SYNC_TRACE"
 
 # Maps the viking_remember `category` enum to a viking:// subdirectory.
 # Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
@@ -156,6 +158,18 @@ def _derive_openviking_user_text(content: Any) -> str:
     return extract_user_instruction_from_skill_message(content) or ""
 
 
+def _sync_trace_enabled() -> bool:  # flag read from HERMES_OPENVIKING_SYNC_TRACE
+    return env_var_enabled(_SYNC_TRACE_ENV)
+
+
+def _preview(value: Any, limit: int = 160) -> str:
+    """Stringify *value* (None is empty), escape newlines, clip at *limit*."""
+    flat = ("" if value is None else str(value)).replace("\n", "\\n")
+    if len(flat) <= limit:
+        return flat
+    return flat[:limit] + "..."
+
+
 # ---------------------------------------------------------------------------
 # Process-level atexit safety net — ensures pending sessions are committed
 # even if shutdown_memory_provider is never called (e.g. gateway crash,
@@ -488,6 +502,25 @@ ADD_RESOURCE_SCHEMA = {
 }
 
 
+# Recall tools (read-only) whose results we never re-ingest into OpenViking —
+# echoing recalled memory back into the session transcript would re-store it.
+# Write tools (viking_remember / viking_add_resource) are intentionally NOT
+# here. Derived from the canonical schema names so renames can't desync.
+_OPENVIKING_RECALL_TOOL_NAMES = {
+    SEARCH_SCHEMA["name"],
+    READ_SCHEMA["name"],
+    BROWSE_SCHEMA["name"],
+}
+
+# Canonical tool_status values emitted in OpenViking batch tool parts.
+_TOOL_STATUS_COMPLETED = "completed"
+_TOOL_STATUS_ERROR = "error"
+_TOOL_STATUS_PENDING = "pending"
+# Inbound status aliases (from varied tool-result shapes) -> canonical above.
+_TOOL_STATUS_ERROR_ALIASES = {"error", "failed", "failure"}
+_TOOL_STATUS_COMPLETED_ALIASES = {"completed", "complete", "success", "succeeded"}
+
+
 def _zip_directory(dir_path: Path) -> Path:
     """Create a temporary zip file containing a directory tree."""
     root = dir_path.resolve()
@@ -2221,7 +2254,10 @@ class OpenVikingMemoryProvider(MemoryProvider):
 
     def _commit_session(self, sid: str, turn_count: int, *, context: str) -> bool:
         try:
-            self._client.post(f"/api/v1/sessions/{sid}/commit")
+            self._client.post(
+                f"/api/v1/sessions/{sid}/commit",
+                {"keep_recent_count": 0},
+            )
             self._mark_session_committed(sid)
             logger.info("OpenViking session %s committed %s (%d turns)", sid, context, turn_count)
             return True
@@ -2293,7 +2329,265 @@ class OpenVikingMemoryProvider(MemoryProvider):
         with self._prefetch_lock:
             self._prefetch_result = ""
 
-    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+    @staticmethod
+    def _message_text(content: Any) -> str:
+        """Extract text from OpenAI-style string/list content (delegates to ``flatten_message_text``)."""
+        return flatten_message_text(content)
+
+    @classmethod
+    def _message_matches_text(cls, message: Dict[str, Any], expected: Any) -> bool:
+        """Return True when ``message`` content equals the non-blank ``expected`` text."""
+        wanted = cls._message_text(expected).strip()
+        if wanted:
+            return cls._message_text(message.get("content")).strip() == wanted
+        return False
+
+    @classmethod
+    def _extract_current_turn_messages(
+        cls,
+        messages: Optional[List[Dict[str, Any]]],
+        user_content: str,
+        assistant_content: str,
+    ) -> List[Dict[str, Any]]:
+        """Slice the completed turn out of Hermes' full canonical transcript."""
+        if not messages:
+            return []
+        # End of turn: newest assistant message whose text matches, else the newest assistant message, else the last index.
+        end_idx: Optional[int] = None
+        if cls._message_text(assistant_content).strip():
+            for idx in range(len(messages) - 1, -1, -1):
+                message = messages[idx]
+                if (
+                    isinstance(message, dict)
+                    and message.get("role") == "assistant"
+                    and cls._message_matches_text(message, assistant_content)
+                ):
+                    end_idx = idx
+                    break
+        if end_idx is None:
+            for idx in range(len(messages) - 1, -1, -1):
+                message = messages[idx]
+                if isinstance(message, dict) and message.get("role") == "assistant":
+                    end_idx = idx
+                    break
+        if end_idx is None:
+            end_idx = len(messages) - 1
+        # Start of turn: scanning back from the end, the first user message whose text matches, else the first user message.
+        start_idx: Optional[int] = None
+        if cls._message_text(user_content).strip():
+            for idx in range(end_idx, -1, -1):
+                message = messages[idx]
+                if (
+                    isinstance(message, dict)
+                    and message.get("role") == "user"
+                    and cls._message_matches_text(message, user_content)
+                ):
+                    start_idx = idx
+                    break
+        if start_idx is None:
+            for idx in range(end_idx, -1, -1):
+                message = messages[idx]
+                if isinstance(message, dict) and message.get("role") == "user":
+                    start_idx = idx
+                    break
+        if start_idx is None:
+            return []
+        # Non-dict entries inside the inclusive start..end slice are dropped.
+        return [message for message in messages[start_idx : end_idx + 1] if isinstance(message, dict)]
+
+    @staticmethod
+    def _tool_call_id(tool_call: Dict[str, Any]) -> str:
+        return str(tool_call.get("id") or tool_call.get("tool_call_id") or "")  # "" when neither key holds a truthy value
+
+    @staticmethod
+    def _tool_call_name(tool_call: Dict[str, Any]) -> str:
+        """Return the tool name, reading the nested ``function`` dict when present."""
+        nested = tool_call.get("function")
+        holder = nested if isinstance(nested, dict) else tool_call
+        return str(holder.get("name") or "")
+
+    @staticmethod
+    def _is_openviking_recall_tool_name(tool_name: Any) -> bool:
+        return str(tool_name or "").strip().lower() in _OPENVIKING_RECALL_TOOL_NAMES  # input is trimmed and lower-cased before the lookup
+
+    @staticmethod
+    def _tool_call_input(tool_call: Dict[str, Any]) -> Dict[str, Any]:
+        """Return the call's arguments as a dict, wrapping non-dict values under "value"."""
+        nested = tool_call.get("function")
+        payload: Any = nested.get("arguments") if isinstance(nested, dict) else None
+        if payload is None:
+            # No nested arguments: use the flat ``args`` field instead.
+            payload = tool_call.get("args")
+        if payload is None:
+            return {}
+        if isinstance(payload, dict):
+            return payload
+        if not isinstance(payload, str):
+            return {"value": payload}
+        if not payload.strip():
+            return {}
+        try:
+            decoded = json.loads(payload)
+        except Exception:
+            # Unparseable text is preserved verbatim under "value".
+            return {"value": payload}
+        # Only JSON objects pass through as-is; other JSON values are wrapped.
+        return decoded if isinstance(decoded, dict) else {"value": decoded}
+
+    @classmethod
+    def _tool_result_status(cls, message: Dict[str, Any]) -> str:
+        raw_status = str(message.get("status") or message.get("tool_status") or "").lower()
+        if raw_status in _TOOL_STATUS_ERROR_ALIASES:
+            return _TOOL_STATUS_ERROR
+        if raw_status in _TOOL_STATUS_COMPLETED_ALIASES:
+            return _TOOL_STATUS_COMPLETED
+        # No recognised status field: look for failure markers in a JSON-object content body.
+        text = cls._message_text(message.get("content")).strip()
+        if text:
+            try:
+                parsed = json.loads(text)
+            except Exception:
+                parsed = None
+            if isinstance(parsed, dict):
+                status = str(parsed.get("status") or "").lower()
+                exit_code = parsed.get("exit_code")
+                if (
+                    status in _TOOL_STATUS_ERROR_ALIASES
+                    or parsed.get("success") is False
+                    or bool(parsed.get("error"))
+                    or (isinstance(exit_code, int) and exit_code != 0)
+                ):
+                    return _TOOL_STATUS_ERROR
+        # Nothing indicated failure (or the content was not a JSON object), so report completed.
+        return _TOOL_STATUS_COMPLETED
+
+    @classmethod
+    def _messages_to_openviking_batch(
+        cls,
+        messages: List[Dict[str, Any]],
+        *,
+        assistant_peer_id: str = "",
+    ) -> List[Dict[str, Any]]:
+        """Convert Hermes canonical messages into OpenViking batch payloads."""
+        assistant_peer_id = str(assistant_peer_id or "").strip()
+        tool_calls_by_id: Dict[str, Dict[str, Any]] = {}
+        completed_tool_ids: set[str] = set()
+        skipped_tool_ids: set[str] = set()
+        for message in messages:
+            if not isinstance(message, dict):
+                continue
+            if message.get("role") == "tool":
+                tool_id = str(message.get("tool_call_id") or message.get("id") or "")
+                if tool_id:
+                    completed_tool_ids.add(tool_id)
+                    if cls._is_openviking_recall_tool_name(message.get("name")):
+                        skipped_tool_ids.add(tool_id)
+                continue
+            if message.get("role") != "assistant":
+                continue
+            for tool_call in message.get("tool_calls") or []:
+                if not isinstance(tool_call, dict):
+                    continue
+                tool_id = cls._tool_call_id(tool_call)
+                tool_name = cls._tool_call_name(tool_call)
+                if tool_id:
+                    tool_calls_by_id[tool_id] = {
+                        "tool_name": tool_name,
+                        "tool_input": cls._tool_call_input(tool_call),
+                    }
+                    if cls._is_openviking_recall_tool_name(tool_name):
+                        skipped_tool_ids.add(tool_id)
+        # Pre-scan done (calls indexed by id, result ids and recall-tool ids recorded); now build the payloads in order.
+        payload_messages: List[Dict[str, Any]] = []
+        pending_tool_parts: List[Dict[str, Any]] = []
+
+        def payload_message(role: str, parts: List[Dict[str, Any]]) -> Dict[str, Any]:
+            payload: Dict[str, Any] = {"role": role, "parts": parts}
+            if role == "assistant" and assistant_peer_id:
+                payload["peer_id"] = assistant_peer_id
+            return payload
+
+        def flush_tool_parts() -> None:
+            nonlocal pending_tool_parts
+            if pending_tool_parts:
+                payload_messages.append(payload_message("assistant", pending_tool_parts))
+                pending_tool_parts = []
+
+        for message in messages:
+            if not isinstance(message, dict):
+                continue
+
+            role = str(message.get("role") or "")
+            if role in {"system", "developer"}:
+                continue
+            # Tool results become tool parts, buffered until the next user/assistant message or the end.
+            if role == "tool":
+                tool_id = str(message.get("tool_call_id") or message.get("id") or "")
+                prior_call = tool_calls_by_id.get(tool_id, {})
+                tool_name = str(message.get("name") or prior_call.get("tool_name") or "")
+                if tool_id in skipped_tool_ids or cls._is_openviking_recall_tool_name(tool_name):
+                    continue
+                tool_part = {
+                    "type": "tool",
+                    "tool_id": tool_id,
+                    "tool_name": tool_name,
+                    "tool_input": prior_call.get("tool_input", {}),
+                    "tool_output": cls._message_text(message.get("content")),
+                    "tool_status": cls._tool_result_status(message),
+                }
+                pending_tool_parts.append(tool_part)
+                continue
+
+            if role not in {"user", "assistant"}:
+                continue
+            # Buffered tool results are emitted as their own assistant message before this one.
+            flush_tool_parts()
+            parts: List[Dict[str, Any]] = []
+            text = cls._message_text(message.get("content"))
+            if text:
+                parts.append({"type": "text", "text": text})
+            # Calls that already have a result are omitted here; only result-less calls are emitted, as pending.
+            if role == "assistant":
+                for tool_call in message.get("tool_calls") or []:
+                    if not isinstance(tool_call, dict):
+                        continue
+                    tool_id = cls._tool_call_id(tool_call)
+                    tool_name = cls._tool_call_name(tool_call)
+                    if tool_id in skipped_tool_ids or cls._is_openviking_recall_tool_name(tool_name):
+                        continue
+                    if tool_id in completed_tool_ids:
+                        continue
+                    # Reuse the tool_input parsed in the pre-scan when available
+                    # (non-empty ids are cached); fall back to parsing for the
+                    # uncached empty-id case so we never drop arguments.
+                    prior_call = tool_calls_by_id.get(tool_id) if tool_id else None
+                    tool_input = (
+                        prior_call["tool_input"]
+                        if prior_call is not None
+                        else cls._tool_call_input(tool_call)
+                    )
+                    parts.append({
+                        "type": "tool",
+                        "tool_id": tool_id,
+                        "tool_name": tool_name,
+                        "tool_input": tool_input,
+                        "tool_status": _TOOL_STATUS_PENDING,
+                    })
+
+            if parts:
+                payload_messages.append(payload_message(role, parts))
+
+        flush_tool_parts()
+        return payload_messages
+
+    def sync_turn(
+        self,
+        user_content: str,
+        assistant_content: str,
+        *,
+        session_id: str = "",
+        messages: Optional[List[Dict[str, Any]]] = None,
+    ) -> None:
         """Record the conversation turn in OpenViking's session (non-blocking)."""
         if not self._client:
             return
@@ -2302,6 +2596,40 @@ class OpenVikingMemoryProvider(MemoryProvider):
         if not user_content:
             return
 
+        turn_messages = (
+            self._extract_current_turn_messages(messages, user_content, assistant_content)
+            if messages is not None
+            else []
+        )
+        if turn_messages:
+            turn_messages = [dict(message) for message in turn_messages]
+            for message in turn_messages:
+                if message.get("role") == "user":
+                    message["content"] = user_content
+                    break
+        batch_messages = self._messages_to_openviking_batch(
+            turn_messages,
+            assistant_peer_id=getattr(self, "_agent", _DEFAULT_AGENT),
+        )
+
+        if _sync_trace_enabled():
+            logger.info(
+                "OpenViking sync_turn trace: session_arg=%r cached_session=%r "
+                "messages_param_supported=true messages_present=%s message_count=%s "
+                "turn_message_count=%d batch_message_count=%d user_len=%d assistant_len=%d "
+                "user_preview=%r assistant_preview=%r",
+                session_id,
+                self._session_id,
+                messages is not None,
+                len(messages) if messages is not None else None,
+                len(turn_messages),
+                len(batch_messages),
+                len(str(user_content or "")),
+                len(str(assistant_content or "")),
+                _preview(user_content),
+                _preview(assistant_content),
+            )
+
         # Snapshot the sid and bump the turn counter atomically so a
         # concurrent on_session_switch/on_session_end can't interleave its
         # snapshot+reset between the read and the increment (lost turn) and so
@@ -2313,24 +2641,39 @@ class OpenVikingMemoryProvider(MemoryProvider):
             self._turn_count += 1
 
         def _sync():
-            try:
-                client = self._new_client()
+            def _post_turn(client: _VikingClient) -> None:
+                if batch_messages:
+                    payload = {"messages": batch_messages}
+                    if _sync_trace_enabled():
+                        logger.info(
+                            "OpenViking sync_turn trace: POST /api/v1/sessions/%s/messages/batch payload=%s",
+                            sid,
+                            json.dumps(payload, ensure_ascii=False),
+                        )
+                    try:
+                        client.post(f"/api/v1/sessions/{sid}/messages/batch", payload)
+                        return
+                    except Exception as batch_error:
+                        logger.warning(
+                            "OpenViking structured sync failed; falling back to text sync: %s",
+                            batch_error,
+                        )
+
                 self._post_session_turn(
                     client,
                     sid,
                     user_content[:4000],
-                    assistant_content[:4000],
+                    self._message_text(assistant_content)[:4000],
                 )
+
+            try:
+                client = self._new_client()
+                _post_turn(client)
             except Exception as e:
                 logger.debug("OpenViking sync_turn failed, reconnecting: %s", e)
                 try:
                     client = self._new_client()
-                    self._post_session_turn(
-                        client,
-                        sid,
-                        user_content[:4000],
-                        assistant_content[:4000],
-                    )
+                    _post_turn(client)
                 except Exception as retry_error:
                     logger.warning("OpenViking sync_turn failed: %s", retry_error)
 
diff --git a/plugins/platforms/discord/adapter.py b/plugins/platforms/discord/adapter.py
index 8146ca9de10..607123bbd29 100644
--- a/plugins/platforms/discord/adapter.py
+++ b/plugins/platforms/discord/adapter.py
@@ -14,6 +14,7 @@ import hashlib
 import json
 import logging
 import os
+import re
 import struct
 import subprocess
 import tempfile
@@ -29,6 +30,7 @@ VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
 _DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
 _DISCORD_COMMAND_SYNC_STATE_SUBDIR = "gateway"
 _DISCORD_COMMAND_SYNC_STATE_FILENAME = "discord_command_sync_state.json"
+_DISCORD_NONCONVERSATIONAL_STATE_FILENAME = "discord_nonconversational_messages.json"
 _DISCORD_COMMAND_SYNC_MUTATION_INTERVAL_SECONDS = 4.5
 _DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
 # Discord enforces a hard cap of 100 global application (slash) commands per
@@ -37,6 +39,37 @@ _DISCORD_COMMAND_SYNC_MAX_RATE_LIMIT_SLEEP_SECONDS = 30.0
 # every slash command — not just the overflow ones. We keep the desired set
 # at or below this limit at registration time.
 _DISCORD_MAX_APP_COMMANDS = 100
+_DISCORD_NONCONVERSATIONAL_METADATA_KEYS = frozenset({
+    "non_conversational",
+    "non_conversational_history",
+})
+# Upgrade-bridge fallback only. The primary mechanism is the persisted
+# non-conversational message-ID set populated from explicitly marked sends
+# (metadata["non_conversational"]). These regexes exist solely to recognize
+# status bumps emitted by an older gateway version that pre-dates the marking,
+# so they don't partition history after an upgrade. New emitters should set the
+# metadata flag, not rely on a regex here.
+_DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS = (
+    re.compile(r"^\s*💾\s*Self-improvement review:\s+\S[\s\S]*$", re.IGNORECASE),
+    # Legacy/background-review test doubles used this shorter form before the
+    # self-improvement prefix became the stable emitter contract.
+    re.compile(
+        r"^\s*💾\s+Skill\s+['\"].+?['\"]\s+(?:created|updated|improved|patched)\.?\s*$",
+        re.IGNORECASE,
+    ),
+    re.compile(r"^\s*⏳\s+Working\s+—\s+\d+\s+min(?:\s|$)", re.IGNORECASE),
+    re.compile(
+        r"^\s*\[Background process\s+\S+\s+"
+        r"(?:finished with exit code|is still running~)[\s\S]*\]\s*$",
+        re.IGNORECASE,
+    ),
+    re.compile(
+        r"^\s*(?:✅|❌)\s+Hermes update\s+"
+        r"(?:finished|failed|timed out)[\s\S]*$",
+        re.IGNORECASE,
+    ),
+    re.compile(r"^\s*♻️?\s+Gateway\s+(?:restarted successfully|online\b)[\s\S]*$", re.IGNORECASE),
+)
 
 try:
     import discord
@@ -55,7 +88,6 @@ from pathlib import Path as _Path
 sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
 
 from gateway.config import Platform, PlatformConfig
-import re
 
 from gateway.platforms.helpers import MessageDeduplicator, ThreadParticipationTracker
 from utils import atomic_json_write
@@ -132,6 +164,73 @@ def _find_discord_windows_bundled_opus(discord_module: Any = None) -> Optional[s
     return None
 
 
+class _DiscordNonConversationalMessageTracker:
+    """Persistent bounded set of normalized Discord message IDs that are status noise."""
+
+    _MAX_TRACKED = 2000
+
+    def __init__(self, max_tracked: int = _MAX_TRACKED):
+        self._max_tracked = max_tracked
+        self._ids: dict[str, None] = dict.fromkeys(self._load())
+
+    @staticmethod
+    def _key(message_id: Any) -> str:
+        """Return the stripped string form used to store and look up an ID."""
+        return str(message_id or "").strip()
+
+    def _state_path(self) -> _Path:
+        """Return the JSON state file path under the Hermes home directory."""
+        from hermes_constants import get_hermes_home
+        return get_hermes_home() / _DISCORD_COMMAND_SYNC_STATE_SUBDIR / _DISCORD_NONCONVERSATIONAL_STATE_FILENAME
+
+    def _load(self) -> list[str]:
+        """Read persisted IDs; a missing, unreadable, or non-list file yields []."""
+        path = self._state_path()
+        if not path.exists():
+            return []
+        try:
+            data = json.loads(path.read_text(encoding="utf-8"))
+            if isinstance(data, list):
+                return [key for key in map(self._key, data) if key]
+        except Exception:
+            logger.debug("[%s] Failed to load non-conversational Discord IDs", "Discord", exc_info=True)
+        return []
+
+    def _save(self) -> None:
+        """Trim to the newest ``max_tracked`` IDs and write them (best effort)."""
+        ids = list(self._ids)
+        if len(ids) > self._max_tracked:
+            ids = ids[-self._max_tracked:]
+            self._ids = dict.fromkeys(ids)
+        try:
+            atomic_json_write(self._state_path(), ids, indent=None)
+        except Exception:
+            logger.debug("[%s] Failed to save non-conversational Discord IDs", "Discord", exc_info=True)
+
+    def mark_many(self, message_ids: List[str]) -> None:
+        """Add new IDs and persist only when the set actually changed."""
+        fresh = [key for key in map(self._key, message_ids) if key and key not in self._ids]
+        if fresh:
+            self._ids.update(dict.fromkeys(fresh))
+            self._save()
+
+    def __contains__(self, message_id: str) -> bool:
+        return self._key(message_id) in self._ids
+
+
+def _metadata_marks_nonconversational(metadata: Optional[Dict[str, Any]]) -> bool:
+    """Report whether send metadata carries a truthy non-conversational flag."""
+    if isinstance(metadata, dict):
+        return any(metadata.get(flag) for flag in _DISCORD_NONCONVERSATIONAL_METADATA_KEYS)
+    return False
+
+
+def _looks_like_nonconversational_history_message(content: str) -> bool:
+    """Return True when ``content`` matches one of the legacy status-message patterns."""
+    candidate = content or ""
+    return any(rx.match(candidate) is not None for rx in _DISCORD_NONCONVERSATIONAL_HISTORY_MESSAGE_PATTERNS)
+
+
 def _clean_discord_id(entry: str) -> str:
     """Strip common prefixes from a Discord user ID or username entry.
 
@@ -681,6 +780,9 @@ class DiscordAdapter(BasePlatformAdapter):
         # history backfill to skip the full scan on hot paths.  Falls back to
         # scanning channel.history() on cache miss (cold start / restart).
         self._last_self_message_id: Dict[str, str] = {}
+        # Persistent set of bot-authored lifecycle/status message IDs that
+        # should not act as conversational history boundaries after restart.
+        self._nonconversational_messages = _DiscordNonConversationalMessageTracker()
 
     def _handle_bot_task_done(self, task: asyncio.Task) -> None:
         """Surface post-startup discord.py task exits to the gateway supervisor.
@@ -1577,6 +1679,7 @@ class DiscordAdapter(BasePlatformAdapter):
             thread_id = None
             if metadata and metadata.get("thread_id"):
                 thread_id = metadata["thread_id"]
+            nonconversational = _metadata_marks_nonconversational(metadata)
 
             if thread_id:
                 # Fetch the thread directly — threads are addressed by their own ID.
@@ -1654,7 +1757,10 @@ class DiscordAdapter(BasePlatformAdapter):
             # backfill — avoids a full channel.history() scan on hot paths.
             if message_ids:
                 _target_id = thread_id or chat_id
-                self._last_self_message_id[_target_id] = message_ids[-1]
+                if nonconversational:
+                    self._nonconversational_messages.mark_many(message_ids)
+                elif not _looks_like_nonconversational_history_message(content):
+                    self._last_self_message_id[_target_id] = message_ids[-1]
 
             return SendResult(
                 success=True,
@@ -4203,23 +4309,29 @@ class DiscordAdapter(BasePlatformAdapter):
                 after=_after_obj,
                 oldest_first=False,
             ):
+                # Skip system messages (pins, joins, thread renames, etc.)
+                if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
+                    continue
+
+                content = getattr(msg, "clean_content", msg.content) or ""
+                if (
+                    str(getattr(msg, "id", "")) in self._nonconversational_messages
+                    or _looks_like_nonconversational_history_message(content)
+                ):
+                    continue
+
                 # Stop at our own message — this is the partition point.
                 # Everything before this is already in the session transcript.
                 # (Redundant when _after_obj is set, but needed for cold start.)
                 if msg.author == self._client.user:
                     break
 
-                # Skip system messages (pins, joins, thread renames, etc.)
-                if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
-                    continue
-
                 # Respect DISCORD_ALLOW_BOTS for other bots.
                 # For history context, "mentions" is treated as "all" — we are
                 # deciding what context to show, not whether to respond.
                 if getattr(msg.author, "bot", False) and not include_other_bots:
                     continue
 
-                content = getattr(msg, "clean_content", msg.content) or ""
                 if not content and msg.attachments:
                     content = "(attachment)"
                 if not content:
@@ -4566,6 +4678,13 @@ class DiscordAdapter(BasePlatformAdapter):
         Open-ended mode (``choices`` empty/None): renders the question as
         plain embed text — no buttons. The gateway's text-intercept captures
         the next message in this session and resolves the clarify.
+
+        Choice normalisation: ``choices`` may contain bare strings OR dicts
+        (LLMs sometimes emit ``[{"description": "..."}]`` instead of bare
+        strings, which would otherwise render as raw Python repr on the
+        button label). Dict choices are unwrapped against the canonical
+        LLM tool-call keys ``label``, ``description``, ``text``, ``title``
+        in that order. Dicts with none of those keys are dropped.
         """
         if not self._client or not DISCORD_AVAILABLE:
             return SendResult(success=False, error="Not connected")
@@ -4591,8 +4710,37 @@ class DiscordAdapter(BasePlatformAdapter):
                 color=discord.Color.orange(),
             )
 
+            # Normalise choices: LLMs sometimes emit `[{"description": "..."}]`
+            # instead of bare strings, which would render as raw Python repr on
+            # the button label. Unwrap the common shapes, then stringify.
+            def _flatten_choice(c):
+                """Return a stripped label for one raw choice, or an empty string to drop it."""
+                if c is None:
+                    return ""
+                if isinstance(c, str):
+                    return c.strip()
+                if isinstance(c, dict):
+                    # Use only the canonical user-facing keys, in the order
+                    # an LLM is most likely to emit them. 'name' and 'value'
+                    # are deliberately NOT here: they are Discord-component-
+                    # shaped fields that can appear in dicts never meant as
+                    # choices (e.g. a Button-shaped object passed by mistake),
+                    # and using them would leak raw enum values or model
+                    # identifiers onto user-facing buttons. A dict with none
+                    # of the canonical keys is dropped: a garbage button
+                    # label is worse than no button at all.
+                    for key in ("label", "description", "text", "title"):
+                        v = c.get(key)
+                        if isinstance(v, str) and v.strip():
+                            return v.strip()
+                    return ""
+                if isinstance(c, (list, tuple)):
+                    # Drop elements that flatten to "" so the label has no doubled spaces.
+                    return " ".join(filter(None, map(_flatten_choice, c)))
+                return str(c).strip()
+
             clean_choices = [
-                str(c).strip() for c in (choices or []) if c is not None and str(c).strip()
+                s for s in (_flatten_choice(c) for c in (choices or [])) if s
             ]
             # Discord allows up to 5 buttons per row, 5 rows per view = 25.
             # We reserve one slot for the "Other" button, so cap at 24 choices.
@@ -4657,6 +4805,8 @@ class DiscordAdapter(BasePlatformAdapter):
             )
             msg = await channel.send(embed=embed, view=view)
             view._message = msg  # store for on_timeout expiration editing
+            if _metadata_marks_nonconversational(metadata):
+                self._nonconversational_messages.mark_many([str(msg.id)])
             return SendResult(success=True, message_id=str(msg.id))
         except Exception as e:
             return SendResult(success=False, error=str(e))
@@ -6129,10 +6279,47 @@ def _define_discord_view_classes() -> None:
             self.resolved = False
 
             for index, choice in enumerate(self.choices):
-                # Discord button labels are capped at 80 chars.
-                label_body = choice if len(choice) <= 75 else choice[:72] + "..."
+                # Discord button labels are capped at 80 chars, so the label
+                # body gets whatever remains after the "N. " prefix. Mobile
+                # clients show far less (often <40 chars before wrapping to
+                # 2 lines and clipping the second), so when we must truncate
+                # we cut at a word boundary to keep the visible tail readable.
+                #
+                # Cut strategy (most-preferred to least-preferred):
+                #   1. Last space in the trailing half of the budget
+                #      (cleanest word boundary)
+                #   2. Last soft boundary in the trailing half of the
+                #      budget (hyphen, comma, period, paren)
+                #   3. Hard cut at the budget limit (last resort)
+                prefix = f"{index + 1}. "
+                budget = 80 - len(prefix)
+                if len(choice) <= budget:
+                    label_body = choice
+                else:
+                    truncated = choice[: budget - 1].rstrip()
+                    cut_at = -1
+                    # 1. Last space in the trailing half of the budget.
+                    space = truncated.rfind(" ")
+                    if space >= budget // 2:
+                        cut_at = space
+                    # 2. Soft boundary — only if no word boundary found.
+                    # Find the latest soft boundary in the trailing half
+                    # of the budget; that maximizes preserved text length.
+                    # Cut AT the soft boundary (inclusive) so the label
+                    # ends on the soft char (e.g. "-" or ",") rather than
+                    # on the alpha char that followed it.
+                    if cut_at < 0:
+                        latest_soft = max(
+                            (truncated.rfind(s) for s in ("-", ",", ".", ")")),
+                            default=-1,
+                        )
+                        if latest_soft >= budget // 2:
+                            cut_at = latest_soft + 1
+                    if cut_at > 0:
+                        truncated = truncated[:cut_at]
+                    label_body = truncated.rstrip() + "…"
                 button = discord.ui.Button(
-                    label=f"{index + 1}. {label_body}",
+                    label=f"{prefix}{label_body}",
                     style=discord.ButtonStyle.primary,
                     custom_id=f"clarify:{clarify_id}:{index}",
                 )
diff --git a/pyproject.toml b/pyproject.toml
index 6e371126dd2..cab849dc755 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -258,7 +258,7 @@ youtube = [
 # `hermes dashboard` (localhost SPA + API).  Not in core to keep the default install lean.
 # starlette==1.0.1 pinned for CVE-2026-48710 (BadHost) — fastapi pulls Starlette
 # transitively and pre-1.0.1 is the vulnerable range. See the mcp extra above.
-web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1", "python-multipart==0.0.20"]
+web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1", "python-multipart==0.0.27"]
 all = [
   # Policy (2026-05-12): `[all]` includes only extras that genuinely
   # CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
diff --git a/run_agent.py b/run_agent.py
index 65b95483e54..7c195b35ca8 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -4076,11 +4076,13 @@ class AIAgent:
         # Defensive: strip Responses-only kwargs that can leak in under an
         # api_mode-flip race (the Anthropic SDK raises a non-retryable
         # TypeError on them). See #31673.
-        from agent.anthropic_adapter import sanitize_anthropic_kwargs
-        sanitize_anthropic_kwargs(
-            api_kwargs, log_prefix=getattr(self, "log_prefix", "")
+        from agent.anthropic_adapter import create_anthropic_message
+        return create_anthropic_message(
+            self._anthropic_client,
+            api_kwargs,
+            log_prefix=getattr(self, "log_prefix", ""),
+            prefer_stream=not bool(getattr(self, "_disable_streaming", False)),
         )
-        return self._anthropic_client.messages.create(**api_kwargs)
 
     def _rebuild_anthropic_client(self) -> None:
         """Rebuild the Anthropic client after an interrupt or stale call.
diff --git a/scripts/release.py b/scripts/release.py
index 6c5d33ec3a1..772b11541cd 100755
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 
 # Auto-extracted from noreply emails + manual overrides
 AUTHOR_MAP = {
+    "charles@salesondemand.io": "salesondemandio",
     "victor@rocketfueldev.com": "victor-kyriazakos",
     "87440198+JoaoMarcos44@users.noreply.github.com": "JoaoMarcos44",
     "286497132+srojk34@users.noreply.github.com": "srojk34",
@@ -56,6 +57,8 @@ AUTHOR_MAP = {
     "despitemeguru@gmail.com": "definitelynotguru",
     "chaslui@outlook.com": "ChasLui",
     "rio.jeong@thebytesize.ai": "rio-jeong",
+    "cdddo@users.noreply.github.com": "Cdddo",
+    "carlos.dddo@gmail.com": "Cdddo",
     "yehaotian@xuanshudeMac-mini.local": "ArcanePivot",
     "dbeyer7@gmail.com": "benegessarit",
     "264773240+MrDiamondBallz@users.noreply.github.com": "MrDiamondBallz",
@@ -103,6 +106,7 @@ AUTHOR_MAP = {
     "290859878+synapsesx@users.noreply.github.com": "synapsesx",
     "157689911+itsflownium@users.noreply.github.com": "itsflownium",
     "dirtyren@users.noreply.github.com": "dirtyren",
+    "johnjacobkenny@users.noreply.github.com": "johnjacobkenny",
     "chanyoung.kim@nota.ai": "channkim",
     "stevenn.damatoo@gmail.com": "x1erra",
     "evansrory@gmail.com": "zimigit2020",
@@ -415,6 +419,7 @@ AUTHOR_MAP = {
     "androidhtml@yandex.com": "hllqkb",
     "25840394+Bongulielmi@users.noreply.github.com": "Bongulielmi",
     "jonathan.troyer@overmatch.com": "JTroyerOvermatch",
+    "53142663+tt-a1i@users.noreply.github.com": "tt-a1i",  # PR #48933 (SSE-only Anthropic stream aggregation, #48923)
     "harryykyle1@gmail.com": "hharry11",
     "wysie@users.noreply.github.com": "wysie",
     "ronhi@buildabear1.localdomain": "RonHillDev",  # PR #29523 salvage (machine-local commit email)
@@ -1528,6 +1533,7 @@ AUTHOR_MAP = {
     "erik.engervall@gmail.com": "erikengervall",  # PR #28774 (firecrawl integration tag)
     "egilewski@egilewski.com": "egilewski",  # PR #30432 (MEDIA path traversal fix, GHSA-jmf9-9729-7pp8)
     "edison@mcclean.codes": "McClean-Edison",  # PR #29817 (register_auxiliary_task plugin API)
+    "OYLFLMH@users.noreply.github.com": "OYLFLMH",  # PR #48312 salvage (cli_refresh_interval config, #48309)
     "zhangsamuel12@gmail.com": "SamuelZ12",  # PR #7480 (show recap after in-session resume)
     "490408354@qq.com": "daizhonggeng",  # PR #9020 (numbered /resume selection)
     "claw@openclaw.ai": "wanwan2qq",  # PR #10215 (strip brackets/quotes from /resume; gateway session-ID lookup)
@@ -1577,6 +1583,8 @@ AUTHOR_MAP = {
     "sunsky.lau@gmail.com": "liuhao1024",  # PR #45494 salvage (claim session slot before auto-resume task; #45456)
     "andrewdmwalker@gmail.com": "capt-marbles",  # PR #38440 salvage (resolve xAI OAuth credentials across profiles; #43589)
     "infinitycrew39@gmail.com": "infinitycrew39",  # PR #47945 salvage (scope langfuse trace state by turn/request ids; #48292)
+    "eurekaxun@163.com": "huangxun375-stack",  # PR #37251 / #48894 structured OpenViking sync
+    "218421507+Sahil-SS9@users.noreply.github.com": "Sahil-SS9",  # PR #48466/#44919/#44909/#42209 salvage (cron/checkpoint/kanban/skill)
 }
 
 
diff --git a/skills/software-development/simplify-code/SKILL.md b/skills/software-development/simplify-code/SKILL.md
index 63c3e11cefa..b6205091642 100644
--- a/skills/software-development/simplify-code/SKILL.md
+++ b/skills/software-development/simplify-code/SKILL.md
@@ -87,8 +87,20 @@ toolsets (so they can `git`, `read_file`, and `search_files`/grep).
 
 Tell each reviewer to:
 - Search the existing codebase for evidence (don't reason from the diff alone).
-- Report findings as a concrete list: `file:line → problem → suggested fix`.
-- Rank each finding `high` / `medium` / `low` confidence.
+- **Apply Chesterton's Fence:** before flagging anything for removal, run
+  `git blame` on the line to understand why it exists. If you can't determine
+  the original purpose, mark it `confidence: low` — don't guess.
+- Report findings as structured output with confidence and risk:
+  ```
+  file:line → problem → suggested fix | confidence: high/medium/low | risk: SAFE/CAREFUL/RISKY
+  ```
+  - **SAFE** = proven not to affect behavior (unused imports, commented-out
+    code, pass-through wrappers). Auto-apply these.
+  - **CAREFUL** = improves without changing semantics (rename local variable,
+    flatten nested ternary, extract helper). Apply with test verification.
+  - **RISKY** = may change behavior or break public contracts (N+1
+    restructuring, public API rename, memory lifecycle change). Flag for
+    human review — do NOT auto-apply.
 - Skip nits and style-only churn. Only flag things that materially improve
   the code.
 
@@ -112,7 +124,11 @@ Pass these three goals (drop any the user's focus excludes):
 > blocks that should share an abstraction); leaky abstractions (exposing
 > internals, breaking an existing encapsulation boundary); stringly-typed
 > code (raw strings where a constant/enum/registry already exists — check the
-> canonical registries before flagging). For each, give the concrete refactor.
+> canonical registries before flagging); AI-generated slop patterns (extra
+> comments restating obvious code like `// increment counter` above `count++`;
+> unnecessary defensive null-checks on already-validated inputs; `as any`
+> casts that bypass the type system; patterns inconsistent with the rest of
+> the file). For each, give the concrete refactor.
 
 **Reviewer 3 — Efficiency**
 > Review this diff for efficiency problems. Look for: unnecessary work
@@ -122,8 +138,10 @@ Pass these three goals (drop any the user's focus excludes):
 > TOCTOU anti-patterns (existence pre-checks before an op instead of doing
 > the op and handling the error); memory issues (unbounded growth, missing
 > cleanup, listener/handle leaks); overly broad reads (loading whole files
-> when a slice would do). For each, give the concrete fix and why it's faster
-> or lighter.
+> when a slice would do); silent failures (empty catch blocks, ignored error
+> returns, `except: pass`, `.catch(() => {})` with no handling, error
+> propagation gaps — these hide bugs and should at minimum log before
+> swallowing). For each, give the concrete fix and why it's faster or safer.
 
 ### Phase 3 — Aggregate and apply
 
@@ -138,13 +156,22 @@ Wait for all three to return (batch mode returns them together).
    Don't apply a perf "fix" that hurts clarity unless the path is genuinely
    hot. When two suggestions are mutually exclusive and both defensible, pick
    the one that touches less code and note the alternative.
-4. **Apply** the surviving fixes directly with `patch` / `write_file` — unless
-   the user asked for a dry run, in which case present the list and ask first.
+4. **Apply in risk-tier order:**
+   - **SAFE first** (auto-apply): unused imports, commented-out code,
+     pass-through wrappers, redundant type assertions. Run tests after.
+   - **CAREFUL next** (apply with verification, one file at a time): rename
+     locals, flatten ternaries, extract helpers, consolidate dupes. Run tests
+     after each file. Revert any that break.
+   - **RISKY last** (flag for review — do NOT auto-apply): N+1 restructuring,
+     public API changes, concurrency fixes, error-handling changes. Present
+     each with risk description and test coverage status.
+   If the user opted for a dry run, present all three tiers and apply nothing.
 5. **Verify** you didn't break anything: run the project's targeted tests for
    the touched files (not the full suite), and re-run any linter/type check the
    repo uses. If a fix breaks a test, revert that one fix and report it.
 6. **Summarize** what you changed: a short list of applied fixes grouped by
-   reviewer category, plus any findings you deliberately skipped and why.
+   reviewer category and risk tier, plus any findings you deliberately skipped
+   and why.
 
 ## Pitfalls
 
@@ -166,6 +193,16 @@ Wait for all three to return (batch mode returns them together).
 - **Large diffs blow context.** If the diff is huge, scope it down before
   delegating — three subagents each carrying a 5000-line diff is expensive and
   may truncate.
+- **Over-trusting dead code tools.** `knip`, `ts-prune`, and `depcheck` flag
+  exports that ARE used dynamically (string-based imports, reflection). Always
+  grep for the symbol name before removing — a tool's "unused" report is not proof.
+- **Renaming without checking public contracts.** Export names, API route
+  paths, DB column names, and config keys are contracts — even if the name is
+  bad, renaming breaks consumers. Tag public-contract changes as RISKY; never
+  auto-rename them.
+- **Removing "unnecessary" error handling.** An empty catch block or ignored
+  error might be intentional — the error is expected and benign in that
+  context. Flag it, don't remove it; let the human decide.
 
 ## Related
 
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index b2960b703c7..8ec6102f2e5 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -38,6 +38,20 @@ def _jwt_with_claims(claims: dict) -> str:
     return f"{header}.{payload}.sig"
 
 
+class _FakeAnthropicStream:  # context-manager stub used as the return value of a mocked messages.stream()
+    def __init__(self, final_message):
+        self._final_message = final_message  # handed back verbatim by get_final_message()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc, tb):
+        return False  # falsy: an exception raised inside the with-block propagates
+
+    def get_final_message(self):
+        return self._final_message
+
+
 @pytest.fixture(autouse=True)
 def _clean_env(monkeypatch):
     """Strip provider env vars so each test starts clean."""
@@ -990,6 +1004,37 @@ class TestVisionClientFallback:
         assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
         assert model == "claude-haiku-4-5-20251001"
 
+    def test_anthropic_auxiliary_client_aggregates_stream_response(self):  # aux client must use messages.stream(), not create()
+        from agent.auxiliary_client import AnthropicAuxiliaryClient
+
+        final_message = SimpleNamespace(
+            content=[SimpleNamespace(type="text", text="streamed aux response")],
+            stop_reason="end_turn",
+            usage=SimpleNamespace(input_tokens=3, output_tokens=4),
+        )
+        messages_api = SimpleNamespace(
+            stream=MagicMock(return_value=_FakeAnthropicStream(final_message)),
+            create=MagicMock(return_value="raw event-stream text"),  # non-Message sentinel; asserted unused below
+        )
+        real_client = SimpleNamespace(messages=messages_api)
+        client = AnthropicAuxiliaryClient(
+            real_client,
+            "claude-sonnet-4-20250514",
+            "sk-test",
+            "https://sse-only.example/v1",
+        )
+
+        response = client.chat.completions.create(
+            messages=[{"role": "user", "content": "summarize"}],
+            max_tokens=16,
+        )
+
+        messages_api.stream.assert_called_once()  # streaming path taken exactly once
+        messages_api.create.assert_not_called()
+        assert response.choices[0].message.content == "streamed aux response"
+        assert response.usage.prompt_tokens == 3  # mapped from usage.input_tokens
+        assert response.usage.completion_tokens == 4  # mapped from usage.output_tokens
+
 
 class TestAuxiliaryPoolAwareness:
     def test_try_nous_uses_pool_entry(self):
diff --git a/tests/agent/test_message_content.py b/tests/agent/test_message_content.py
new file mode 100644
index 00000000000..0207d63600b
--- /dev/null
+++ b/tests/agent/test_message_content.py
@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+from agent.message_content import flatten_message_text
+
+
+def test_flatten_message_text_accepts_chat_and_responses_text_parts():  # all four part types contribute their text
+    content = [
+        {"type": "text", "text": "chat text"},
+        {"type": "input_text", "text": "user text"},
+        {"type": "output_text", "text": "assistant text"},
+        {"type": "summary_text", "text": "summary text"},
+    ]
+
+    assert flatten_message_text(content) == "chat text\nuser text\nassistant text\nsummary text"  # input order kept, joined by "\n"
+
+
+def test_flatten_message_text_accepts_object_parts():  # attribute-style parts and "content"-keyed dicts are both read
+    content = [
+        SimpleNamespace(type="output_text", text="object text"),  # object with .type/.text instead of dict keys
+        {"content": "legacy content"},  # dict with no "type"/"text", only a "content" key
+    ]
+
+    assert flatten_message_text(content) == "object text\nlegacy content"
diff --git a/tests/agent/test_secret_scope.py b/tests/agent/test_secret_scope.py
new file mode 100644
index 00000000000..1b8a1cace40
--- /dev/null
+++ b/tests/agent/test_secret_scope.py
@@ -0,0 +1,130 @@
+"""Tests for the profile-scoped credential primitive (Workstream A / Phase 2)."""
+import pytest
+
+from agent import secret_scope as ss
+
+
+@pytest.fixture(autouse=True)
+def _reset_multiplex():
+    """Ensure each test starts and ends with multiplexing off (it's a global)."""
+    ss.set_multiplex_active(False)  # setup: start from the default (off)
+    yield  # the test body runs here
+    ss.set_multiplex_active(False)  # teardown: undo any set_multiplex_active(True) made by the test
+
+
+class TestMultiplexInactiveBackwardCompat:
+    """Default deployment: get_secret transparently reads os.environ."""
+
+    def test_reads_environ(self, monkeypatch):  # a set variable is returned as-is
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test")
+        assert ss.get_secret("ANTHROPIC_API_KEY") == "sk-test"
+
+    def test_missing_returns_default(self, monkeypatch):
+        monkeypatch.delenv("NOPE_KEY", raising=False)
+        assert ss.get_secret("NOPE_KEY") is None  # implicit default is None
+        assert ss.get_secret("NOPE_KEY", "fallback") == "fallback"  # explicit default is returned
+
+    def test_no_raise_without_scope(self, monkeypatch):
+        monkeypatch.delenv("SOME_KEY", raising=False)
+        # multiplex off => unscoped read is fine, returns default
+        assert ss.get_secret("SOME_KEY") is None
+
+
+class TestMultiplexActiveFailClosed:
+    """Multiplex on: an unscoped secret read raises instead of leaking."""
+
+    def test_unscoped_read_raises(self, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-leaky")
+        ss.set_multiplex_active(True)
+        with pytest.raises(ss.UnscopedSecretError):  # the value is in os.environ, yet the read must fail
+            ss.get_secret("ANTHROPIC_API_KEY")
+
+    def test_scoped_read_uses_scope_not_environ(self, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-from-environ")
+        ss.set_multiplex_active(True)
+        token = ss.set_secret_scope({"ANTHROPIC_API_KEY": "sk-from-scope"})
+        try:
+            assert ss.get_secret("ANTHROPIC_API_KEY") == "sk-from-scope"
+        finally:
+            ss.reset_secret_scope(token)  # always undo the scope, even when the assert fails
+
+    def test_scoped_missing_key_returns_default_not_environ(self, monkeypatch):
+        # Even though the value exists in os.environ, a scope is authoritative:
+        # an absent scope key must NOT fall through to the (cross-profile) env.
+        monkeypatch.setenv("OPENAI_API_KEY", "sk-other-profile")
+        ss.set_multiplex_active(True)
+        token = ss.set_secret_scope({"ANTHROPIC_API_KEY": "sk-mine"})
+        try:
+            assert ss.get_secret("OPENAI_API_KEY") is None
+            assert ss.get_secret("OPENAI_API_KEY", "d") == "d"
+        finally:
+            ss.reset_secret_scope(token)  # always undo the scope, even when an assert fails
+
+    def test_global_env_still_reads_environ_under_multiplex(self, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", "/opt/data")
+        ss.set_multiplex_active(True)
+        # No scope, multiplex on — but HERMES_HOME is global, so no raise.
+        assert ss.get_secret("HERMES_HOME") == "/opt/data"
+
+    def test_kanban_prefix_is_global(self, monkeypatch):  # same expectation for a HERMES_KANBAN_* name
+        monkeypatch.setenv("HERMES_KANBAN_DB", "/x/kanban.db")
+        ss.set_multiplex_active(True)
+        assert ss.get_secret("HERMES_KANBAN_DB") == "/x/kanban.db"  # no scope set, still readable
+
+
+class TestScopeIsolation:
+    """Two scopes never see each other's secrets."""
+
+    def test_nested_scopes_restore(self):  # inner scope shadows the outer one, then the outer value comes back
+        ss.set_multiplex_active(True)
+        t1 = ss.set_secret_scope({"K": "a"})
+        try:
+            assert ss.get_secret("K") == "a"
+            t2 = ss.set_secret_scope({"K": "b"})
+            try:
+                assert ss.get_secret("K") == "b"  # inner scope is the one consulted
+            finally:
+                ss.reset_secret_scope(t2)
+            assert ss.get_secret("K") == "a"  # resetting with t2 restored the outer scope
+        finally:
+            ss.reset_secret_scope(t1)
+
+
+class TestEnvFileParsing:
+    """load_env_file parses without mutating os.environ."""
+
+    def test_parses_basic(self, tmp_path):
+        env = tmp_path / ".env"
+        env.write_text(
+            "# comment\n"
+            "ANTHROPIC_API_KEY=sk-abc\n"
+            "export OPENAI_API_KEY=sk-def\n"  # leading "export " must not end up in the key
+            'QUOTED="quoted-value"\n'  # surrounding double quotes are expected to be stripped
+            "SINGLE='single'\n"  # surrounding single quotes are expected to be stripped
+            "\n"
+            "BAD_LINE_NO_EQUALS\n"  # no "=": expected to be skipped, not raised on
+        )
+        out = ss.load_env_file(env)
+        assert out == {
+            "ANTHROPIC_API_KEY": "sk-abc",
+            "OPENAI_API_KEY": "sk-def",
+            "QUOTED": "quoted-value",
+            "SINGLE": "single",
+        }
+
+    def test_does_not_mutate_environ(self, tmp_path, monkeypatch):
+        monkeypatch.delenv("ZZZ_KEY", raising=False)
+        env = tmp_path / ".env"
+        env.write_text("ZZZ_KEY=secret\n")
+        ss.load_env_file(env)
+        import os
+        assert "ZZZ_KEY" not in os.environ  # parsing returned data only; the process env is untouched
+
+    def test_missing_file_returns_empty(self, tmp_path):  # a nonexistent path yields {} rather than an error
+        assert ss.load_env_file(tmp_path / "nope.env") == {}
+
+    def test_build_profile_secret_scope(self, tmp_path):  # tmp_path plays the profile directory holding a .env
+        (tmp_path / ".env").write_text("ANTHROPIC_API_KEY=sk-profile\n")
+        assert ss.build_profile_secret_scope(tmp_path) == {
+            "ANTHROPIC_API_KEY": "sk-profile"
+        }
diff --git a/tests/conftest.py b/tests/conftest.py
index 2da7d4a1eb4..468926b0f51 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -534,6 +534,14 @@ def pytest_configure(config):  # noqa: D401 — pytest hook
         "behaviour — e.g. PTY tests that signal their own child).",
     )
 
+    # The pyproject addopts pin ``--timeout-method=signal`` relies on
+    # ``signal.SIGALRM``, which does not exist on Windows — pytest-timeout
+    # raises AttributeError at timer setup and the whole run aborts before any
+    # test executes. Fall back to the thread-based timer on Windows so the
+    # suite runs natively there (POSIX keeps the more reliable signal method).
+    if sys.platform == "win32" and getattr(config.option, "timeout_method", None) == "signal":
+        config.option.timeout_method = "thread"
+
 
 @pytest.fixture(autouse=True)
 def _live_system_guard(request, monkeypatch):
diff --git a/tests/gateway/relay/test_relay_adapter.py b/tests/gateway/relay/test_relay_adapter.py
index 64d6aab2f86..f176eb5728c 100644
--- a/tests/gateway/relay/test_relay_adapter.py
+++ b/tests/gateway/relay/test_relay_adapter.py
@@ -75,3 +75,68 @@ async def test_send_without_transport_returns_failure():
     result = await a.send("chat1", "hello")
     assert result.success is False
     assert result.error == "no transport"
+
+
+class _CaptureTransport:
+    """Minimal RelayTransport stand-in that records the outbound action."""
+
+    def __init__(self):
+        self.sent = None  # last action given to send_outbound(); None until a send happens
+
+    def set_inbound_handler(self, h):  # noqa: D401
+        self._h = h  # stored only; this stub never invokes it
+
+    async def send_outbound(self, action):
+        self.sent = action
+        return {"success": True, "message_id": "m1"}  # canned success reply
+
+
+def _make_event(chat_id="chan-1", guild_id="guild-9"):  # build a RELAY text MessageEvent carrying the given chat/guild
+    from gateway.platforms.base import MessageEvent, MessageType
+    from gateway.session import SessionSource
+
+    src = SessionSource(
+        platform=Platform.RELAY,
+        chat_id=chat_id,
+        chat_type="channel",
+        guild_id=guild_id,
+    )
+    return MessageEvent(text="hi", source=src, message_type=MessageType.TEXT)
+
+
+@pytest.mark.asyncio
+async def test_send_reattaches_guild_id_from_inbound_scope():
+    """The connector's egress guard resolves the owning tenant from
+    metadata.guild_id; the gateway's generic delivery path drops it, so the
+    relay adapter must re-attach the guild scope learned from the inbound event.
+    Regression for live 'discord egress declined: target not routed to an
+    onboarded tenant'."""
+    t = _CaptureTransport()
+    a = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=t)
+    # Simulate the connector delivering an inbound message in guild-9 / chan-1,
+    # but don't run the full handle_message pipeline — just the scope capture.
+    a._capture_scope(_make_event(chat_id="chan-1", guild_id="guild-9"))
+
+    await a.send("chan-1", "the reply")  # no metadata passed: guild_id can only come from the captured scope
+
+    assert t.sent["metadata"].get("guild_id") == "guild-9"
+
+
+@pytest.mark.asyncio
+async def test_send_without_known_scope_omits_guild_id():
+    """A chat we never saw inbound (e.g. a DM) gets no guild_id — no-op, never
+    invents a scope."""
+    t = _CaptureTransport()
+    a = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=t)
+    await a.send("unknown-chat", "hi")  # _capture_scope was never called for this chat
+    assert "guild_id" not in t.sent["metadata"]
+
+
+@pytest.mark.asyncio
+async def test_send_preserves_explicit_guild_id():
+    """An explicitly-provided metadata.guild_id is never overwritten."""
+    t = _CaptureTransport()
+    a = RelayAdapter(PlatformConfig(), make_desc(platform="discord"), transport=t)
+    a._capture_scope(_make_event(chat_id="chan-1", guild_id="guild-9"))  # a captured scope exists for chan-1 ...
+    await a.send("chan-1", "hi", metadata={"guild_id": "explicit-1"})  # ... but the caller's value must win
+    assert t.sent["metadata"]["guild_id"] == "explicit-1"
diff --git a/tests/gateway/relay/test_ws_transport.py b/tests/gateway/relay/test_ws_transport.py
index dcb3f6c714f..00aa9b43327 100644
--- a/tests/gateway/relay/test_ws_transport.py
+++ b/tests/gateway/relay/test_ws_transport.py
@@ -177,3 +177,25 @@ async def test_disconnect_fails_pending_waiters_cleanly(server):
     # After disconnect, an outbound returns a structured failure rather than hanging.
     result = await t.send_outbound({"op": "send", "chat_id": "c", "content": "x"})
     assert result["success"] is False
+
+
+def test_https_url_normalized_to_wss():
+    """The relay URL is configured once as the http(s):// BASE (for the provision
+    POST), but websockets.connect needs ws(s):// and the connector mounts its WS
+    server at /relay. The transport must convert scheme AND ensure the /relay
+    path. Regression for the live staging failures 'scheme isn't ws or wss' then
+    'server rejected WebSocket connection: HTTP 400' (wrong path)."""
+    t = WebSocketRelayTransport("https://connector.example", "discord", "b")
+    assert t._url == "wss://connector.example/relay"  # https -> wss, /relay appended
+    t2 = WebSocketRelayTransport("http://connector.local:8080", "discord", "b")
+    assert t2._url == "ws://connector.local:8080/relay"  # http -> ws, port kept, /relay appended
+
+
+def test_ws_dial_url_idempotent_with_scheme_and_path():
+    # An existing ws(s):// scheme is kept and /relay is never appended twice; a trailing slash is dropped.
+    t = WebSocketRelayTransport("wss://connector.example/relay", "discord", "b")
+    assert t._url == "wss://connector.example/relay"  # already normalized: returned unchanged
+    t2 = WebSocketRelayTransport("https://connector.example/relay/", "discord", "b")
+    assert t2._url == "wss://connector.example/relay"  # scheme converted, trailing "/" dropped, no second /relay
+    t3 = WebSocketRelayTransport("ws://127.0.0.1:9", "discord", "b")
+    assert t3._url == "ws://127.0.0.1:9/relay"  # ws:// kept as-is, missing /relay added
diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py
index 95d49d8b4f1..ac5e29c4d3c 100644
--- a/tests/gateway/test_api_server.py
+++ b/tests/gateway/test_api_server.py
@@ -337,6 +337,40 @@ class TestAdapterInit:
         assert isinstance(agent, FakeAgent)
         assert captured["reasoning_config"] == {"enabled": True, "effort": "xhigh"}
 
+    def test_create_agent_refreshes_max_iterations_from_runtime_config(self, monkeypatch):  # AIAgent must be built with max_iterations=200
+        captured = {}
+
+        class FakeAgent:
+            def __init__(self, **kwargs):
+                captured.update(kwargs)  # record constructor kwargs for the assertion at the end
+
+        monkeypatch.setattr("run_agent.AIAgent", FakeAgent)
+        monkeypatch.setattr(
+            "gateway.run._resolve_runtime_agent_kwargs",
+            lambda: {
+                "provider": "openai",
+                "base_url": "https://example.test/v1",
+                "api_mode": "chat_completions",
+            },
+        )
+        monkeypatch.setattr("gateway.run._resolve_gateway_model", lambda: "gpt-5")
+        monkeypatch.setattr("gateway.run._load_gateway_config", lambda: {"agent": {"max_turns": 200}})
+        monkeypatch.setattr(
+            "gateway.run.GatewayRunner._load_reasoning_config",
+            staticmethod(lambda: {}),
+        )
+        monkeypatch.setattr("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None))
+        monkeypatch.setattr("gateway.run._current_max_iterations", lambda: 200)  # pinned to the same 200 as max_turns above
+        monkeypatch.setattr("hermes_cli.tools_config._get_platform_tools", lambda *_: set())
+
+        adapter = APIServerAdapter(PlatformConfig(enabled=True))
+        monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None)  # stub out the session DB setup
+
+        agent = adapter._create_agent(session_id="api-session")
+
+        assert isinstance(agent, FakeAgent)
+        assert captured["max_iterations"] == 200
+
 
 # ---------------------------------------------------------------------------
 # Auth checking
diff --git a/tests/gateway/test_cached_agent_max_iterations.py b/tests/gateway/test_cached_agent_max_iterations.py
new file mode 100644
index 00000000000..fcd523c70ef
--- /dev/null
+++ b/tests/gateway/test_cached_agent_max_iterations.py
@@ -0,0 +1,92 @@
+"""Regression tests for PR #48127: cached agent max_iterations refresh.
+
+When a long-lived gateway reuses an agent from its cache, the agent must run
+the *current* configured iteration budget — not the budget it was constructed
+with on the first turn of that session. Two pieces make that true:
+
+1. ``GatewayRunner._init_cached_agent_for_turn`` must NOT reset
+   ``max_iterations`` itself (the gateway refreshes it explicitly right after,
+   from current config). If this helper ever started clobbering it, the
+   gateway's refresh would be silently undone.
+2. The per-turn budget object is rebuilt from ``agent.max_iterations`` at the
+   start of every turn (``agent/turn_context.py`` -> ``IterationBudget``), so
+   refreshing ``max_iterations`` on the cached agent is sufficient to change
+   the operative cap the agent loop checks.
+
+The helper tests call the real ``_init_cached_agent_for_turn``; the budget
+test mirrors the refresh + rebuild by hand and only guards ``IterationBudget``.
+"""
+
+import time
+from types import SimpleNamespace
+
+from agent.iteration_budget import IterationBudget
+
+
+def _make_cached_agent(max_iterations: int) -> SimpleNamespace:
+    """A minimal stand-in cached agent with the attributes the helpers touch."""
+    # The turn loop checks both api_call_count >= max_iterations AND
+    # iteration_budget.remaining <= 0 (turn_finalizer.py), so the budget must
+    # also reflect the new cap. Seed it with the stale value to prove the
+    # refresh propagates.
+    return SimpleNamespace(
+        _last_activity_ts=time.time() - 1000,
+        _last_activity_desc="previous turn",
+        _api_call_count=42,  # non-zero so a per-turn reset to 0 is observable
+        _last_flushed_db_idx=5,  # non-zero so a per-turn reset to 0 is observable
+        max_iterations=max_iterations,
+        iteration_budget=IterationBudget(max_iterations),
+    )
+
+
+def test_init_cached_agent_for_turn_does_not_touch_max_iterations():
+    """The per-turn reset helper must leave max_iterations untouched.
+
+    The gateway refreshes max_iterations explicitly right after calling this
+    helper; if the helper ever reset it, that refresh would be undone.
+    """
+    from gateway.run import GatewayRunner
+
+    agent = _make_cached_agent(90)
+    GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0)  # depth 0: the full per-turn reset asserted below
+
+    # Per-turn state was reset...
+    assert agent._api_call_count == 0
+    assert agent._last_activity_desc == "starting new turn (cached)"
+    assert agent._last_flushed_db_idx == 0
+    # ...but the iteration budget was NOT changed by the helper itself.
+    assert agent.max_iterations == 90
+
+
+def test_init_cached_agent_preserves_max_iterations_on_interrupt_depth():
+    """Interrupt-recursive turns must also leave max_iterations alone."""
+    from gateway.run import GatewayRunner
+
+    agent = _make_cached_agent(200)
+    GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1)  # depth 1: the interrupt-recursive case
+
+    # Activity timestamps preserved for the inactivity watchdog (#15654)...
+    assert agent._last_activity_desc == "previous turn"
+    # ...and max_iterations untouched.
+    assert agent.max_iterations == 200
+
+
+def test_refreshed_max_iterations_propagates_to_turn_budget():
+    """Refreshing max_iterations on a cached agent changes the operative cap.
+
+    The gateway sets ``agent.max_iterations = max_iterations`` on cache reuse;
+    the new turn's setup then rebuilds ``iteration_budget`` from it. Both steps
+    are mirrored by hand below (no gateway/turn-setup code runs), so this only
+    pins that a budget rebuilt from the refreshed value moves from 90 to 200.
+    """
+    agent = _make_cached_agent(90)
+    assert agent.iteration_budget.max_total == 90
+
+    # Gateway refresh on cache reuse:
+    agent.max_iterations = 200
+
+    # Start-of-turn budget rebuild (mirrors agent/turn_context.py):
+    agent.iteration_budget = IterationBudget(agent.max_iterations)
+
+    assert agent.iteration_budget.max_total == 200
+    assert agent.iteration_budget.remaining == 200
diff --git a/tests/gateway/test_config.py b/tests/gateway/test_config.py
index 9e74dd355ad..9f38f9b8a0d 100644
--- a/tests/gateway/test_config.py
+++ b/tests/gateway/test_config.py
@@ -311,6 +311,55 @@ class TestLoadGatewayConfig:
 
         assert config.quick_commands == {"limits": {"type": "exec", "command": "echo ok"}}
 
+    def test_relay_platform_enabled_from_env_url(self, tmp_path, monkeypatch):
+        """GATEWAY_RELAY_URL must enable Platform.RELAY in config.platforms so
+        start_gateway()'s connect loop actually dials the connector. Registering
+        the adapter in the platform_registry is NOT enough — the connect loop
+        iterates config.platforms, so an un-enabled RELAY never connects (the
+        'relay registered but no inbound' bug)."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()  # empty home: no config.yaml, so only the env var can enable RELAY
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setenv("GATEWAY_RELAY_URL", "https://connector.example/relay/")
+
+        config = load_gateway_config()
+
+        assert Platform.RELAY in config.platforms
+        relay = config.platforms[Platform.RELAY]
+        assert relay.enabled is True
+        # Trailing slash stripped; mirrored into extra for the connected-checker.
+        assert relay.extra.get("relay_url") == "https://connector.example/relay"
+        assert Platform.RELAY in config.get_connected_platforms()
+
+    def test_relay_platform_absent_when_url_unset(self, tmp_path, monkeypatch):
+        """No relay URL -> no RELAY platform, so direct/single-tenant gateways
+        are unaffected."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()  # empty home: no config.yaml either
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("GATEWAY_RELAY_URL", raising=False)
+
+        config = load_gateway_config()
+
+        assert Platform.RELAY not in config.platforms
+
+    def test_relay_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
+        """gateway.platforms.relay.extra.relay_url in config.yaml also enables RELAY (env-less path)."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text(
+            "gateway:\n  platforms:\n    relay:\n      extra:\n        relay_url: https://connector.example/relay\n",
+            encoding="utf-8",
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.delenv("GATEWAY_RELAY_URL", raising=False)  # make sure only the YAML can supply the URL
+
+        config = load_gateway_config()
+
+        assert Platform.RELAY in config.platforms
+        assert config.platforms[Platform.RELAY].enabled is True  # the YAML above never sets "enabled" itself
+
     def test_bridges_group_sessions_per_user_from_config_yaml(self, tmp_path, monkeypatch):
         hermes_home = tmp_path / ".hermes"
         hermes_home.mkdir()
diff --git a/tests/gateway/test_discord_clarify_buttons.py b/tests/gateway/test_discord_clarify_buttons.py
index c83e52dba5a..b8b5dc10ed2 100644
--- a/tests/gateway/test_discord_clarify_buttons.py
+++ b/tests/gateway/test_discord_clarify_buttons.py
@@ -122,13 +122,56 @@ class TestClarifyChoiceViewConstruction:
             clarify_id="cidZ",
             allowed_user_ids=set(),
         )
-        # 75 chars + 3 ellipsis chars in the body, plus "1. " prefix
+        # 78 chars + single-char ellipsis in the body, plus "1. " prefix.
+        # Uses U+2026 (…) instead of "..." to fit the 80-char Discord cap.
         first_label = view.children[0].label
         assert first_label.startswith("1. ")
-        assert first_label.endswith("...")
+        assert first_label.endswith("\u2026")
         # Final label total <= 80 (Discord cap on button labels)
         assert len(first_label) <= 80
 
+    def test_truncates_long_choice_label_breaks_on_word_boundary(self):
+        # Long choice with spaces — should cut at the last whole word so the
+        # trailing text stays readable on Discord mobile.
+        long_choice = (
+            "Tight, well-illustrated, covers all 3 audiences "
+            "(patients, families, curious general readers)"
+        )
+        view = ClarifyChoiceView(
+            choices=[long_choice],
+            clarify_id="cidW",
+            allowed_user_ids=set(),
+        )
+        first_label = view.children[0].label
+        assert first_label.startswith("1. ")
+        assert first_label.endswith("\u2026")
+        # No mid-word fragment before the ellipsis.
+        assert not first_label.rstrip("\u2026").endswith("(")
+
+    def test_truncates_long_no_space_choice_on_soft_boundary(self):
+        # A long choice with soft boundaries (commas, hyphens) but no spaces
+        # should still cut on a soft boundary, not mid-word. We use an input
+        # where position 76 is NOT a soft boundary — the test only passes
+        # if the renderer actively searches backward for a soft char
+        # rather than blindly cutting at the budget limit.
+        long_choice = "a" * 30 + "-" + "b" * 30 + "-" + "c" * 30 + "-" + "d" * 30
+        # 30a-30b-30c-30d = 30 + 1 + 30 + 1 + 30 + 1 + 30 = 123 chars
+        # Position 76 is 'c' (a mid-word alpha). The renderer must look back
+        # for a '-' to cut on.
+        view = ClarifyChoiceView(
+            choices=[long_choice],
+            clarify_id="cidSB",
+            allowed_user_ids=set(),
+        )
+        first_label = view.children[0].label
+        assert first_label.endswith("\u2026")
+        assert len(first_label) <= 80
+        body = first_label[len("1. "):].rstrip("\u2026")
+        last_char = body[-1]
+        assert last_char in {"-", ",", ".", ")", " "}, (
+            f"Label cuts mid-word at {last_char!r}: {first_label!r}"
+        )
+
 
 # ===========================================================================
 # Choice callback → resolve_gateway_clarify
@@ -404,3 +447,134 @@ class TestDiscordSendClarify:
         # Only 1 real choice + 1 Other = 2 children
         assert len(view.children) == 2
         assert "real-choice" in view.children[0].label
+
+    @pytest.mark.asyncio
+    async def test_unwraps_dict_choices_to_description(self):
+        # LLMs sometimes emit [{"description": "..."}] instead of bare strings
+        # — the renderer must unwrap common dict shapes, not str() the whole
+        # dict into a Python repr on the button label.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 555
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        malformed = [
+            {"description": "Tight, well-illustrated"},
+            {"label": "Use label key"},
+            {"text": "Use text key"},
+            "normal-string",  # strings still pass through
+        ]
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=malformed,
+            clarify_id="cidU",
+            session_key="sk-U",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        labels = [b.label for b in view.children[:-1]]  # exclude Other
+        # No raw Python repr should leak onto any label.
+        for label in labels:
+            assert "{'" not in label
+            assert "':" not in label
+        # Each dict unwrapped to its inner string.
+        assert any("Tight, well-illustrated" in lbl for lbl in labels)
+        assert any("Use label key" in lbl for lbl in labels)
+        assert any("Use text key" in lbl for lbl in labels)
+        assert any("normal-string" in lbl for lbl in labels)
+
+    @pytest.mark.asyncio
+    async def test_unwrap_prefers_description_over_name_in_multi_key_dict(self):
+        # When the LLM emits both 'name' (often a short identifier in
+        # OpenAI-style tool calls) and 'description' (the user-facing text),
+        # the renderer must surface 'description'. The user should never see
+        # a short model identifier on a button label.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 666
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[{"name": "tight", "description": "Tight, well-illustrated"}],
+            clarify_id="cidN",
+            session_key="sk-N",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_label = view.children[0].label
+        assert "Tight, well-illustrated" in choice_label
+        # The 'name' value (a short identifier) must NOT have leaked.
+        body = choice_label.split("1. ", 1)[1].rstrip("\u2026")
+        assert "tight" not in body, f"'name' leaked onto button: {choice_label!r}"
+
+    @pytest.mark.asyncio
+    async def test_unwrap_prefers_label_over_description(self):
+        # When both 'label' and 'description' are present, 'label' wins.
+        # 'label' is the canonical short user-facing text in most LLM tool
+        # conventions; 'description' is the longer explanation.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 777
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[{"label": "Short", "description": "Long verbose explanation"}],
+            clarify_id="cidL",
+            session_key="sk-L",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_label = view.children[0].label
+        assert "Short" in choice_label
+        # The longer description must NOT have leaked.
+        assert "Long verbose" not in choice_label, (
+            f"'description' leaked over 'label': {choice_label!r}"
+        )
+
+    @pytest.mark.asyncio
+    async def test_unwrap_does_not_pick_value_or_name_alone(self):
+        # 'name' and 'value' are Discord-component-shaped fields that could
+        # accidentally appear in dicts not intended as choices (e.g., a
+        # developer error in the gateway wiring). The renderer should not
+        # surface them as button labels — only the well-known LLM tool-call
+        # keys (label, description, text, title) should win.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 888
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[
+                {"name": "only_name_here"},   # should be filtered out
+                {"value": "only_value_here"},  # should be filtered out
+                {"description": "real choice"},
+            ],
+            clarify_id="cidNV",
+            session_key="sk-NV",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_labels = [b.label for b in view.children[:-1]]  # exclude Other
+        # Only the well-formed dict survives.
+        assert len(choice_labels) == 1, (
+            f"Expected 1 choice, got {len(choice_labels)}: {choice_labels!r}"
+        )
+        assert "real choice" in choice_labels[0]
+        for label in choice_labels:
+            assert "only_name_here" not in label, f"name leaked: {label!r}"
+            assert "only_value_here" not in label, f"value leaked: {label!r}"
diff --git a/tests/gateway/test_discord_free_response.py b/tests/gateway/test_discord_free_response.py
index e2133d56c35..39556f6603f 100644
--- a/tests/gateway/test_discord_free_response.py
+++ b/tests/gateway/test_discord_free_response.py
@@ -666,6 +666,70 @@ async def test_fetch_channel_context_stops_at_self_message_and_reverses_to_chron
     )
 
 
+@pytest.mark.asyncio
+async def test_fetch_channel_context_skips_self_improvement_boundary_message(adapter, monkeypatch):
+    """Delayed harness status bumps must not hide messages after the real reply."""
+    monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all")
+    adapter.config.extra["history_backfill_limit"] = 10
+
+    codex = SimpleNamespace(id=55, display_name="Codex", name="Codex", bot=True)
+    human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False)
+
+    channel = FakeHistoryChannel(
+        [
+            make_history_message(
+                author=adapter._client.user,
+                content="arbitrary lifecycle text from a metadata-marked send",
+                msg_id=9,
+            ),
+            make_history_message(
+                author=adapter._client.user,
+                content="[Background process bg-123 finished with exit code 0~ Here's the final output:\nok]",
+                msg_id=8,
+            ),
+            make_history_message(
+                author=codex,
+                content="♻ Gateway restarted successfully. Your session continues.",
+                msg_id=7,
+            ),
+            make_history_message(
+                author=codex,
+                content="💾 Self-improvement review: Memory updated",
+                msg_id=6,
+            ),
+            make_history_message(author=human, content="question after reply", msg_id=5),
+            make_history_message(
+                author=adapter._client.user,
+                content="💾 Self-improvement review: Skill 'hermes-gateway-display-config' patched",
+                msg_id=4,
+            ),
+            make_history_message(author=codex, content="Codex final answer", msg_id=3),
+            make_history_message(author=human, content="prompt before reply", msg_id=2),
+            make_history_message(author=adapter._client.user, content="our prior response", msg_id=1),
+        ],
+        channel_id=123,
+    )
+    adapter._nonconversational_messages.mark_many(["9"])
+
+    result = await adapter._fetch_channel_context(channel, before=make_message(channel=channel, content="trigger"))
+
+    assert result == (
+        "[Recent channel messages]\n"
+        "[Alice] prompt before reply\n"
+        "[Codex [bot]] Codex final answer\n"
+        "[Alice] question after reply"
+    )
+
+
+def test_nonconversational_fallback_requires_self_improvement_emoji():
+    assert discord_platform._looks_like_nonconversational_history_message(
+        "💾 Self-improvement review: Memory updated"
+    )
+    assert not discord_platform._looks_like_nonconversational_history_message(
+        "Self-improvement review: this is a normal assistant heading"
+    )
+
+
 @pytest.mark.asyncio
 async def test_fetch_channel_context_skips_other_bots_when_allow_bots_none(adapter, monkeypatch):
     monkeypatch.setenv("DISCORD_ALLOW_BOTS", "none")
@@ -801,6 +865,33 @@ async def test_fetch_channel_context_ignores_stale_cache(adapter, monkeypatch):
     assert recorded_after["value"] is None
 
 
+@pytest.mark.asyncio
+async def test_discord_send_does_not_cache_nonconversational_status_as_history_boundary(adapter):
+    """Automated status notifications should not move the backfill boundary."""
+
+    class SendingChannel(FakeTextChannel):
+        async def send(self, content, reference=None):
+            return SimpleNamespace(id=222)
+
+    channel = SendingChannel(channel_id=777)
+    adapter._client = SimpleNamespace(
+        user=adapter._client.user,
+        get_channel=lambda channel_id: channel if channel_id == 777 else None,
+        fetch_channel=AsyncMock(return_value=channel),
+    )
+    adapter._last_self_message_id["777"] = "111"
+
+    result = await adapter.send(
+        "777",
+        "arbitrary lifecycle text from gateway",
+        metadata={"non_conversational": True},
+    )
+
+    assert result.success is True
+    assert adapter._last_self_message_id["777"] == "111"
+    assert "222" in adapter._nonconversational_messages
+
+
 @pytest.mark.asyncio
 async def test_discord_shared_channel_backfill_prepends_context(adapter, monkeypatch):
     monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
@@ -925,5 +1016,3 @@ async def test_discord_auto_thread_skips_backfill(adapter, monkeypatch):
 
     adapter._auto_create_thread.assert_awaited_once()
     adapter._fetch_channel_context.assert_not_awaited()
-
-
diff --git a/tests/gateway/test_gateway_command_line_matcher.py b/tests/gateway/test_gateway_command_line_matcher.py
new file mode 100644
index 00000000000..bc8113b91a0
--- /dev/null
+++ b/tests/gateway/test_gateway_command_line_matcher.py
@@ -0,0 +1,60 @@
+"""Tests for the strict gateway command-line matcher.
+
+Regression guard for the Windows ``hermes gateway restart`` silent-outage bug:
+the previous loose substring match (``"... gateway" in cmdline``) false-matched
+``gateway status``/``dashboard`` siblings and unrelated processes such as
+``python -m tui_gateway``, which let ``restart()`` race a still-draining old
+process and ``status``/``start`` report false positives.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from gateway.status import looks_like_gateway_command_line as matches
+
+
+ACCEPT = [
+    "pythonw.exe -m hermes_cli.main gateway run",
+    r"C:\Users\me\hermes\venv\Scripts\pythonw.exe -m hermes_cli.main gateway run",
+    "python -m hermes_cli.main --profile work gateway run",
+    "python -m hermes_cli.main gateway run --replace",
+    "python -m hermes_cli/main.py gateway run",
+    "python gateway/run.py",
+    "hermes-gateway.exe",
+    "hermes gateway",          # bare `hermes gateway` defaults to run
+    "hermes gateway run",
+    # profile selector AFTER the `gateway` token (argv is profile-position
+    # agnostic — _apply_profile_override strips --profile/-p anywhere)
+    "hermes gateway --profile work run",
+    "python -m hermes_cli.main gateway -p work run",
+    "hermes gateway --profile=work run",
+    # a profile literally NAMED "gateway"
+    "hermes -p gateway gateway run",
+    "python -m hermes_cli.main --profile gateway gateway run",
+    # quoted Windows paths with spaces (shlex-aware tokenization)
+    r'"C:\Program Files\Hermes\hermes-gateway.exe"',
+    r'"C:\Program Files\Hermes\gateway\run.py" run',
+    r'"C:\Program Files\Py\pythonw.exe" -m hermes_cli.main gateway run',
+]
+
+REJECT = [
+    "python -m tui_gateway",                              # unrelated module
+    "python -m hermes_cli.main gateway status",           # other subcommand
+    "python -m hermes_cli.main gateway restart",
+    "python -m hermes_cli.main gateway stop",
+    "python -m hermes_cli.main --profile x dashboard",    # non-gateway subcommand
+    "some random python -m mygateway thing",
+    "",
+    None,
+]
+
+
+@pytest.mark.parametrize("cmd", ACCEPT)
+def test_accepts_real_gateway_run(cmd):
+    assert matches(cmd) is True
+
+
+@pytest.mark.parametrize("cmd", REJECT)
+def test_rejects_non_gateway_run(cmd):
+    assert matches(cmd) is False
diff --git a/tests/gateway/test_kanban_watchers_mixin.py b/tests/gateway/test_kanban_watchers_mixin.py
index e4666e15255..061b528e79e 100644
--- a/tests/gateway/test_kanban_watchers_mixin.py
+++ b/tests/gateway/test_kanban_watchers_mixin.py
@@ -43,3 +43,27 @@ def test_watcher_loops_are_coroutines():
     # The two long-running watchers are async loops.
     assert inspect.iscoroutinefunction(GatewayKanbanWatchersMixin._kanban_notifier_watcher)
     assert inspect.iscoroutinefunction(GatewayKanbanWatchersMixin._kanban_dispatcher_watcher)
+
+
+def test_singleton_dispatcher_lock_is_exclusive(tmp_path):
+    """Only one holder of the dispatcher lock at a time — the backstop that
+    stops concurrent dispatchers from double-reclaiming and corrupting shared
+    kanban SQLite index pages under wal_autocheckpoint=0."""
+    import os
+
+    from gateway.kanban_watchers import _acquire_singleton_lock, _release_singleton_lock
+
+    lock = tmp_path / "kanban" / ".dispatcher.lock"
+
+    h1, st1 = _acquire_singleton_lock(lock)
+    assert st1 == "held" and h1 is not None
+
+    # A second acquire while the first is held must be refused, not granted.
+    h2, st2 = _acquire_singleton_lock(lock)
+    assert st2 == "contended" and h2 is None
+
+    # Releasing the first lets a fresh acquire succeed (lock is reusable).
+    _release_singleton_lock(h1)
+    h3, st3 = _acquire_singleton_lock(lock)
+    assert st3 == "held" and h3 is not None
+    _release_singleton_lock(h3)
diff --git a/tests/gateway/test_model_command_flat_string_config.py b/tests/gateway/test_model_command_flat_string_config.py
index 38d6ea11dae..9934d9806b1 100644
--- a/tests/gateway/test_model_command_flat_string_config.py
+++ b/tests/gateway/test_model_command_flat_string_config.py
@@ -156,3 +156,46 @@ async def test_model_global_persists_when_config_has_proper_dict_model(tmp_path,
     written = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
     assert written["model"]["default"] == "gpt-5.5"
     assert written["model"]["provider"] == "openrouter"
+
+
+@pytest.mark.asyncio
+async def test_model_no_flag_persists_by_default(tmp_path, monkeypatch):
+    """A plain ``/model X`` (no --global) now persists to config.yaml.
+
+    This is the user-facing fix: switching models in one session survives
+    into the next without re-typing the switch every time.
+    """
+    cfg_path = _setup_isolated_home(
+        tmp_path,
+        monkeypatch,
+        {"default": "old-model", "provider": "openai-codex"},
+    )
+
+    result = await _make_runner()._handle_model_command(
+        _make_event("/model gpt-5.5")
+    )
+
+    assert result is not None
+    assert "gpt-5.5" in result
+    written = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
+    assert written["model"]["default"] == "gpt-5.5"
+
+
+@pytest.mark.asyncio
+async def test_model_session_flag_does_not_persist(tmp_path, monkeypatch):
+    """``/model X --session`` opts out of persistence even under the new default."""
+    cfg_path = _setup_isolated_home(
+        tmp_path,
+        monkeypatch,
+        {"default": "old-model", "provider": "openai-codex"},
+    )
+
+    result = await _make_runner()._handle_model_command(
+        _make_event("/model gpt-5.5 --session")
+    )
+
+    assert result is not None
+    assert "gpt-5.5" in result
+    written = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
+    # Config untouched — the session override is in-memory only.
+    assert written["model"]["default"] == "old-model"
diff --git a/tests/gateway/test_multiplex_adapter_registry.py b/tests/gateway/test_multiplex_adapter_registry.py
new file mode 100644
index 00000000000..7ecca64dfee
--- /dev/null
+++ b/tests/gateway/test_multiplex_adapter_registry.py
@@ -0,0 +1,136 @@
+"""Phase 3: secondary-profile adapter registry + same-token conflict detection."""
+import pytest
+
+from gateway.run import GatewayRunner
+
+
+class _FakeAdapter:
+    def __init__(self, token=None):
+        self.token = token
+
+
+class TestCredentialFingerprint:
+    def test_none_without_token(self):
+        assert GatewayRunner._adapter_credential_fingerprint(_FakeAdapter()) is None
+
+    def test_stable_and_log_safe(self):
+        a = _FakeAdapter(token="secret-bot-token")
+        fp1 = GatewayRunner._adapter_credential_fingerprint(a)
+        fp2 = GatewayRunner._adapter_credential_fingerprint(_FakeAdapter(token="secret-bot-token"))
+        assert fp1 == fp2  # stable
+        assert "secret-bot-token" not in (fp1 or "")  # never the raw token
+        assert len(fp1) == 16
+
+    def test_distinct_tokens_distinct_fp(self):
+        a = GatewayRunner._adapter_credential_fingerprint(_FakeAdapter(token="tok-A"))
+        b = GatewayRunner._adapter_credential_fingerprint(_FakeAdapter(token="tok-B"))
+        assert a != b
+
+    def test_reads_alt_attrs(self):
+        class _AltAdapter:
+            def __init__(self):
+                self.bot_token = "alt-token"
+        assert GatewayRunner._adapter_credential_fingerprint(_AltAdapter()) is not None
+
+
+class TestProfileMessageHandler:
+    @pytest.mark.asyncio
+    async def test_stamps_profile_on_unstamped_source(self):
+        runner = GatewayRunner.__new__(GatewayRunner)
+        seen = {}
+
+        async def _fake_handle(event):
+            seen["profile"] = event.source.profile
+            return "ok"
+
+        runner._handle_message = _fake_handle
+        handler = runner._make_profile_message_handler("coder")
+
+        class _Src:
+            profile = None
+
+        class _Evt:
+            source = _Src()
+
+        result = await handler(_Evt())
+        assert result == "ok"
+        assert seen["profile"] == "coder"
+
+    @pytest.mark.asyncio
+    async def test_does_not_override_existing_profile(self):
+        runner = GatewayRunner.__new__(GatewayRunner)
+        seen = {}
+
+        async def _fake_handle(event):
+            seen["profile"] = event.source.profile
+            return "ok"
+
+        runner._handle_message = _fake_handle
+        handler = runner._make_profile_message_handler("coder")
+
+        class _Src:
+            profile = "writer"  # already stamped (e.g. by URL prefix)
+
+        class _Evt:
+            source = _Src()
+
+        await handler(_Evt())
+        assert seen["profile"] == "writer"
+
+
+class TestPortBindingHardError:
+    """A secondary profile enabling a port-binding platform aborts startup."""
+
+    @pytest.mark.asyncio
+    async def test_secondary_webhook_raises(self, monkeypatch):
+        from gateway.run import MultiplexConfigError
+        from gateway.config import GatewayConfig, Platform, PlatformConfig
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner.config = GatewayConfig(multiplex_profiles=True)
+        runner._profile_adapters = {}
+
+        # reviewer profile config enables webhook (a port-binding platform)
+        reviewer_cfg = GatewayConfig(multiplex_profiles=True)
+        reviewer_cfg.platforms = {
+            Platform.WEBHOOK: PlatformConfig(enabled=True, extra={"port": 8644}),
+        }
+        monkeypatch.setattr(
+            "gateway.config.load_gateway_config", lambda: reviewer_cfg
+        )
+
+        with pytest.raises(MultiplexConfigError) as ei:
+            await runner._start_one_profile_adapters("reviewer", "/tmp/x", {})
+        assert "webhook" in str(ei.value)
+        assert "reviewer" in str(ei.value)
+
+    @pytest.mark.asyncio
+    async def test_secondary_non_binding_platform_ok(self, monkeypatch):
+        """A non-port-binding platform (e.g. telegram) is NOT rejected."""
+        from gateway.config import GatewayConfig, Platform, PlatformConfig
+
+        runner = GatewayRunner.__new__(GatewayRunner)
+        runner.config = GatewayConfig(multiplex_profiles=True)
+        runner._profile_adapters = {}
+
+        reviewer_cfg = GatewayConfig(multiplex_profiles=True)
+        reviewer_cfg.platforms = {
+            Platform.TELEGRAM: PlatformConfig(enabled=True, token="t"),
+        }
+        monkeypatch.setattr(
+            "gateway.config.load_gateway_config", lambda: reviewer_cfg
+        )
+        # _create_adapter returns None here (no real telegram token wiring), so
+        # the loop simply connects nothing — the key assertion is NO raise.
+        monkeypatch.setattr(runner, "_create_adapter", lambda p, c: None)
+
+        connected = await runner._start_one_profile_adapters("reviewer", "/tmp/x", {})
+        assert connected == 0  # nothing connected, but no MultiplexConfigError
+
+    def test_port_binding_set_covers_known_listeners(self):
+        from gateway.run import _PORT_BINDING_PLATFORM_VALUES
+        # Every adapter that binds a TCP port must be in the guard set.
+        for p in ("webhook", "api_server", "msgraph_webhook", "feishu",
+                  "wecom_callback", "bluebubbles", "sms"):
+            assert p in _PORT_BINDING_PLATFORM_VALUES
+
diff --git a/tests/gateway/test_multiplex_credential_isolation.py b/tests/gateway/test_multiplex_credential_isolation.py
new file mode 100644
index 00000000000..748580197c7
--- /dev/null
+++ b/tests/gateway/test_multiplex_credential_isolation.py
@@ -0,0 +1,88 @@
+"""End-to-end credential isolation proof for multiplex mode (Workstream A).
+
+These exercise the REAL resolution path (runtime_provider, secret scope, MCP
+interpolation) rather than mocking it, proving the property that matters: two
+profiles with different keys never see each other's, and an unscoped read in
+multiplex mode fails closed instead of leaking.
+"""
+import pytest
+
+from agent import secret_scope as ss
+
+
+@pytest.fixture(autouse=True)
+def _reset(monkeypatch):
+    ss.set_multiplex_active(False)
+    yield
+    ss.set_multiplex_active(False)
+
+
+class TestRuntimeProviderUsesScope:
+    """hermes_cli.runtime_provider._getenv resolves through the secret scope."""
+
+    def test_getenv_reads_scope_under_multiplex(self, monkeypatch):
+        from hermes_cli.runtime_provider import _getenv
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-global-leak")
+        ss.set_multiplex_active(True)
+        tok = ss.set_secret_scope({"ANTHROPIC_API_KEY": "sk-profileA"})
+        try:
+            assert _getenv("ANTHROPIC_API_KEY") == "sk-profileA"
+        finally:
+            ss.reset_secret_scope(tok)
+
+    def test_getenv_two_profiles_isolated(self, monkeypatch):
+        from hermes_cli.runtime_provider import _getenv
+        ss.set_multiplex_active(True)
+
+        tok_a = ss.set_secret_scope({"OPENAI_API_KEY": "sk-A"})
+        try:
+            assert _getenv("OPENAI_API_KEY") == "sk-A"
+        finally:
+            ss.reset_secret_scope(tok_a)
+
+        tok_b = ss.set_secret_scope({"OPENAI_API_KEY": "sk-B"})
+        try:
+            assert _getenv("OPENAI_API_KEY") == "sk-B"
+        finally:
+            ss.reset_secret_scope(tok_b)
+
+    def test_getenv_fails_closed_unscoped(self, monkeypatch):
+        from hermes_cli.runtime_provider import _getenv
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-leak")
+        ss.set_multiplex_active(True)
+        with pytest.raises(ss.UnscopedSecretError):
+            _getenv("OPENROUTER_API_KEY")
+
+    def test_getenv_global_var_still_reads_environ(self, monkeypatch):
+        from hermes_cli.runtime_provider import _getenv
+        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
+        ss.set_multiplex_active(True)
+        # global var: no scope needed, no raise
+        assert _getenv("HERMES_MAX_ITERATIONS") == "42"
+
+
+class TestMcpInterpolationUsesScope:
+    """MCP config ${VAR} interpolation resolves through the secret scope."""
+
+    def test_interpolation_reads_scope(self, monkeypatch):
+        from tools.mcp_tool import _interpolate_env_vars
+        monkeypatch.setenv("MY_MCP_TOKEN", "global-token")
+        ss.set_multiplex_active(True)
+        tok = ss.set_secret_scope({"MY_MCP_TOKEN": "profile-token"})
+        try:
+            cfg = {"env": {"TOKEN": "${MY_MCP_TOKEN}"}}
+            assert _interpolate_env_vars(cfg) == {"env": {"TOKEN": "profile-token"}}
+        finally:
+            ss.reset_secret_scope(tok)
+
+    def test_interpolation_unset_keeps_placeholder(self, monkeypatch):
+        from tools.mcp_tool import _interpolate_env_vars
+        monkeypatch.delenv("UNSET_MCP_VAR", raising=False)
+        # multiplex off: unset var keeps literal placeholder (legacy behavior)
+        assert _interpolate_env_vars("${UNSET_MCP_VAR}") == "${UNSET_MCP_VAR}"
+
+    def test_interpolation_off_reads_environ(self, monkeypatch):
+        from tools.mcp_tool import _interpolate_env_vars
+        monkeypatch.setenv("MY_MCP_TOKEN", "env-token")
+        # multiplex off: legacy os.environ resolution
+        assert _interpolate_env_vars("${MY_MCP_TOKEN}") == "env-token"
diff --git a/tests/gateway/test_multiplex_http_routing.py b/tests/gateway/test_multiplex_http_routing.py
new file mode 100644
index 00000000000..e144030c351
--- /dev/null
+++ b/tests/gateway/test_multiplex_http_routing.py
@@ -0,0 +1,73 @@
+"""Phase 1: HTTP-inbound /p/<profile>/ routing for the webhook adapter."""
+import pytest
+
+from gateway.config import GatewayConfig, Platform
+from gateway.session import SessionSource, build_session_key
+
+
+class TestSessionSourceProfileField:
+    def test_profile_roundtrips(self):
+        s = SessionSource(
+            platform=Platform.WEBHOOK if hasattr(Platform, "WEBHOOK") else Platform.TELEGRAM,
+            chat_id="c1",
+            chat_type="webhook",
+            profile="coder",
+        )
+        restored = SessionSource.from_dict(s.to_dict())
+        assert restored.profile == "coder"
+
+    def test_profile_absent_not_serialized(self):
+        s = SessionSource(platform=Platform.TELEGRAM, chat_id="c1", chat_type="dm")
+        assert "profile" not in s.to_dict()
+
+    def test_source_profile_drives_session_key_namespace(self):
+        s = SessionSource(platform=Platform.TELEGRAM, chat_id="99", chat_type="dm")
+        # build_session_key takes profile explicitly; the adapter passes
+        # source.profile through. Verify the namespace follows it.
+        assert build_session_key(s, profile="coder") == "agent:coder:telegram:dm:99"
+
+
+class TestWebhookProfileResolution:
+    """_resolve_request_profile validates the /p/<profile>/ prefix."""
+
+    def _adapter(self, multiplex: bool, served=("default", "coder")):
+        from gateway.platforms.webhook import WebhookAdapter, _PROFILE_REJECTED
+
+        class _FakeReq:
+            def __init__(self, profile):
+                self.match_info = {"profile": profile} if profile is not None else {}
+
+        cfg = GatewayConfig(multiplex_profiles=multiplex)
+
+        class _Runner:
+            config = cfg
+
+        # Construct minimally; we only call _resolve_request_profile.
+        adapter = WebhookAdapter.__new__(WebhookAdapter)
+        adapter.gateway_runner = _Runner()
+        return adapter, _FakeReq, _PROFILE_REJECTED, served
+
+    def test_no_prefix_returns_none(self):
+        adapter, Req, _REJ, _ = self._adapter(multiplex=True)
+        assert adapter._resolve_request_profile(Req(None)) is None
+
+    def test_prefix_ignored_when_multiplex_off(self):
+        adapter, Req, _REJ, _ = self._adapter(multiplex=False)
+        # Even a bogus profile is ignored (not 404'd) when multiplexing is off.
+        assert adapter._resolve_request_profile(Req("anything")) is None
+
+    def test_known_profile_accepted(self, monkeypatch):
+        adapter, Req, _REJ, served = self._adapter(multiplex=True)
+        monkeypatch.setattr(
+            "hermes_cli.profiles.profiles_to_serve",
+            lambda multiplex: [(n, None) for n in served],
+        )
+        assert adapter._resolve_request_profile(Req("coder")) == "coder"
+
+    def test_unknown_profile_rejected(self, monkeypatch):
+        adapter, Req, REJ, served = self._adapter(multiplex=True)
+        monkeypatch.setattr(
+            "hermes_cli.profiles.profiles_to_serve",
+            lambda multiplex: [(n, None) for n in served],
+        )
+        assert adapter._resolve_request_profile(Req("ghost")) is REJ
diff --git a/tests/gateway/test_multiplex_lifecycle.py b/tests/gateway/test_multiplex_lifecycle.py
new file mode 100644
index 00000000000..6b5da5d9c38
--- /dev/null
+++ b/tests/gateway/test_multiplex_lifecycle.py
@@ -0,0 +1,55 @@
+"""Phase 4: lifecycle guard + per-profile observability."""
+import pytest
+
+
+class TestServedProfilesStatus:
+    def test_write_and_read_served_profiles(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import gateway.status as status
+        importlib.reload(status)
+        try:
+            status.write_runtime_status(
+                gateway_state="running", served_profiles=["default", "coder"]
+            )
+            rec = status.read_runtime_status()
+            assert rec.get("served_profiles") == ["default", "coder"]
+        finally:
+            importlib.reload(status)
+
+    def test_served_profiles_absent_by_default(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import gateway.status as status
+        importlib.reload(status)
+        try:
+            status.write_runtime_status(gateway_state="running")
+            rec = status.read_runtime_status()
+            assert "served_profiles" not in rec
+        finally:
+            importlib.reload(status)
+
+
+class TestNamedProfileMultiplexerGuard:
+    """_guard_named_profile_under_multiplexer is inert unless all conditions hold."""
+
+    def test_inert_for_default_profile(self, monkeypatch):
+        from hermes_cli import gateway as gw
+        monkeypatch.setattr(gw, "_profile_suffix", lambda: "")
+        # Should return without raising (default profile => guard N/A).
+        gw._guard_named_profile_under_multiplexer(force=False)
+
+    def test_force_bypasses(self, monkeypatch):
+        from hermes_cli import gateway as gw
+        # Even if it looks like a named profile, force returns immediately.
+        monkeypatch.setattr(gw, "_profile_suffix", lambda: "coder")
+        gw._guard_named_profile_under_multiplexer(force=True)
+
+    def test_inert_when_no_default_gateway_running(self, monkeypatch, tmp_path):
+        from hermes_cli import gateway as gw
+        monkeypatch.setattr(gw, "_profile_suffix", lambda: "coder")
+        monkeypatch.setattr(
+            "hermes_constants.get_default_hermes_root", lambda: tmp_path
+        )
+        # No gateway.pid in tmp_path => no running default gateway => no raise.
+        gw._guard_named_profile_under_multiplexer(force=False)
diff --git a/tests/gateway/test_multiplex_phase0.py b/tests/gateway/test_multiplex_phase0.py
new file mode 100644
index 00000000000..0297b08494c
--- /dev/null
+++ b/tests/gateway/test_multiplex_phase0.py
@@ -0,0 +1,165 @@
+"""Phase 0 foundations for multi-profile gateway multiplexing.
+
+Covers the three Phase 0 deliverables:
+  1. ``gateway.multiplex_profiles`` config flag (default False, round-trips).
+  2. ``hermes_cli.profiles.profiles_to_serve`` enumeration.
+  3. Profile-stamped ``build_session_key`` that is BYTE-IDENTICAL when the
+     flag is off (the orphan-every-session guard) and namespace-segmented when
+     on, without disturbing the positional key layout downstream parsers rely
+     on.
+"""
+import pytest
+from unittest.mock import patch
+
+from gateway.config import GatewayConfig, Platform
+from gateway.session import SessionSource, SessionStore, build_session_key
+
+
+def _src(**kw) -> SessionSource:
+    kw.setdefault("platform", Platform.TELEGRAM)
+    kw.setdefault("chat_id", "99")
+    kw.setdefault("chat_type", "dm")
+    return SessionSource(**kw)
+
+
+class TestSessionKeyByteIdenticalWhenOff:
+    """The non-negotiable guard: with no profile (or 'default'), every key is
+    byte-for-byte what it was before Phase 0. A diff here orphans every
+    existing session on upgrade."""
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_dm_with_chat_id(self, profile):
+        s = _src(chat_id="99", chat_type="dm")
+        assert build_session_key(s, profile=profile) == "agent:main:telegram:dm:99"
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_dm_with_thread(self, profile):
+        s = _src(chat_id="99", chat_type="dm", thread_id="t1")
+        assert build_session_key(s, profile=profile) == "agent:main:telegram:dm:99:t1"
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_dm_without_chat_id_falls_back_to_user(self, profile):
+        s = _src(chat_id="", chat_type="dm", user_id="jordan")
+        assert build_session_key(s, profile=profile) == "agent:main:telegram:dm:jordan"
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_group_per_user(self, profile):
+        s = _src(platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice")
+        assert (
+            build_session_key(s, profile=profile)
+            == "agent:main:discord:group:g1:alice"
+        )
+
+    @pytest.mark.parametrize("profile", [None, "default"])
+    def test_group_shared_when_disabled(self, profile):
+        s = _src(platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice")
+        assert (
+            build_session_key(s, group_sessions_per_user=False, profile=profile)
+            == "agent:main:discord:group:g1"
+        )
+
+
+class TestSessionKeyNamespacedWhenOn:
+    """A named profile occupies the namespace slot, isolating its sessions."""
+
+    def test_named_profile_dm(self):
+        s = _src(chat_id="99", chat_type="dm")
+        assert build_session_key(s, profile="coder") == "agent:coder:telegram:dm:99"
+
+    def test_named_profile_group_per_user(self):
+        s = _src(platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice")
+        assert (
+            build_session_key(s, profile="coder")
+            == "agent:coder:discord:group:g1:alice"
+        )
+
+    def test_two_profiles_same_chat_do_not_collide(self):
+        s = _src(chat_id="99", chat_type="dm")
+        a = build_session_key(s, profile="default")
+        b = build_session_key(s, profile="coder")
+        c = build_session_key(s, profile="writer")
+        assert a != b != c and a != c
+
+    def test_positional_layout_preserved_for_parsers(self):
+        """Downstream parsers split on ':' and read parts[2]=platform,
+        parts[3]=chat_type, parts[4]=chat_id (see qqbot adapter
+        _parse_gateway_session_key). The profile must occupy parts[1] only."""
+        s = _src(platform=Platform.DISCORD, chat_id="g1", chat_type="group", user_id="alice")
+        parts = build_session_key(s, profile="coder").split(":")
+        assert parts[0] == "agent"
+        assert parts[1] == "coder"  # namespace slot (was always 'main')
+        assert parts[2] == "discord"  # platform — unchanged offset
+        assert parts[3] == "group"  # chat_type — unchanged offset
+        assert parts[4] == "g1"  # chat_id — unchanged offset
+
+    def test_default_namespace_layout_matches_named(self):
+        """Default and named keys differ ONLY in parts[1]."""
+        s = _src(platform=Platform.SLACK, chat_id="c1", chat_type="channel", user_id="u1")
+        d = build_session_key(s, profile="default").split(":")
+        n = build_session_key(s, profile="coder").split(":")
+        assert d[0] == n[0] == "agent"
+        assert d[1] == "main" and n[1] == "coder"
+        assert d[2:] == n[2:]  # everything after the namespace is identical
+
+
+class TestMultiplexConfigFlag:
+    """gateway.multiplex_profiles defaults off and round-trips."""
+
+    def test_default_is_false(self):
+        assert GatewayConfig().multiplex_profiles is False
+
+    def test_to_dict_includes_flag(self):
+        assert GatewayConfig().to_dict()["multiplex_profiles"] is False
+
+    def test_from_dict_top_level(self):
+        cfg = GatewayConfig.from_dict({"multiplex_profiles": True})
+        assert cfg.multiplex_profiles is True
+
+    def test_from_dict_nested_gateway(self):
+        cfg = GatewayConfig.from_dict({"gateway": {"multiplex_profiles": True}})
+        assert cfg.multiplex_profiles is True
+
+    def test_from_dict_coerces_truthy_string(self):
+        cfg = GatewayConfig.from_dict({"multiplex_profiles": "true"})
+        assert cfg.multiplex_profiles is True
+
+    def test_roundtrip(self):
+        cfg = GatewayConfig.from_dict(GatewayConfig(multiplex_profiles=True).to_dict())
+        assert cfg.multiplex_profiles is True
+
+
+class TestSessionStoreProfileResolution:
+    """SessionStore._generate_session_key honors the flag: legacy namespace
+    when off, active-profile namespace when on."""
+
+    def _store(self, tmp_path, **cfg_kw):
+        config = GatewayConfig(**cfg_kw)
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            s = SessionStore(sessions_dir=tmp_path, config=config)
+        s._db = None
+        s._loaded = True
+        return s
+
+    def test_flag_off_uses_legacy_namespace(self, tmp_path):
+        store = self._store(tmp_path)  # multiplex_profiles defaults False
+        s = _src(chat_id="99", chat_type="dm")
+        assert store._generate_session_key(s) == "agent:main:telegram:dm:99"
+        assert store._generate_session_key(s) == build_session_key(s)
+
+    def test_flag_off_resolve_profile_is_none(self, tmp_path):
+        store = self._store(tmp_path)
+        assert store._resolve_profile_for_key() is None
+
+    def test_flag_on_uses_active_profile_namespace(self, tmp_path):
+        store = self._store(tmp_path, multiplex_profiles=True)
+        s = _src(chat_id="99", chat_type="dm")
+        with patch("hermes_cli.profiles.get_active_profile_name", return_value="coder"):
+            assert store._generate_session_key(s) == "agent:coder:telegram:dm:99"
+
+    def test_flag_on_default_profile_stays_legacy(self, tmp_path):
+        store = self._store(tmp_path, multiplex_profiles=True)
+        s = _src(chat_id="99", chat_type="dm")
+        with patch("hermes_cli.profiles.get_active_profile_name", return_value="default"):
+            assert store._generate_session_key(s) == "agent:main:telegram:dm:99"
+
+
diff --git a/tests/gateway/test_runtime_env_reload_config_authority.py b/tests/gateway/test_runtime_env_reload_config_authority.py
index 92d54b8863c..d90b58297e8 100644
--- a/tests/gateway/test_runtime_env_reload_config_authority.py
+++ b/tests/gateway/test_runtime_env_reload_config_authority.py
@@ -51,3 +51,18 @@ def test_reload_runtime_env_keeps_env_max_iterations_when_config_omits_key(
     gateway_run._reload_runtime_env_preserving_config_authority()
 
     assert os.environ["HERMES_MAX_ITERATIONS"] == "123"
+
+
+def test_current_max_iterations_reloads_before_reading(monkeypatch) -> None:
+    monkeypatch.setenv("HERMES_MAX_ITERATIONS", "90")
+
+    def _fake_reload() -> None:
+        os.environ["HERMES_MAX_ITERATIONS"] = "200"
+
+    monkeypatch.setattr(
+        gateway_run,
+        "_reload_runtime_env_preserving_config_authority",
+        _fake_reload,
+    )
+
+    assert gateway_run._current_max_iterations() == 200
diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py
index 762af37069c..e768d2a996c 100644
--- a/tests/hermes_cli/test_backup.py
+++ b/tests/hermes_cli/test_backup.py
@@ -153,6 +153,39 @@ class TestShouldExclude:
         assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/SKILL.md"))
         assert not _should_exclude(Path("skills/autonomous-ai-agents/hermes-agent/sub/item.txt"))
 
+    @pytest.mark.parametrize(
+        "rel",
+        [
+            "plugins/my-plugin/.venv/lib/python3.12/site-packages/x/__init__.py",
+            "plugins/my-plugin/venv/bin/python",
+            "mcp/server/site-packages/pkg/mod.py",
+            ".cache/uv/wheels/abc.whl",
+            "plugins/p/.cache/pip/http/deadbeef",
+            ".tox/py312/log.txt",
+            ".nox/tests/bin/pytest",
+            "plugins/p/.pytest_cache/v/cache/lastfailed",
+            ".mypy_cache/3.12/agent.meta.json",
+            ".ruff_cache/0.4.0/abc",
+        ],
+    )
+    def test_excludes_regeneratable_dependency_and_cache_dirs(self, rel):
+        """Python dep trees and tool caches under HERMES_HOME must be skipped —
+        these are what balloon a backup to hundreds of thousands of files."""
+        from hermes_cli.backup import _should_exclude
+        assert _should_exclude(Path(rel))
+
+    def test_does_not_exclude_curator_archive(self):
+        """skills/.archive/ holds restorable archived skills and MUST survive
+        a backup — it is intentionally NOT in the exclusion set."""
+        from hermes_cli.backup import _should_exclude
+        assert not _should_exclude(Path("skills/.archive/old-skill/SKILL.md"))
+
+    def test_does_not_exclude_legit_files_resembling_cache_names(self):
+        """Only directory-component matches are excluded; a normal file is kept."""
+        from hermes_cli.backup import _should_exclude
+        assert not _should_exclude(Path("skills/my-skill/venv-notes.md"))
+        assert not _should_exclude(Path("memories/cache.json"))
+
 # ---------------------------------------------------------------------------
 # Backup tests
 # ---------------------------------------------------------------------------
@@ -272,6 +305,37 @@ class TestBackup:
             agent_files = [n for n in names if "hermes-agent" in n]
             assert agent_files == [], f"hermes-agent files leaked into backup: {agent_files}"
 
+    def test_excludes_dependency_and_cache_trees(self, tmp_path, monkeypatch):
+        """A plugin venv / site-packages / uv wheel cache under HERMES_HOME must be
+        pruned by the walk, while real data (skills, config) is preserved.
+        This is the regression guard for the ballooning-backup bug."""
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        _make_hermes_tree(hermes_home)
+
+        # Simulate the heavy regeneratable trees that ballooned the backup.
+        venv_pkg = hermes_home / "plugins" / "heavy" / ".venv" / "lib" / "site-packages" / "dep"
+        venv_pkg.mkdir(parents=True)
+        (venv_pkg / "__init__.py").write_text("# dep\n")
+        pip_cache = hermes_home / ".cache" / "uv" / "wheels"
+        pip_cache.mkdir(parents=True)
+        (pip_cache / "abc.whl").write_bytes(b"\x00")
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+        out_zip = tmp_path / "backup.zip"
+        from hermes_cli.backup import run_backup
+        run_backup(Namespace(output=str(out_zip)))
+
+        with zipfile.ZipFile(out_zip, "r") as zf:
+            names = zf.namelist()
+        leaked = [n for n in names if ".venv" in n or "site-packages" in n or ".cache" in n]
+        assert leaked == [], f"regeneratable trees leaked into backup: {leaked}"
+        # Real data still present.
+        assert "skills/my-skill/SKILL.md" in names
+        assert "config.yaml" in names
+
     def test_includes_nested_hermes_agent_in_skills(self, tmp_path, monkeypatch):
         """Backup includes skills/.../hermes-agent/ but NOT root hermes-agent/."""
         hermes_home = tmp_path / ".hermes"
diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py
index 3e3144fdfea..5f84004ee80 100644
--- a/tests/hermes_cli/test_config.py
+++ b/tests/hermes_cli/test_config.py
@@ -955,6 +955,17 @@ class TestInterimAssistantMessageConfig:
         assert raw["display"]["interim_assistant_messages"] is True
 
 
+class TestCliRefreshIntervalConfig:
+    """Test the display.cli_refresh_interval config default (#45592 / #48309)."""
+
+    def test_default_config_enables_cli_refresh_interval(self):
+        """cli_refresh_interval defaults to 1.0 so the idle status-bar
+        clock keeps ticking and the bottom chrome stays alive during
+        idle (#45592). Users on emulators where the periodic redraw
+        fights auto-scroll can set it to 0 (#48309)."""
+        assert DEFAULT_CONFIG["display"]["cli_refresh_interval"] == 1.0
+
+
 class TestDiscordChannelPromptsConfig:
     def test_default_config_includes_discord_channel_prompts(self):
         assert DEFAULT_CONFIG["discord"]["channel_prompts"] == {}
diff --git a/tests/hermes_cli/test_debug.py b/tests/hermes_cli/test_debug.py
index 615e379f7d2..f8d958ffa86 100644
--- a/tests/hermes_cli/test_debug.py
+++ b/tests/hermes_cli/test_debug.py
@@ -31,6 +31,9 @@ def hermes_home(tmp_path, monkeypatch):
     (logs_dir / "gateway.log").write_text(
         "2026-04-12 17:00:10 INFO gateway.run: started\n"
     )
+    (logs_dir / "gui.log").write_text(
+        "2026-04-12 17:00:12 INFO hermes_cli.web_server: dashboard request\n"
+    )
     (logs_dir / "desktop.log").write_text(
         "2026-04-12 17:00:15 INFO desktop: backend spawned\n"
     )
@@ -454,6 +457,15 @@ class TestCollectDebugReport:
 
         assert "--- gateway.log" in report
 
+    def test_report_includes_gui_log(self, hermes_home):
+        from hermes_cli.debug import collect_debug_report
+
+        with patch("hermes_cli.dump.run_dump"):
+            report = collect_debug_report(log_lines=50)
+
+        assert "--- gui.log" in report
+        assert "dashboard request" in report
+
     def test_report_includes_desktop_log(self, hermes_home):
         from hermes_cli.debug import collect_debug_report
 
@@ -538,8 +550,8 @@ class TestRunDebugShare:
         assert "FULL agent.log" in out
         assert "FULL gateway.log" in out
 
-    def test_share_uploads_four_pastes(self, hermes_home, capsys):
-        """Successful share uploads report + agent.log + gateway.log + desktop.log."""
+    def test_share_uploads_five_pastes(self, hermes_home, capsys):
+        """Successful share uploads report + agent.log + gateway.log + gui.log + desktop.log."""
         from hermes_cli.debug import run_debug_share
 
         args = MagicMock()
@@ -561,15 +573,17 @@ class TestRunDebugShare:
             run_debug_share(args)
 
         out = capsys.readouterr().out
-        # Should have 4 uploads: report, agent.log, gateway.log, desktop.log
-        assert call_count[0] == 4
+        # Should have 5 uploads: report, agent.log, gateway.log, gui.log, desktop.log
+        assert call_count[0] == 5
         assert "paste.rs/paste1" in out  # Report
         assert "paste.rs/paste2" in out  # agent.log
         assert "paste.rs/paste3" in out  # gateway.log
-        assert "paste.rs/paste4" in out  # desktop.log
+        assert "paste.rs/paste4" in out  # gui.log
+        assert "paste.rs/paste5" in out  # desktop.log
         assert "Report" in out
         assert "agent.log" in out
         assert "gateway.log" in out
+        assert "gui.log" in out
         assert "desktop.log" in out
 
         # Each log paste should start with the dump header
@@ -579,7 +593,10 @@ class TestRunDebugShare:
         gateway_paste = uploaded_content[2]
         assert "--- hermes dump ---" in gateway_paste
         assert "--- full gateway.log ---" in gateway_paste
-        desktop_paste = uploaded_content[3]
+        gui_paste = uploaded_content[3]
+        assert "--- hermes dump ---" in gui_paste
+        assert "--- full gui.log ---" in gui_paste
+        desktop_paste = uploaded_content[4]
         assert "--- hermes dump ---" in desktop_paste
         assert "--- full desktop.log ---" in desktop_paste
 
diff --git a/tests/hermes_cli/test_gateway_restart_loop.py b/tests/hermes_cli/test_gateway_restart_loop.py
index d6c9bb06cec..74ee9e4934e 100644
--- a/tests/hermes_cli/test_gateway_restart_loop.py
+++ b/tests/hermes_cli/test_gateway_restart_loop.py
@@ -6,6 +6,7 @@ Covers:
 - _contains_gateway_lifecycle_command pattern matching
 """
 
+import json
 import os
 from argparse import Namespace
 
@@ -250,3 +251,109 @@ class TestGatewaySelfTargetingGuard:
         args = Namespace(gateway_command="restart", all=False, system=False)
         with pytest.raises(_Reached):
             gw.gateway_command(args)
+
+
+# ---------------------------------------------------------------------------
+# Defense 3: terminal_tool hard-blocks gateway lifecycle commands inside gateway
+# ---------------------------------------------------------------------------
+
+class TestTerminalToolGatewayLifecycleGuard:
+    """terminal_tool must refuse gateway lifecycle commands when _HERMES_GATEWAY=1.
+
+    Issue #37453: systemctl --user restart hermes-gateway runs as a child of the
+    gateway process.  When systemd delivers SIGTERM the gateway kills its own
+    restart command mid-execution — the service may never restart.  The guard
+    must fire before execution, unconditionally (force=True cannot bypass it).
+    """
+
+    def _make_fake_env(self):
+        class _FakeEnv:
+            env = {}
+            def execute(self, command, **kwargs):  # pragma: no cover
+                raise AssertionError("execute must not be reached")
+        return _FakeEnv()
+
+    def _minimal_config(self):
+        return {"env_type": "local", "cwd": "/tmp", "timeout": 60, "lifetime_seconds": 3600}
+
+    def _patch_env(self, monkeypatch, fake_env, *, inside_gateway: bool):
+        import tools.terminal_tool as tt
+        eid = "default"
+        monkeypatch.setattr(tt, "_active_environments", {eid: fake_env})
+        monkeypatch.setattr(tt, "_last_activity", {eid: 0.0})
+        monkeypatch.setattr(tt, "_task_env_overrides", {})
+        monkeypatch.setattr(tt, "_get_env_config", self._minimal_config)
+        if inside_gateway:
+            monkeypatch.setenv("_HERMES_GATEWAY", "1")
+        else:
+            monkeypatch.delenv("_HERMES_GATEWAY", raising=False)
+
+    @pytest.mark.parametrize("cmd", [
+        "systemctl restart hermes-gateway",
+        "systemctl --user restart hermes-gateway",
+        "systemctl stop hermes-gateway.service",
+        "hermes gateway restart",
+        "launchctl kickstart gui/501/ai.hermes.gateway",
+        "pkill -f hermes.*gateway",
+    ])
+    def test_blocks_lifecycle_commands_inside_gateway(self, monkeypatch, cmd):
+        import tools.terminal_tool as tt
+        self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)
+
+        result = json.loads(tt.terminal_tool(command=cmd))
+
+        assert result["exit_code"] == 1
+        assert "Blocked" in result["error"]
+
+    def test_force_true_cannot_bypass_block(self, monkeypatch):
+        import tools.terminal_tool as tt
+        self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)
+
+        result = json.loads(tt.terminal_tool(
+            command="systemctl restart hermes-gateway", force=True
+        ))
+
+        assert result["exit_code"] == 1
+        assert "Blocked" in result["error"]
+
+    def test_safe_systemctl_commands_pass_through(self, monkeypatch):
+        """Non-hermes systemctl commands must not be blocked by this guard."""
+        import tools.terminal_tool as tt
+
+        calls = []
+
+        class _FakeEnv:
+            env = {}
+            def execute(self, command, **kwargs):
+                calls.append(command)
+                return {"output": "Active: running", "returncode": 0}
+
+        self._patch_env(monkeypatch, _FakeEnv(), inside_gateway=True)
+        monkeypatch.setattr(tt, "_check_all_guards", lambda cmd, env: {"approved": True})
+
+        result = json.loads(tt.terminal_tool(command="systemctl status nginx"))
+
+        assert result["exit_code"] == 0
+        assert calls == ["systemctl status nginx"]
+
+    def test_guard_inactive_outside_gateway(self, monkeypatch):
+        """Without _HERMES_GATEWAY=1 the lifecycle guard must not fire."""
+        import tools.terminal_tool as tt
+
+        calls = []
+
+        class _FakeEnv:
+            env = {}
+            def execute(self, command, **kwargs):
+                calls.append(command)
+                return {"output": "restarting...", "returncode": 0}
+
+        self._patch_env(monkeypatch, _FakeEnv(), inside_gateway=False)
+        monkeypatch.setattr(tt, "_check_all_guards", lambda cmd, env: {"approved": True})
+
+        result = json.loads(tt.terminal_tool(command="systemctl restart hermes-gateway"))
+
+        # Outside the gateway the lifecycle guard doesn't block — the normal
+        # approval flow handles it (here mocked as approved).
+        assert result["exit_code"] == 0
+        assert calls == ["systemctl restart hermes-gateway"]
diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py
index 8bb5c1a7b85..1386b1ebdc4 100644
--- a/tests/hermes_cli/test_kanban_db.py
+++ b/tests/hermes_cli/test_kanban_db.py
@@ -505,6 +505,171 @@ def test_stale_claim_with_live_pid_uses_env_ttl_override(
         assert task.claim_expires > int(time.time()) + 3000
 
 
+def test_stale_claim_deferred_when_live_worker_survives_termination(
+    kanban_home, monkeypatch,
+):
+    """A TTL-expired claim whose worker survives the kill must NOT be released.
+
+    Releasing would let the dispatcher spawn a duplicate beside the still-alive
+    worker — the runaway seen when a cgroup memory.high throttle parks a worker
+    in uninterruptible (D) state, where a pending SIGKILL cannot land. The claim
+    is held (extended) and retried next tick instead.
+    """
+    import hermes_cli.kanban_db as _kb
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="x", assignee="a")
+        host = _kb._claimer_id().split(":", 1)[0]
+        kb.claim_task(conn, t, claimer=f"{host}:worker")
+        kb._set_worker_pid(conn, t, 12345)
+
+        old_expires = int(time.time()) - 60
+        # Heartbeat stale by > 1h so the live-pid EXTEND branch is skipped and
+        # the terminate path (the wedged-worker case) runs.
+        conn.execute(
+            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
+            "WHERE id = ?",
+            (old_expires, int(time.time()) - 7200, t),
+        )
+        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
+        monkeypatch.setattr(
+            _kb, "_terminate_reclaimed_worker",
+            lambda *a, **k: {
+                "termination_attempted": True,
+                "host_local": True,
+                "terminated": False,
+            },
+        )
+        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
+        assert reclaimed == 0
+
+        assert kb.get_task(conn, t).status == "running"
+        worker_pid = conn.execute(
+            "SELECT worker_pid FROM tasks WHERE id = ?", (t,),
+        ).fetchone()[0]
+        assert worker_pid == 12345  # worker not orphaned
+        claim_expires = conn.execute(
+            "SELECT claim_expires FROM tasks WHERE id = ?", (t,),
+        ).fetchone()[0]
+        assert claim_expires > old_expires  # claim held, not released
+
+        kinds = [
+            r["kind"] for r in conn.execute(
+                "SELECT kind FROM task_events WHERE task_id = ?", (t,),
+            ).fetchall()
+        ]
+        assert "reclaim_deferred" in kinds
+        assert "reclaimed" not in kinds
+
+
+def test_stale_claim_reclaimed_when_termination_succeeds(
+    kanban_home, monkeypatch,
+):
+    """When the worker is actually killed, the claim is released as before."""
+    import hermes_cli.kanban_db as _kb
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="x", assignee="a")
+        host = _kb._claimer_id().split(":", 1)[0]
+        kb.claim_task(conn, t, claimer=f"{host}:worker")
+        kb._set_worker_pid(conn, t, 12345)
+        conn.execute(
+            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
+            "WHERE id = ?",
+            (int(time.time()) - 60, int(time.time()) - 7200, t),
+        )
+        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: False)
+        monkeypatch.setattr(
+            _kb, "_terminate_reclaimed_worker",
+            lambda *a, **k: {
+                "termination_attempted": True,
+                "host_local": True,
+                "terminated": True,
+            },
+        )
+        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
+        assert reclaimed == 1
+        assert kb.get_task(conn, t).status == "ready"
+
+
+def test_stale_claim_released_when_worker_not_host_local(
+    kanban_home, monkeypatch,
+):
+    """The defer guard only holds OUR own surviving workers.
+
+    A claim we cannot manage (different host, or no kill attempted) must still
+    be released, otherwise a foreign-host claim could strand a task forever.
+    """
+    import hermes_cli.kanban_db as _kb
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="x", assignee="a")
+        host = _kb._claimer_id().split(":", 1)[0]
+        kb.claim_task(conn, t, claimer=f"{host}:worker")
+        kb._set_worker_pid(conn, t, 12345)
+        conn.execute(
+            "UPDATE tasks SET claim_expires = ?, last_heartbeat_at = ? "
+            "WHERE id = ?",
+            (int(time.time()) - 60, int(time.time()) - 7200, t),
+        )
+        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
+        monkeypatch.setattr(
+            _kb, "_terminate_reclaimed_worker",
+            lambda *a, **k: {
+                "termination_attempted": False,
+                "host_local": False,
+                "terminated": False,
+            },
+        )
+        reclaimed = kb.release_stale_claims(conn, signal_fn=lambda _p, _s: None)
+        assert reclaimed == 1
+        assert kb.get_task(conn, t).status == "ready"
+
+
+def test_detect_stale_defers_when_live_worker_survives(kanban_home, monkeypatch):
+    """detect_stale_running must also hold the claim when the worker survives."""
+    import hermes_cli.kanban_db as _kb
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="wedged", assignee="worker")
+        kb.claim_task(conn, t)
+        kb._set_worker_pid(conn, t, os.getpid())
+
+        five_hours_ago = int(time.time()) - (5 * 3600)
+        with kb.write_txn(conn):
+            conn.execute(
+                "UPDATE tasks SET started_at = ?, last_heartbeat_at = NULL "
+                "WHERE id = ?",
+                (five_hours_ago, t),
+            )
+            conn.execute(
+                "UPDATE task_runs SET started_at = ? "
+                "WHERE id = (SELECT current_run_id FROM tasks WHERE id = ?)",
+                (five_hours_ago, t),
+            )
+
+        monkeypatch.setattr(_kb, "_pid_alive", lambda _pid: True)
+        monkeypatch.setattr(
+            _kb, "_terminate_reclaimed_worker",
+            lambda *a, **k: {
+                "termination_attempted": True,
+                "host_local": True,
+                "terminated": False,
+            },
+        )
+        stale = kb.detect_stale_running(
+            conn, stale_timeout_seconds=14400, signal_fn=lambda p, s: None,
+        )
+        assert stale == []
+        assert kb.get_task(conn, t).status == "running"
+        kinds = [
+            r["kind"] for r in conn.execute(
+                "SELECT kind FROM task_events WHERE task_id = ?", (t,),
+            ).fetchall()
+        ]
+        assert "reclaim_deferred" in kinds
+
+
 def test_stale_claim_reclaim_event_records_diagnostic_payload(
     kanban_home, monkeypatch,
 ):
diff --git a/tests/hermes_cli/test_model_picker_expensive_confirm.py b/tests/hermes_cli/test_model_picker_expensive_confirm.py
index b827be3c9e8..222968daea3 100644
--- a/tests/hermes_cli/test_model_picker_expensive_confirm.py
+++ b/tests/hermes_cli/test_model_picker_expensive_confirm.py
@@ -55,10 +55,12 @@ def test_prompt_toolkit_model_picker_defers_confirmation_off_key_handler(monkeyp
         lambda *_args: captured.setdefault("ran_inline", True)
     )
 
-    _bound(cli_mod.HermesCLI._handle_model_picker_selection, self_)()
+    # The key handler now resolves persistence via resolve_persist_behavior,
+    # which defaults to True (persist-by-default). Simulate that call.
+    _bound(cli_mod.HermesCLI._handle_model_picker_selection, self_)(persist_global=True)
 
     assert self_._model_picker_state is None
     assert captured["started"] is True
     assert captured["daemon"] is True
-    assert captured["args"] == (result, False)
+    assert captured["args"] == (result, True)
     assert "ran_inline" not in captured
diff --git a/tests/hermes_cli/test_model_switch_persist_default.py b/tests/hermes_cli/test_model_switch_persist_default.py
new file mode 100644
index 00000000000..912bd7afe47
--- /dev/null
+++ b/tests/hermes_cli/test_model_switch_persist_default.py
@@ -0,0 +1,122 @@
+"""Tests for persist-by-default model switching.
+
+Covers:
+- ``parse_model_flags`` recognises ``--session`` (and keeps ``--global``).
+- ``resolve_persist_behavior`` applies the config-gated default and the
+  ``--session`` / ``--global`` overrides.
+- The default (no flags) persists, which is the user-facing fix: a plain
+  ``/model <name>`` survives across sessions.
+"""
+
+from unittest.mock import patch
+
+from hermes_cli.model_switch import parse_model_flags, resolve_persist_behavior
+
+
+# ---------------------------------------------------------------------------
+# parse_model_flags
+# ---------------------------------------------------------------------------
+
+
+class TestParseModelFlagsSession:
+    """Pin the full 5-tuple ``parse_model_flags`` returns for each flag mix.
+
+    Judging by the cases below, the last three booleans track ``--global``,
+    ``--refresh`` and ``--session`` respectively.
+    """
+
+    def test_no_flags(self):
+        """A bare model name sets no flag and no provider."""
+        parsed = parse_model_flags("sonnet")
+        assert parsed == ("sonnet", "", False, False, False)
+
+    def test_global_flag(self):
+        """``--global`` sets only the third field."""
+        parsed = parse_model_flags("sonnet --global")
+        assert parsed == ("sonnet", "", True, False, False)
+
+    def test_session_flag(self):
+        """``--session`` sets only the fifth field."""
+        parsed = parse_model_flags("sonnet --session")
+        assert parsed == ("sonnet", "", False, False, True)
+
+    def test_session_with_provider(self):
+        """``--provider`` and ``--session`` are parsed side by side."""
+        parsed = parse_model_flags("sonnet --provider anthropic --session")
+        assert parsed == ("sonnet", "anthropic", False, False, True)
+
+    def test_refresh_flag_still_parsed(self):
+        """``--refresh`` alone gives an empty model name and the fourth field set."""
+        parsed = parse_model_flags("--refresh")
+        assert parsed == ("", "", False, True, False)
+
+    def test_unicode_dash_session_normalized(self):
+        # Telegram/iOS auto-convert "--" to en/em dashes; the en-dash form
+        # must still be recognised as the session flag.
+        parsed = parse_model_flags("sonnet \u2013session")
+        assert parsed == ("sonnet", "", False, False, True)
+
+
+# ---------------------------------------------------------------------------
+# resolve_persist_behavior
+# ---------------------------------------------------------------------------
+
+
+class TestResolvePersistBehavior:
+    """``resolve_persist_behavior(<--global>, <--session>)`` under canned configs."""
+
+    @staticmethod
+    def _resolve(cfg, global_flag, session_flag):
+        with _config(cfg):
+            return resolve_persist_behavior(global_flag, session_flag)
+
+    def test_session_flag_always_session_only(self):
+        """--session opts out even if the config default is True."""
+        assert self._resolve({"model": {"persist_switch_by_default": True}}, False, True) is False
+
+    def test_global_flag_always_persists(self):
+        """--global forces persist even if the config default is False."""
+        assert self._resolve({"model": {"persist_switch_by_default": False}}, True, False) is True
+
+    def test_default_persists_when_config_missing(self):
+        """No model section at all → built-in default (True)."""
+        assert self._resolve({}, False, False) is True
+
+    def test_default_persists_when_key_true(self):
+        assert self._resolve({"model": {"persist_switch_by_default": True}}, False, False) is True
+
+    def test_default_session_only_when_key_false(self):
+        assert self._resolve({"model": {"persist_switch_by_default": False}}, False, False) is False
+
+    def test_default_when_model_is_flat_string(self):
+        """Fresh install: ``model: ""`` (not a dict) → built-in default True."""
+        assert self._resolve({"model": ""}, False, False) is True
+
+    def test_session_overrides_global_when_both_set(self):
+        """--session is the explicit opt-out and wins over --global."""
+        assert self._resolve({"model": {"persist_switch_by_default": True}}, True, True) is False
+
+
+# ---------------------------------------------------------------------------
+# helper
+# ---------------------------------------------------------------------------
+
+
+class _config:
+    """Context manager pinning ``hermes_cli.config.load_config`` to a fixed dict."""
+
+    def __init__(self, cfg: dict):
+        self.cfg = cfg
+
+    def __enter__(self):
+        # Patching the defining module is enough because (per the original
+        # note) resolve_persist_behavior imports load_config lazily inside
+        # the function, i.e. after this patch is active.
+        target = "hermes_cli.config.load_config"
+        self._patch = patch(target, return_value=self.cfg)
+        self._patch.start()
+        return self
+
+    def __exit__(self, *exc):
+        # Returns None, so exceptions raised inside the block propagate.
+        self._patch.stop()
diff --git a/tests/hermes_cli/test_profiles.py b/tests/hermes_cli/test_profiles.py
index 1ea1845d9d3..59afe84e563 100644
--- a/tests/hermes_cli/test_profiles.py
+++ b/tests/hermes_cli/test_profiles.py
@@ -35,6 +35,7 @@ from hermes_cli.profiles import (
     has_bundled_skills_opt_out,
     NO_BUNDLED_SKILLS_MARKER,
     backfill_profile_envs,
+    profiles_to_serve,
 )
 from hermes_cli.config import DEFAULT_CONFIG
 
@@ -1487,3 +1488,48 @@ class TestEdgeCases:
             delete_profile("coder", yes=True)
 
         assert get_active_profile() == "default"
+
+
+class TestProfilesToServe:
+    """``profiles_to_serve(multiplex)``: which (name, home) pairs a gateway serves."""
+
+    def test_off_returns_only_active_default(self, profile_env):
+        served = profiles_to_serve(multiplex=False)
+        assert len(served) == 1
+        profile_name, profile_home = served[0]
+        assert profile_name == "default"
+        assert profile_home == _get_default_hermes_home()
+
+    def test_off_returns_only_active_named(self, profile_env, monkeypatch):
+        # Mimic a named profile's gateway: HERMES_HOME points at the profile
+        # dir, and the active profile name is expected to follow from that.
+        create_profile("coder", no_alias=True)
+        monkeypatch.setenv("HERMES_HOME", str(get_profile_dir("coder")))
+        served = profiles_to_serve(multiplex=False)
+        assert len(served) == 1
+        assert served[0][0] == "coder"
+        assert served[0][1] == get_profile_dir("coder")
+
+    def test_on_returns_default_plus_all_named(self, profile_env):
+        for profile in ("coder", "writer"):
+            create_profile(profile, no_alias=True)
+        home_by_name = dict(profiles_to_serve(multiplex=True))
+        assert set(home_by_name) == {"default", "coder", "writer"}
+        assert home_by_name["default"] == _get_default_hermes_home()
+        assert home_by_name["coder"] == get_profile_dir("coder")
+
+    def test_on_default_always_first(self, profile_env):
+        create_profile("coder", no_alias=True)
+        served = profiles_to_serve(multiplex=True)
+        assert served[0][0] == "default"
+
+    def test_on_active_profile_does_not_change_set(self, profile_env):
+        """Switching the active profile leaves the served set unchanged."""
+        create_profile("coder", no_alias=True)
+        set_active_profile("coder")
+        home_by_name = dict(profiles_to_serve(multiplex=True))
+        assert set(home_by_name) == {"default", "coder"}
+
+    def test_on_no_named_profiles_returns_just_default(self, profile_env):
+        names = [name for name, _home in profiles_to_serve(multiplex=True)]
+        assert names == ["default"]
diff --git a/tests/hermes_cli/test_provider_catalog.py b/tests/hermes_cli/test_provider_catalog.py
new file mode 100644
index 00000000000..508c18aae75
--- /dev/null
+++ b/tests/hermes_cli/test_provider_catalog.py
@@ -0,0 +1,127 @@
+"""Tests for the unified provider catalog (hermes_cli.provider_catalog).
+
+These are invariant tests, not snapshots: they assert the parity *contract*
+between what ``hermes model`` shows (``CANONICAL_PROVIDERS``) and what the
+catalog exposes, plus how each provider's ``auth_type`` maps to a desktop tab —
+never a specific provider count or a frozen vendor list (both change over time).
+"""
+
+from hermes_cli.models import CANONICAL_PROVIDERS
+from hermes_cli.provider_catalog import (
+    ProviderDescriptor,
+    provider_catalog,
+    provider_catalog_by_slug,
+    tab_for_auth_type,
+)
+
+
+def test_catalog_covers_every_hermes_model_provider():
+    """Every provider listed by `hermes model` must have a catalog descriptor."""
+    catalog_slugs = {descriptor.slug for descriptor in provider_catalog()}
+    for canonical in CANONICAL_PROVIDERS:
+        slug = canonical.slug
+        why = f"{slug} is shown in `hermes model` but missing from provider_catalog()"
+        assert slug in catalog_slugs, why
+
+
+def test_catalog_has_no_providers_outside_hermes_model():
+    """Each catalog descriptor's slug must also be a CANONICAL_PROVIDERS slug."""
+    known_slugs = {entry.slug for entry in CANONICAL_PROVIDERS}
+    for descriptor in provider_catalog():
+        assert descriptor.slug in known_slugs, f"{descriptor.slug} in catalog but not in CANONICAL_PROVIDERS"
+
+
+def test_every_descriptor_lands_on_exactly_one_known_tab():
+    for descriptor in provider_catalog():
+        assert descriptor.tab in {"keys", "accounts"}, f"{descriptor.slug} has bad tab {descriptor.tab!r}"
+
+
+def test_descriptor_count_matches_canonical():
+    """The catalog has as many entries as CANONICAL_PROVIDERS, all slugs unique."""
+    descriptors = provider_catalog()
+    assert len(descriptors) == len(CANONICAL_PROVIDERS)
+    assert len({item.slug for item in descriptors}) == len(descriptors)
+
+
+def test_profileless_providers_still_present():
+    """Providers lacking a ProviderProfile must still be in the catalog.
+
+    Per the original note, these four slugs exist only as registry + canonical
+    entries on main; each needs a non-empty label and description.
+    """
+    catalog = provider_catalog_by_slug()
+    for slug in ("lmstudio", "openai-api", "tencent-tokenhub", "xai-oauth"):
+        assert slug in catalog, f"{slug} dropped from catalog (profile-less provider)"
+        descriptor = catalog[slug]
+        assert descriptor.label, f"{slug} has empty label despite canonical fallback"
+        assert descriptor.description, f"{slug} has empty description despite fallback"
+
+
+def test_api_key_providers_route_to_keys_oauth_to_accounts():
+    """Spot-check routing for two API-key providers and two sign-in providers."""
+    catalog = provider_catalog_by_slug()
+    for slug in ("kilocode", "openai-api"):  # api_key → keys
+        assert catalog[slug].tab == "keys"
+    # account / sign-in flows → accounts
+    for slug in ("google-gemini-cli", "copilot-acp"):
+        assert catalog[slug].tab == "accounts"
+
+
+def test_copilot_surfaces_as_a_provider_with_its_own_token_var():
+    """Copilot is a keys-tab provider whose first env var is COPILOT_GITHUB_TOKEN.
+
+    Regression for the reported bug: a GitHub Copilot login showed up under
+    tools, never as a provider, because the shared GITHUB_TOKEN is
+    tool-category. Leading with the provider-owned token is what lets the
+    desktop render Copilot as its own provider card.
+    """
+    catalog = provider_catalog_by_slug()
+    assert "copilot" in catalog
+    copilot = catalog["copilot"]
+    assert copilot.tab == "keys"
+    env_vars = copilot.api_key_env_vars
+    assert env_vars, "Copilot must expose a credential env var"
+    assert env_vars[0] == "COPILOT_GITHUB_TOKEN", (
+        "Copilot's primary var must be the provider-owned token, not shared GITHUB_TOKEN"
+    )
+
+
+def test_bedrock_routes_to_keys():
+    """The bedrock descriptor (aws_sdk auth, per the author) is on the keys tab."""
+    bedrock = provider_catalog_by_slug()["bedrock"]
+    assert bedrock.tab == "keys"
+
+
+def test_api_key_providers_expose_a_credential_env_var():
+    """Every ``auth_type == "api_key"`` descriptor needs at least one env var.
+
+    Without one the GUI has nowhere to write a pasted key. ``aws_sdk``
+    providers (bedrock) are skipped implicitly by the auth-type filter; the
+    ``custom`` bring-your-own-endpoint pseudo-provider is skipped explicitly.
+    """
+    exempt = {"custom"}
+    for descriptor in provider_catalog():
+        if descriptor.auth_type != "api_key" or descriptor.slug in exempt:
+            continue
+        env_vars = descriptor.api_key_env_vars
+        assert env_vars, f"{descriptor.slug} is api_key but exposes no env var"
+
+
+def test_order_mirrors_canonical_declaration():
+    descriptors = provider_catalog()
+    assert [item.order for item in descriptors] == list(range(len(descriptors)))
+    assert [item.slug for item in descriptors] == [entry.slug for entry in CANONICAL_PROVIDERS]
+
+
+def test_descriptors_are_provider_descriptor_instances():
+    """provider_catalog() must contain only ProviderDescriptor objects."""
+    assert all(isinstance(item, ProviderDescriptor) for item in provider_catalog())
+
+
+def test_tab_for_auth_type_helper():
+    """api_key/aws_sdk map to the keys tab; the four listed sign-in types to accounts."""
+    for auth_type in ("api_key", "aws_sdk"):
+        assert tab_for_auth_type(auth_type) == "keys"
+    account_auth_types = ("oauth_external", "oauth_device_code", "copilot", "external_process")
+    for auth_type in account_auth_types:
+        assert tab_for_auth_type(auth_type) == "accounts"
diff --git a/tests/hermes_cli/test_provider_parity.py b/tests/hermes_cli/test_provider_parity.py
new file mode 100644
index 00000000000..0f49f260e71
--- /dev/null
+++ b/tests/hermes_cli/test_provider_parity.py
@@ -0,0 +1,90 @@
+"""End-to-end provider parity contract: the desktop Providers tabs must show
+the SAME provider universe as ``hermes model`` (the CLI/TUI picker).
+
+This is the single load-bearing invariant of the unified provider catalog:
+
+    keys(/api/env provider rows) ∪ ids(/api/providers/oauth) ⊇ CANONICAL_PROVIDERS
+
+i.e. every provider the CLI picker offers is configurable from the desktop app,
+on one of the two Providers sub-tabs (API keys or Accounts). It is asserted as
+an invariant against the real FastAPI endpoints (not a snapshot / count), so it
+can never silently drift again when a provider plugin is added.
+"""
+
+from fastapi.testclient import TestClient
+
+from hermes_cli.models import CANONICAL_PROVIDERS
+from hermes_cli.provider_catalog import provider_catalog
+from hermes_cli.web_server import _SESSION_TOKEN, app
+
+client = TestClient(app)
+HEADERS = {"X-Hermes-Session-Token": _SESSION_TOKEN}
+
+# `custom` is the bring-your-own-endpoint pseudo-provider configured inline via
+# the model picker's local-endpoint flow, not a fixed credential card. It is in
+# the CLI picker's universe but intentionally has no dedicated Providers-tab
+# card. Exempt it from the union check.
+_EXEMPT = {"custom"}
+
+# Providers that legitimately offer BOTH auth methods and so intentionally
+# appear on both desktop tabs (an API-key card AND an account sign-in card).
+# Anthropic supports a direct API key (Keys tab) and a subscription OAuth /
+# Claude Code login (Accounts tab); surfacing both is correct, not a bug.
+_DUAL_TAB = {"anthropic"}
+
+
+def _keys_tab_providers() -> set[str]:
+    """Collect the truthy ``provider`` of every /api/env row in category "provider"."""
+    slugs: set[str] = set()
+    for row in client.get("/api/env", headers=HEADERS).json().values():
+        slug = row.get("provider")
+        if row.get("category") == "provider" and slug:
+            slugs.add(slug)
+    return slugs
+
+
+def _accounts_tab_providers() -> set[str]:
+    """Return the ``id`` of every provider listed by /api/providers/oauth."""
+    payload = client.get("/api/providers/oauth", headers=HEADERS).json()
+    return {entry["id"] for entry in payload["providers"]}
+
+
+def test_every_hermes_model_provider_is_configurable_in_desktop():
+    """PARITY CONTRACT: GUI (keys ∪ accounts) ⊇ `hermes model` universe.
+
+    Slugs in ``_EXEMPT`` are excused from the check.
+    """
+    configurable = _keys_tab_providers().union(_accounts_tab_providers())
+    excused_or_present = configurable | _EXEMPT
+    missing = [e.slug for e in CANONICAL_PROVIDERS if e.slug not in excused_or_present]
+    assert not missing, (
+        "providers shown in `hermes model` but not configurable in the desktop "
+        f"Providers tabs: {missing}"
+    )
+
+
+def test_each_provider_lands_on_the_tab_its_auth_type_dictates():
+    """Cross-check catalog tab routing against where providers really render.
+
+    Accounts-tab slugs must be in /api/providers/oauth; keys-tab slugs that
+    declare env vars must be in /api/env. ``_EXEMPT`` slugs are skipped.
+    """
+    on_keys_tab = _keys_tab_providers()
+    on_accounts_tab = _accounts_tab_providers()
+    checked = (d for d in provider_catalog() if d.slug not in _EXEMPT)
+    for descriptor in checked:
+        if descriptor.tab == "accounts":
+            assert descriptor.slug in on_accounts_tab, f"{descriptor.slug} (accounts tab) missing from /api/providers/oauth"
+        elif descriptor.tab == "keys" and descriptor.api_key_env_vars:
+            assert descriptor.slug in on_keys_tab, f"{descriptor.slug} (keys tab) missing from /api/env"
+
+
+def test_no_provider_appears_on_both_tabs():
+    """No provider may be offered on both the keys and the accounts tab.
+
+    Duplicating a provider would confuse users about where credentials go.
+    ``_EXEMPT`` and the genuinely dual-auth ``_DUAL_TAB`` slugs are allowed.
+    """
+    on_both = _keys_tab_providers().intersection(_accounts_tab_providers())
+    overlap = on_both - (_EXEMPT | _DUAL_TAB)
+    assert not overlap, f"providers appearing on BOTH desktop tabs: {sorted(overlap)}"
diff --git a/tests/hermes_cli/test_web_oauth_dispatch.py b/tests/hermes_cli/test_web_oauth_dispatch.py
index 1d87573fe58..016cd932f58 100644
--- a/tests/hermes_cli/test_web_oauth_dispatch.py
+++ b/tests/hermes_cli/test_web_oauth_dispatch.py
@@ -470,6 +470,39 @@ def test_xai_oauth_listed_as_loopback_flow():
     assert "grok" in providers["xai-oauth"]["name"].lower()
 
 
+def test_accounts_offers_every_oauth_provider_from_catalog():
+    """PARITY CONTRACT: every catalog descriptor routed to the accounts tab
+    must be offered by /api/providers/oauth.
+
+    This keeps the desktop Accounts tab in lockstep with the CLI picker.
+    """
+    from hermes_cli.provider_catalog import provider_catalog
+
+    response = client.get("/api/providers/oauth", headers=HEADERS)
+    assert response.status_code == 200, response.text
+    offered_ids = {entry["id"] for entry in response.json()["providers"]}
+    account_slugs = [d.slug for d in provider_catalog() if d.tab == "accounts"]
+    for slug in account_slugs:
+        assert slug in offered_ids, (
+            f"{slug} is an accounts-tab provider in `hermes model` but is "
+            f"missing from the desktop Accounts tab (/api/providers/oauth)"
+        )
+
+
+def test_gemini_cli_and_copilot_acp_now_in_accounts():
+    """google-gemini-cli and copilot-acp must both be offered on the Accounts tab;
+    copilot-acp as an external, non-disconnectable (read-only) card.
+    """
+    response = client.get("/api/providers/oauth", headers=HEADERS)
+    assert response.status_code == 200, response.text
+    by_id = {entry["id"]: entry for entry in response.json()["providers"]}
+    for expected_id in ("google-gemini-cli", "copilot-acp"):
+        assert expected_id in by_id
+    copilot_acp = by_id["copilot-acp"]
+    assert copilot_acp["flow"] == "external"
+    assert copilot_acp["disconnectable"] is False
+
+
 def test_oauth_catalog_marks_external_providers_not_disconnectable():
     """External CLI credentials are visible in Accounts but cannot be removed by Hermes."""
     resp = client.get("/api/providers/oauth", headers=HEADERS)
@@ -804,3 +837,56 @@ def test_unknown_pkce_provider_rejected_cleanly():
     # 4xx — what we MUST NOT see is a 200 with claude.ai in the body.
     assert resp.status_code >= 400, resp.text
     assert "claude.ai" not in resp.text.lower()
+
+
+def test_status_falls_through_to_generic_dispatcher_for_catalog_only_provider():
+    """A provider with no hardcoded status branch reflects its REAL status.
+
+    With ``hermes_cli.auth.get_auth_status`` patched to a logged-in payload,
+    ``_resolve_provider_status(slug, None)`` must report that login state,
+    label, expiry and refresh-token flag, and expose the access token only
+    as a preview.
+    """
+    import hermes_cli.web_server as web_server_mod
+
+    secret_token = "sk-future-secret-token-xyz"
+    canned_status = {
+        "logged_in": True,
+        "provider": "some-future-oauth",
+        "name": "Future OAuth Provider",
+        "access_token": secret_token,
+        "expires_at": "2026-12-01T00:00:00Z",
+        "has_refresh_token": True,
+    }
+    with patch("hermes_cli.auth.get_auth_status", return_value=canned_status):
+        status = web_server_mod._resolve_provider_status("some-future-oauth", None)
+
+    assert status["logged_in"] is True
+    assert status["source"] == "some-future-oauth"
+    assert status["source_label"] == "Future OAuth Provider"
+    preview = status["token_preview"]
+    assert preview and secret_token not in preview  # previewed, never returned whole
+    assert status["expires_at"] == "2026-12-01T00:00:00Z"
+    assert status["has_refresh_token"] is True
+
+
+def test_status_hardcoded_branch_wins_over_generic_fallback():
+    """An existing hardcoded branch (nous) is unaffected by the fallthrough."""
+    import hermes_cli.web_server as web_server_mod
+
+    portal_url = "https://portal.test"
+    nous_status = {"logged_in": True, "portal_base_url": portal_url}
+    with patch("hermes_cli.auth.get_nous_auth_status", return_value=nous_status):
+        status = web_server_mod._resolve_provider_status("nous", None)
+
+    assert status["source"] == "nous_portal"
+    assert status["source_label"] == portal_url
+
+
+def test_status_unknown_provider_degrades_to_logged_out():
+    """A logged-out answer from the generic dispatcher stays logged-out."""
+    import hermes_cli.web_server as web_server_mod
+
+    with patch("hermes_cli.auth.get_auth_status", return_value={"logged_in": False}):
+        status = web_server_mod._resolve_provider_status("totally-unknown", None)
+    assert status["logged_in"] is False
diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py
index e0ad77dfc8a..0a5319a0518 100644
--- a/tests/hermes_cli/test_web_server.py
+++ b/tests/hermes_cli/test_web_server.py
@@ -1299,6 +1299,57 @@ class TestWebServerEndpoints:
         for key, info in data.items():
             assert info["channel_managed"] is (key in channel_keys)
 
+    def test_get_env_vars_surfaces_catalog_providers(self):
+        """Each keys-tab catalog provider's primary env var is a provider row.
+
+        Must hold even for providers with no hand entry in OPTIONAL_ENV_VARS.
+        Regression for the GUI⇄CLI drift: openai-api, kilocode, novita,
+        tencent-tokenhub and copilot were configurable via `hermes model` but
+        invisible in the desktop Providers → API keys tab.
+        """
+        from hermes_cli.provider_catalog import provider_catalog
+
+        env_rows = self.client.get("/api/env").json()
+        keyed = [
+            d for d in provider_catalog()
+            if d.tab == "keys" and d.api_key_env_vars
+        ]
+        for descriptor in keyed:
+            # Only the PRIMARY var is checked; shared aliases keep their category.
+            primary_var = descriptor.api_key_env_vars[0]
+            assert primary_var in env_rows, f"{primary_var} ({descriptor.slug}) missing from /api/env"
+            row = env_rows[primary_var]
+            assert row["category"] == "provider"
+            assert row["provider"] == descriptor.slug
+            assert row["provider_label"] == descriptor.label
+
+    def test_get_env_vars_provider_rows_carry_grouping_hints(self):
+        """The OPENAI_API_KEY row in /api/env is tagged to the openai-api provider.
+
+        The desktop Keys tab groups rows by this backend-supplied ``provider``.
+        """
+        openai_row = self.client.get("/api/env").json()["OPENAI_API_KEY"]
+        assert openai_row["provider"] == "openai-api"
+        assert openai_row["category"] == "provider"
+
+    def test_get_env_vars_copilot_uses_provider_token_not_shared_github_token(self):
+        """COPILOT_GITHUB_TOKEN is copilot's provider row; GITHUB_TOKEN is not."""
+        env_rows = self.client.get("/api/env").json()
+        copilot_row = env_rows["COPILOT_GITHUB_TOKEN"]
+        assert copilot_row["provider"] == "copilot"
+        assert copilot_row["category"] == "provider"
+        shared_row = env_rows.get("GITHUB_TOKEN", {})
+        assert shared_row.get("provider", "") != "copilot"
+
+    def test_get_env_vars_bedrock_aws_vars_tagged_to_provider(self):
+        """AWS_REGION and AWS_PROFILE rows in /api/env belong to the bedrock provider."""
+        env_rows = self.client.get("/api/env").json()
+        region_row = env_rows["AWS_REGION"]
+        assert region_row["provider"] == "bedrock"
+        assert region_row["category"] == "provider"
+        profile_row = env_rows["AWS_PROFILE"]
+        assert profile_row["provider"] == "bedrock"
+
     def test_platform_scoped_messaging_env_vars_are_channel_managed(self):
         from hermes_cli.web_server import (
             _MESSAGING_KEYS_PAGE_KEYS,
@@ -1552,6 +1603,27 @@ class TestWebServerEndpoints:
         assert telegram["enabled"] is False
         assert any(field["key"] == "TELEGRAM_BOT_TOKEN" and field["required"] for field in telegram["env_vars"])
 
+    def test_slack_messaging_platform_exposes_user_allowlist(self):
+        """Slack's platform entry must expose the SLACK_ALLOWED_USERS field.
+
+        Also pins the prompt, description and help snippets shown for it.
+        """
+        response = self.client.get("/api/messaging/platforms")
+        assert response.status_code == 200
+        slack = next(p for p in response.json()["platforms"] if p["id"] == "slack")
+        by_key = {field["key"]: field for field in slack["env_vars"]}
+
+        assert "allowed Slack member IDs" in slack["description"]
+        required_keys = {"SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"}
+        assert required_keys <= set(by_key)
+        allowlist = by_key["SLACK_ALLOWED_USERS"]
+        assert allowlist["prompt"] == "Allowed Slack member IDs"
+        assert allowlist["is_password"] is False
+        assert "member IDs" in allowlist["description"]
+        assert "Bot User OAuth Token" in by_key["SLACK_BOT_TOKEN"]["help"]
+        assert "App-Level Tokens" in by_key["SLACK_APP_TOKEN"]["help"]
+        assert "Copy member ID" in allowlist["help"]
+
     def test_weixin_messaging_metadata_describes_personal_ilink_setup(self):
         resp = self.client.get("/api/messaging/platforms")
 
@@ -1628,6 +1700,70 @@ class TestWebServerEndpoints:
         telegram = next(platform for platform in status if platform["id"] == "telegram")
         assert telegram["enabled"] is False
 
+    def test_update_messaging_platform_saves_slack_allowed_users(self):
+        """A comma-separated member-ID list is accepted and persisted verbatim."""
+        from hermes_cli.config import load_env
+
+        member_ids = "U01ABC2DEF3,U04XYZ5LMN6"
+        payload = {"env": {"SLACK_ALLOWED_USERS": member_ids}}
+        response = self.client.put("/api/messaging/platforms/slack", json=payload)
+
+        assert response.status_code == 200
+        assert load_env()["SLACK_ALLOWED_USERS"] == member_ids
+
+    def test_update_messaging_platform_rejects_swapped_slack_bot_token(self):
+        """An ``xapp-`` value in SLACK_BOT_TOKEN is a 400 whose detail names ``xoxb-``."""
+        payload = {"env": {"SLACK_BOT_TOKEN": "xapp-wrong-token-type"}}
+        response = self.client.put("/api/messaging/platforms/slack", json=payload)
+
+        assert response.status_code == 400
+        error_detail = response.json()["detail"]
+        assert "xoxb-" in error_detail
+
+    def test_update_messaging_platform_rejects_swapped_slack_app_token(self):
+        """An ``xoxb-`` value in SLACK_APP_TOKEN is a 400 whose detail names ``xapp-``."""
+        payload = {"env": {"SLACK_APP_TOKEN": "xoxb-wrong-token-type"}}
+        response = self.client.put("/api/messaging/platforms/slack", json=payload)
+
+        assert response.status_code == 400
+        error_detail = response.json()["detail"]
+        assert "xapp-" in error_detail
+
+    def test_update_messaging_platform_rejects_invalid_slack_allowed_users(self):
+        """A list with a malformed entry is a 400 whose detail mentions member IDs."""
+        payload = {"env": {"SLACK_ALLOWED_USERS": "U01ABC2DEF3,not-a-user"}}
+        response = self.client.put("/api/messaging/platforms/slack", json=payload)
+
+        assert response.status_code == 400
+        error_detail = response.json()["detail"]
+        assert "member IDs" in error_detail
+
+    def test_update_messaging_platform_accepts_slack_allowed_users_wildcard(self):
+        """``*`` must be accepted and stored as-is, not rejected as malformed.
+
+        Per the original note it is the gateway's allow-all wildcard
+        (gateway/platforms/slack.py).
+        """
+        from hermes_cli.config import load_env
+
+        payload = {"env": {"SLACK_ALLOWED_USERS": "*"}}
+        response = self.client.put("/api/messaging/platforms/slack", json=payload)
+        assert response.status_code == 200
+        assert load_env()["SLACK_ALLOWED_USERS"] == "*"
+
+    def test_update_messaging_platform_accepts_slack_allowed_users_trailing_comma(self):
+        """Interior/trailing commas are accepted and the value is stored unchanged.
+
+        Per the original note the gateway drops empty entries
+        (gateway/platforms/slack.py), so the dashboard must not reject them.
+        """
+        from hermes_cli.config import load_env
+
+        raw_value = "U01ABC2DEF3,,W04XYZ5LMN6,"
+        response = self.client.put("/api/messaging/platforms/slack", json={"env": {"SLACK_ALLOWED_USERS": raw_value}})
+        assert response.status_code == 200
+        assert load_env()["SLACK_ALLOWED_USERS"] == raw_value
+
     def test_messaging_platform_test_reports_missing_required_setup(self):
         resp = self.client.put("/api/messaging/platforms/discord", json={"enabled": True})
         assert resp.status_code == 200
@@ -5062,6 +5198,7 @@ class TestPtyWebSocket:
 
         _argv, _cwd, env = self.ws_module._resolve_chat_argv()
 
+        assert env["HERMES_TUI_DASHBOARD"] == "1"
         assert env["HERMES_TUI_INLINE"] == "1"
         assert env["HERMES_TUI_DISABLE_MOUSE"] == "1"
 
diff --git a/tests/hermes_cli/test_web_server_files.py b/tests/hermes_cli/test_web_server_files.py
index 46ba18b1355..b295f0ab998 100644
--- a/tests/hermes_cli/test_web_server_files.py
+++ b/tests/hermes_cli/test_web_server_files.py
@@ -436,3 +436,55 @@ def test_stream_upload_large_file_under_cap_succeeds(forced_files_client, monkey
     assert created.status_code == 200
     assert file_path.stat().st_size == len(payload)
     assert file_path.read_bytes() == payload
+
+
+def test_stream_upload_cleans_temp_on_cancellation(forced_files_client):
+    """A cancelled upload must leave neither the target nor a temp file behind.
+
+    asyncio.CancelledError derives from BaseException, so ``except Exception``
+    handlers never see it; per the original note the endpoint's cleanup lives
+    in a ``finally`` keyed on a success flag, and without it every aborted
+    large upload (the NS-501 scenario) would orphan a partial ``.upload``
+    temp file. The endpoint coroutine is driven directly so the
+    BaseException propagates instead of being swallowed by the test client.
+    """
+    import asyncio
+
+    _unused_client, files_root = forced_files_client
+    target = files_root / "out" / "aborted.bin"
+    target.parent.mkdir(parents=True, exist_ok=True)
+
+    class _AbortingUpload:
+        """UploadFile stand-in: one chunk, then CancelledError on every later read."""
+
+        filename = "aborted.bin"
+
+        def __init__(self):
+            self._first_read_done = False
+
+        async def read(self, _size):
+            if self._first_read_done:
+                raise asyncio.CancelledError()
+            self._first_read_done = True
+            return b"partial chunk before the client vanished"
+
+        async def close(self):
+            return None
+
+    # Keyword arguments for the endpoint; the request is an empty stand-in
+    # object and the upload aborts on its second read.
+    call_kwargs = {
+        "request": SimpleNamespace(),
+        "file": _AbortingUpload(),
+        "path": str(target),
+        "overwrite": True,
+    }
+
+    with pytest.raises(asyncio.CancelledError):
+        asyncio.run(web_server.upload_managed_file_stream(**call_kwargs))
+
+    # No partial data was promoted into place ...
+    assert not target.exists()
+    # ... and no .upload temp file was left behind.
+    stray = [entry.name for entry in target.parent.iterdir() if ".upload" in entry.name]
+    assert stray == [], f"temp upload files leaked on cancellation: {stray}"
diff --git a/tests/openviking_plugin/test_openviking.py b/tests/openviking_plugin/test_openviking.py
index f10fc502000..171e6abc8ac 100644
--- a/tests/openviking_plugin/test_openviking.py
+++ b/tests/openviking_plugin/test_openviking.py
@@ -265,6 +265,355 @@ class TestOpenVikingSkillQuerySafety:
         assert RecordingVikingClient.calls == []
 
 
+class TestOpenVikingTurnConversion:
+    def test_extract_current_turn_anchors_on_latest_matching_user_and_assistant(self):
+        messages = [
+            {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+            {"role": "assistant", "content": "Earlier answer."},
+            {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+            {
+                "role": "assistant",
+                "content": "I will search the codebase.",
+                "tool_calls": [
+                    {
+                        "id": "call_rg_1",
+                        "type": "function",
+                        "function": {
+                            "name": "shell_command",
+                            "arguments": json.dumps({"command": "rg assemble"}),
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_rg_1",
+                "name": "shell_command",
+                "content": "agent/context_engine.py: no preassemble hook",
+            },
+            {"role": "assistant", "content": "The current main does not expose assemble."},
+        ]
+
+        turn = OpenVikingMemoryProvider._extract_current_turn_messages(
+            messages,
+            "Please inspect the repository for assemble hooks.",
+            "The current main does not expose assemble.",
+        )
+
+        assert turn == messages[2:]
+
+    def test_messages_to_openviking_batch_coalesces_tool_results(self):
+        turn = [
+            {"role": "user", "content": "Please inspect the repository for assemble hooks."},
+            {
+                "role": "assistant",
+                "content": "I will search the codebase.",
+                "tool_calls": [
+                    {
+                        "id": "call_rg_1",
+                        "type": "function",
+                        "function": {
+                            "name": "shell_command",
+                            "arguments": json.dumps({"command": "rg assemble"}),
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_rg_1",
+                "name": "shell_command",
+                "content": "agent/context_engine.py: no preassemble hook",
+            },
+            {"role": "assistant", "content": "The current main does not expose assemble."},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert [message["role"] for message in batch] == ["user", "assistant", "assistant", "assistant"]
+        assert batch[0]["parts"] == [
+            {"type": "text", "text": "Please inspect the repository for assemble hooks."}
+        ]
+        assert batch[1]["parts"] == [
+            {"type": "text", "text": "I will search the codebase."}
+        ]
+        assert batch[2]["parts"] == [
+            {
+                "type": "tool",
+                "tool_id": "call_rg_1",
+                "tool_name": "shell_command",
+                "tool_input": {"command": "rg assemble"},
+                "tool_output": "agent/context_engine.py: no preassemble hook",
+                "tool_status": "completed",
+            }
+        ]
+        assert batch[3]["parts"] == [
+            {"type": "text", "text": "The current main does not expose assemble."}
+        ]
+
+    def test_messages_to_openviking_batch_marks_json_tool_error_results(self):
+        turn = [
+            {"role": "user", "content": "Check the file."},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_read_1",
+                        "type": "function",
+                        "function": {
+                            "name": "read_file",
+                            "arguments": json.dumps({"path": "missing.md"}),
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_read_1",
+                "name": "read_file",
+                "content": json.dumps({"error": "File not found", "exit_code": 1}),
+            },
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert batch[1]["role"] == "assistant"
+        assert batch[1]["parts"] == [
+            {
+                "type": "tool",
+                "tool_id": "call_read_1",
+                "tool_name": "read_file",
+                "tool_input": {"path": "missing.md"},
+                "tool_output": json.dumps({"error": "File not found", "exit_code": 1}),
+                "tool_status": "error",
+            }
+        ]
+
+    def test_messages_to_openviking_batch_keeps_pending_tool_call_without_result(self):
+        turn = [
+            {"role": "user", "content": "Start a long running check."},
+            {
+                "role": "assistant",
+                "content": "Starting it now.",
+                "tool_calls": [
+                    {
+                        "id": "call_long_1",
+                        "type": "function",
+                        "function": {
+                            "name": "long_check",
+                            "arguments": json.dumps({"target": "repo"}),
+                        },
+                    }
+                ],
+            },
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert batch[1]["parts"] == [
+            {"type": "text", "text": "Starting it now."},
+            {
+                "type": "tool",
+                "tool_id": "call_long_1",
+                "tool_name": "long_check",
+                "tool_input": {"target": "repo"},
+                "tool_status": "pending",
+            },
+        ]
+
+    def test_messages_to_openviking_batch_coalesces_adjacent_tool_results(self):
+        turn = [
+            {"role": "user", "content": "Run both tools."},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "call_a",
+                        "type": "function",
+                        "function": {
+                            "name": "first_tool",
+                            "arguments": json.dumps({"x": 1}),
+                        },
+                    },
+                    {
+                        "id": "call_b",
+                        "type": "function",
+                        "function": {
+                            "name": "second_tool",
+                            "arguments": json.dumps({"y": 2}),
+                        },
+                    },
+                ],
+            },
+            {"role": "tool", "tool_call_id": "call_a", "name": "first_tool", "content": "a"},
+            {"role": "tool", "tool_call_id": "call_b", "name": "second_tool", "content": "b"},
+            {"role": "assistant", "content": "Done."},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert [message["role"] for message in batch] == ["user", "assistant", "assistant"]
+        assert batch[1]["parts"] == [
+            {
+                "type": "tool",
+                "tool_id": "call_a",
+                "tool_name": "first_tool",
+                "tool_input": {"x": 1},
+                "tool_output": "a",
+                "tool_status": "completed",
+            },
+            {
+                "type": "tool",
+                "tool_id": "call_b",
+                "tool_name": "second_tool",
+                "tool_input": {"y": 2},
+                "tool_output": "b",
+                "tool_status": "completed",
+            },
+        ]
+
+    def test_messages_to_openviking_batch_skips_openviking_recall_tool_results(self):
+        for recall_tool_name in ("viking_search", "viking_read", "viking_browse"):
+            turn = [
+                {"role": "user", "content": "What did we decide about context assembly?"},
+                {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [
+                        {
+                            "id": "call_recall_1",
+                            "type": "function",
+                            "function": {
+                                "name": recall_tool_name,
+                                "arguments": json.dumps({"query": "context assembly decision"}),
+                            },
+                        },
+                        {
+                            "id": "call_shell_1",
+                            "type": "function",
+                            "function": {
+                                "name": "shell_command",
+                                "arguments": json.dumps({"command": "rg preassemble"}),
+                            },
+                        },
+                    ],
+                },
+                {
+                    "role": "tool",
+                    "tool_call_id": "call_recall_1",
+                    "name": recall_tool_name,
+                    "content": json.dumps({
+                        "results": [
+                            {
+                                "uri": "viking://user/hermes/memories/context",
+                                "abstract": "Old OpenViking memory content",
+                            }
+                        ]
+                    }),
+                },
+                {
+                    "role": "tool",
+                    "tool_call_id": "call_shell_1",
+                    "name": "shell_command",
+                    "content": "plugins/memory/openviking/__init__.py",
+                },
+                {"role": "assistant", "content": "We decided to keep sync_turn scoped to ingestion."},
+            ]
+
+            batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+            assert [message["role"] for message in batch] == ["user", "assistant", "assistant"]
+            assert batch[1]["parts"] == [
+                {
+                    "type": "tool",
+                    "tool_id": "call_shell_1",
+                    "tool_name": "shell_command",
+                    "tool_input": {"command": "rg preassemble"},
+                    "tool_output": "plugins/memory/openviking/__init__.py",
+                    "tool_status": "completed",
+                }
+            ]
+            batch_text = json.dumps(batch)
+            assert recall_tool_name not in batch_text
+            assert "Old OpenViking memory content" not in batch_text
+
+    def test_messages_to_openviking_batch_empty_tool_id_does_not_drop_other_results(self):
+        # A recall tool result that arrives with an empty tool_call_id must not
+        # poison the skip set with "" and silently drop unrelated tool results
+        # that also lack an id. Empty tool_call_id is reachable in the canonical
+        # transcript (agent_runtime_helpers defaults it to "").
+        turn = [
+            {"role": "user", "content": "What did we decide?"},
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {
+                        "id": "",
+                        "type": "function",
+                        "function": {
+                            "name": "viking_search",
+                            "arguments": json.dumps({"query": "decision"}),
+                        },
+                    }
+                ],
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "",
+                "name": "viking_search",
+                "content": json.dumps({"results": ["recall stuff"]}),
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "",
+                "name": "shell_command",
+                "content": "important shell output",
+            },
+            {"role": "assistant", "content": "done"},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        batch_text = json.dumps(batch)
+        # The unrelated (empty-id) shell result must survive.
+        assert "important shell output" in batch_text
+        # The recall tool result must still be excluded.
+        assert "recall stuff" not in batch_text
+        assert "viking_search" not in batch_text
+
+    def test_messages_to_openviking_batch_preserves_responses_text_parts(self):
+        turn = [
+            {"role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+            {"role": "assistant", "content": [{"type": "output_text", "text": "answer"}]},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(turn)
+
+        assert batch == [
+            {"role": "user", "parts": [{"type": "text", "text": "hello"}]},
+            {"role": "assistant", "parts": [{"type": "text", "text": "answer"}]},
+        ]
+
+    def test_messages_to_openviking_batch_adds_assistant_peer_id_when_requested(self):
+        turn = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "answer"},
+        ]
+
+        batch = OpenVikingMemoryProvider._messages_to_openviking_batch(
+            turn,
+            assistant_peer_id="hermes",
+        )
+
+        assert batch == [
+            {"role": "user", "parts": [{"type": "text", "text": "hello"}]},
+            {"role": "assistant", "parts": [{"type": "text", "text": "answer"}], "peer_id": "hermes"},
+        ]
+
+
 class TestOpenVikingRead:
     def test_overview_read_normalizes_uri_and_unwraps_result(self):
         provider = OpenVikingMemoryProvider()
diff --git a/tests/plugins/memory/test_hindsight_provider.py b/tests/plugins/memory/test_hindsight_provider.py
index bbcb151baa9..5cd485d4c1a 100644
--- a/tests/plugins/memory/test_hindsight_provider.py
+++ b/tests/plugins/memory/test_hindsight_provider.py
@@ -83,6 +83,66 @@ def _make_mock_client():
     return client
 
 
+def _provider_for_mode(tmp_path, monkeypatch, mode: str):
+    """Return an initialized provider for *mode* without pre-seeding its client."""
+    config = {
+        "mode": mode,
+        "apiKey": "test-key",
+        "api_url": "http://localhost:9999",
+        "bank_id": "test-bank",
+        "budget": "mid",
+        "memory_mode": "hybrid",
+    }
+    config_path = tmp_path / "hindsight" / "config.json"
+    config_path.parent.mkdir(parents=True, exist_ok=True)
+    # Explicit encoding keeps the fixture independent of the host locale.
+    config_path.write_text(json.dumps(config), encoding="utf-8")
+
+    # Redirect the plugin's hermes-home lookup to the temp directory.
+    monkeypatch.setattr("plugins.memory.hindsight.get_hermes_home", lambda: tmp_path)
+
+    provider = HindsightMemoryProvider()
+    provider.initialize(session_id="test-session", hermes_home=str(tmp_path), platform="cli")
+    return provider
+
+
+def _assert_cloud_client_lazy_installed_before_import(tmp_path, monkeypatch, mode: str):
+    """Cloud/local-external clients must ensure lazy deps before importing."""
+    import builtins
+
+    provider = _provider_for_mode(tmp_path, monkeypatch, mode)
+    ensure_calls = []
+
+    def fake_ensure(feature, prompt=True):
+        ensure_calls.append((feature, prompt))
+
+    class FakeHindsight:
+        def __init__(self, **kwargs):
+            self.kwargs = kwargs
+
+    real_import = builtins.__import__
+
+    def guarded_import(name, globals=None, locals=None, fromlist=(), level=0):
+        if name == "hindsight_client":
+            if ensure_calls != [("memory.hindsight", False)]:
+                raise ModuleNotFoundError("No module named 'hindsight_client'")
+            return SimpleNamespace(Hindsight=FakeHindsight)
+        return real_import(name, globals, locals, fromlist, level)
+
+    # Scope the builtins.__import__ override to the call under test only.
+    with monkeypatch.context() as scoped:
+        scoped.setattr("tools.lazy_deps.ensure", fake_ensure)
+        scoped.setattr(builtins, "__import__", guarded_import)
+        client = provider._get_client()
+    assert ensure_calls == [("memory.hindsight", False)]
+    assert isinstance(client, FakeHindsight)
+    assert client.kwargs == {
+        "base_url": "http://localhost:9999",
+        "timeout": 120.0,
+        "api_key": "test-key",
+    }
+
+
 class _FakeSessionDB:
     def __init__(self, messages=None):
         self._messages = list(messages or [])
@@ -232,6 +292,14 @@ class TestSchemas:
 
 
 class TestConfig:
+    def test_cloud_client_lazy_installs_dependency_before_import(self, tmp_path, monkeypatch):
+        _assert_cloud_client_lazy_installed_before_import(tmp_path, monkeypatch, "cloud")
+
+    def test_local_external_client_lazy_installs_dependency_before_import(self, tmp_path, monkeypatch):
+        _assert_cloud_client_lazy_installed_before_import(
+            tmp_path, monkeypatch, "local_external"
+        )
+
     def test_default_values(self, provider):
         assert provider._auto_retain is True
         assert provider._auto_recall is True
diff --git a/tests/plugins/memory/test_openviking_provider.py b/tests/plugins/memory/test_openviking_provider.py
index 954385fa54e..28f2d8e9d46 100644
--- a/tests/plugins/memory/test_openviking_provider.py
+++ b/tests/plugins/memory/test_openviking_provider.py
@@ -1975,7 +1975,10 @@ def test_on_session_switch_commits_old_session_and_rotates_id():
 
     provider.on_session_switch("new-sid", parent_session_id="old-sid")
 
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
     assert provider._session_id == "new-sid"
     assert provider._turn_count == 0
 
@@ -1998,7 +2001,10 @@ def test_on_session_switch_commits_pending_tokens_without_turn_count():
     provider.on_session_switch("new-sid")
 
     provider._client.get.assert_called_once_with("/api/v1/sessions/old-sid")
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
     assert provider._session_id == "new-sid"
     assert provider._turn_count == 0
 
@@ -2051,7 +2057,10 @@ def test_on_session_switch_waits_for_inflight_sync_thread():
     provider.on_session_switch("new-sid")
 
     assert join_calls, "expected on_session_switch to join the in-flight sync thread"
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
 
 
 def test_on_session_switch_noop_on_empty_new_id():
@@ -2186,6 +2195,78 @@ def test_sync_turn_retries_batch_write_with_fresh_client():
     )]
 
 
+def test_sync_turn_structured_messages_include_assistant_peer_id():
+    """Structured ``messages`` are posted as one batch with ``peer_id`` on assistant entries."""
+    provider = OpenVikingMemoryProvider()
+    provider._client = MagicMock()
+    provider._endpoint = "http://test"
+    provider._api_key = ""
+    provider._account = "acct"
+    provider._user = "usr"
+    provider._agent = "hermes"
+    provider._session_id = "sid-structured"
+    captured = []
+
+    class StubClient:
+        def __init__(self, *a, **kw):
+            pass
+
+        def post(self, path, payload=None, **kwargs):
+            captured.append((path, payload))
+            return {}
+
+    import plugins.memory.openviking as _mod
+    messages = [
+        {"role": "user", "content": [{"type": "input_text", "text": "u"}]},
+        {
+            "role": "assistant",
+            "content": "Looking.",
+            "tool_calls": [
+                {
+                    "id": "call-1",
+                    "type": "function",
+                    "function": {"name": "shell_command", "arguments": json.dumps({"cmd": "pwd"})},
+                }
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call-1", "name": "shell_command", "content": "ok"},
+        {"role": "assistant", "content": [{"type": "output_text", "text": "a"}]},
+    ]
+    # Install the stub immediately before ``try`` so ``finally`` always undoes it.
+    real_client_cls = _mod._VikingClient
+    _mod._VikingClient = StubClient
+    try:
+        provider.sync_turn("u", "a", messages=messages)
+        assert provider._drain_writers("sid-structured", timeout=2.0)
+    finally:
+        _mod._VikingClient = real_client_cls
+
+    assert captured == [(
+        "/api/v1/sessions/sid-structured/messages/batch",
+        {
+            "messages": [
+                {"role": "user", "parts": [{"type": "text", "text": "u"}]},
+                {"role": "assistant", "parts": [{"type": "text", "text": "Looking."}], "peer_id": "hermes"},
+                {
+                    "role": "assistant",
+                    "parts": [
+                        {
+                            "type": "tool",
+                            "tool_id": "call-1",
+                            "tool_name": "shell_command",
+                            "tool_input": {"cmd": "pwd"},
+                            "tool_output": "ok",
+                            "tool_status": "completed",
+                        }
+                    ],
+                    "peer_id": "hermes",
+                },
+                {"role": "assistant", "parts": [{"type": "text", "text": "a"}], "peer_id": "hermes"},
+            ]
+        },
+    )]
+
+
 def test_sync_turn_noop_when_session_id_blank():
     provider = OpenVikingMemoryProvider()
     provider._client = MagicMock()
@@ -2206,7 +2287,10 @@ def test_on_session_end_marks_session_clean_after_successful_commit():
 
     provider.on_session_end([])
 
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
     assert provider._turn_count == 0
 
 
@@ -2228,7 +2312,10 @@ def test_on_session_end_commits_pending_tokens_without_turn_count():
     provider.on_session_end([])
 
     provider._client.get.assert_called_once_with("/api/v1/sessions/old-sid")
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
 
 
 def test_end_then_switch_does_not_double_commit():
@@ -2241,7 +2328,10 @@ def test_end_then_switch_does_not_double_commit():
     provider.on_session_switch("new-sid", parent_session_id="old-sid")
 
     # Exactly one commit call, on the OLD session, fired by on_session_end.
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
     assert provider._session_id == "new-sid"
     assert provider._turn_count == 0
 
@@ -2253,7 +2343,10 @@ def test_end_then_switch_with_pending_tokens_does_not_double_commit():
     provider.on_session_end([])
     provider.on_session_switch("new-sid", parent_session_id="old-sid")
 
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
     assert provider._session_id == "new-sid"
     assert provider._turn_count == 0
 
@@ -2400,7 +2493,10 @@ def test_on_session_switch_does_not_block_caller_on_slow_drain():
     # Let the finalizer finish so it doesn't leak past the test.
     release_drain.set()
     assert provider._drain_finalizers(timeout=5.0)
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
 
 
 def test_on_session_switch_defers_old_commit_to_finalizer_thread():
@@ -2415,7 +2511,7 @@ def test_on_session_switch_defers_old_commit_to_finalizer_thread():
     committed = threading.Event()
     drain_timeouts = []
 
-    def fake_post(path):
+    def fake_post(path, payload=None):
         committed.set()
         return {}
 
@@ -2433,7 +2529,10 @@ def test_on_session_switch_defers_old_commit_to_finalizer_thread():
     assert provider._turn_count == 0
     # The old-session commit lands on the finalizer thread, not inline.
     assert committed.wait(timeout=5.0), "old session was not finalized off-thread"
-    provider._client.post.assert_called_once_with("/api/v1/sessions/old-sid/commit")
+    provider._client.post.assert_called_once_with(
+        "/api/v1/sessions/old-sid/commit",
+        {"keep_recent_count": 0},
+    )
     # The finalizer drains with the deferred (longer) budget, not inline 10s.
     assert drain_timeouts == [_DEFERRED_COMMIT_TIMEOUT]
 
diff --git a/tests/run_agent/test_codex_app_server_integration.py b/tests/run_agent/test_codex_app_server_integration.py
index 14c058178b9..b0d2ec23861 100644
--- a/tests/run_agent/test_codex_app_server_integration.py
+++ b/tests/run_agent/test_codex_app_server_integration.py
@@ -12,7 +12,7 @@ Verifies that:
 
 from __future__ import annotations
 
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 
@@ -148,6 +148,17 @@ class TestRunConversationCodexPath:
                  and m.get("content") == "echo: hello"]
         assert final, f"expected final assistant message in {msgs}"
 
+    def test_projected_messages_are_synced_to_external_memory(self, fake_session):
+        """A Codex turn must pass its returned messages to ``sync_all`` exactly once."""
+        agent = _make_codex_agent()
+        agent._memory_manager = MagicMock()
+        agent._memory_manager.build_system_prompt.return_value = ""
+        with patch.object(agent, "_spawn_background_review", return_value=None):
+            result = agent.run_conversation("hello")
+
+        agent._memory_manager.sync_all.assert_called_once()
+        assert agent._memory_manager.sync_all.call_args.kwargs["messages"] == result["messages"]
+
     def test_nudge_counters_tick(self, fake_session):
         """The skill nudge counter must accumulate tool_iterations across
         turns. The memory nudge counter is gated on memory being configured
diff --git a/tests/run_agent/test_nonretryable_error_html_summary.py b/tests/run_agent/test_nonretryable_error_html_summary.py
new file mode 100644
index 00000000000..db765b124f3
--- /dev/null
+++ b/tests/run_agent/test_nonretryable_error_html_summary.py
@@ -0,0 +1,130 @@
+"""Regression: non-retryable API failures must not leak raw HTML pages.
+
+A scheduled cron job fell back to the Codex (``chatgpt.com``) provider, which
+returned a Cloudflare *challenge* page (HTTP 403) instead of a normal API
+response.  The conversation loop classified this as a non-retryable client
+error and returned the failure dict — but the ``error`` field carried
+``str(api_error)``, i.e. the entire ~60 KB Cloudflare HTML page.  The cron
+scheduler then delivered that verbatim to Discord, where it was split into
+~31 messages (the reporter's "31 part discord message which is cloudflares
+challenge page").
+
+The sibling "max retries exhausted" path already summarized the error via
+``_summarize_api_error`` (which collapses HTML pages to a one-liner); the
+non-retryable path did not.  These tests lock the contract: whichever
+terminal path is taken, ``result['error']`` is a short, HTML-free summary.
+"""
+
+from unittest.mock import MagicMock, patch
+
+import run_agent
+from run_agent import AIAgent
+
+
+# A representative Cloudflare "managed challenge" body, matching the shape the
+# Codex backend returned in the field report (no <title>, large inline
+# ``_cf_chl_opt`` script).  Padded so length-based assertions are meaningful.
+_CLOUDFLARE_CHALLENGE_HTML = (
+    "<!DOCTYPE html>\n<html>\n  <head>\n"
+    '    <meta http-equiv="refresh" content="360"></head>\n'
+    "  <body>\n    <div class=\"data\"><noscript>"
+    "Enable JavaScript and cookies to continue</noscript>"
+    "<script>(function(){window._cf_chl_opt = {cRay: 'a0ca002c4f91769c',"
+    "cZone: 'chatgpt.com', cType: 'managed', "
+    + ("md: '" + "x" * 4000 + "',")
+    + "};})();</script></div>\n  </body>\n</html>\n"
+)
+
+
+def _make_403_html_error() -> Exception:
+    """Build an exception shaped like a Codex 403 whose body is a Cloudflare page."""
+    challenge_error = Exception(_CLOUDFLARE_CHALLENGE_HTML)
+    challenge_error.status_code = 403
+    return challenge_error
+
+
+def _make_agent() -> AIAgent:
+    """Build a quiet chat-completions ``AIAgent`` whose client is a ``MagicMock``."""
+    # A concrete model on the chat-completions path makes the turn actually
+    # reach ``client.chat.completions.create``, where the mocked 403 is raised.
+    # The non-retryable abort being exercised lives in the shared conversation
+    # loop and is provider-agnostic; a Cloudflare "managed challenge" 403 can
+    # surface on any provider behind Cloudflare (it was first reported on the
+    # Codex backend).  Pinning ``api_mode`` + ``model`` avoids the earlier abort
+    # the previous revision hit: an empty model on the Codex Responses path
+    # raised a validation ``ValueError`` *before* any API call, so the test
+    # passed without ever touching the 403 summarization path.
+    with (
+        patch("run_agent.get_tool_definitions", return_value=[]),
+        patch("run_agent.check_toolset_requirements", return_value={}),
+        patch("run_agent.OpenAI"),
+    ):
+        a = AIAgent(
+            api_key="test-key-1234567890",
+            base_url="https://api.openai.com/v1",
+            provider="openai",
+            api_mode="chat_completions",
+            model="gpt-5.5",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+    a.client = MagicMock()
+    a._cached_system_prompt = "You are helpful."
+    a._use_prompt_caching = False
+    a.tool_delay = 0
+    a.compression_enabled = False
+    a.save_trajectories = False
+    return a
+
+
+def test_summarize_collapses_cloudflare_challenge_page():
+    """The summary of an HTML error page must be short and free of raw markup."""
+    summary = AIAgent._summarize_api_error(_make_403_html_error())
+    lowered = summary.lower()
+    assert "<html" not in lowered
+    assert "<!doctype" not in lowered
+    assert "_cf_chl_opt" not in summary
+    # A one-liner, not a multi-kilobyte page.
+    assert len(summary) < 200
+    # Still informative: the HTTP status survives.
+    assert "403" in summary
+
+
+def test_non_retryable_failure_error_is_summarized_not_raw_html():
+    """A non-retryable abort must return a short, HTML-free ``error``.
+
+    This mirrors the field path: a 403 Cloudflare challenge with no fallback
+    configured aborts as a non-retryable client error.  Before the fix the
+    returned ``error`` was the full ~60 KB page.
+
+    The mocked 403 is the *only* failure the turn can hit — the agent reaches
+    ``client.chat.completions.create`` (asserted below), so the test cannot
+    pass vacuously by aborting on some earlier, unrelated error.
+    """
+    agent = _make_agent()
+    agent.client.chat.completions.create.side_effect = _make_403_html_error()
+    with (
+        patch.object(agent, "_persist_session"),
+        patch.object(agent, "_save_trajectory"),
+        patch.object(agent, "_cleanup_task_resources"),
+    ):
+        result = agent.run_conversation("daily briefing please")
+
+    # Guard against a vacuous pass: the mocked 403 must actually be the
+    # failure that aborted the turn.  (The previous revision never reached
+    # this call and still "passed".)
+    assert agent.client.chat.completions.create.called
+    assert result.get("failed") is True
+    error = result.get("error") or ""
+    error_lower = error.lower()
+    # The whole point of the fix: no raw HTML / Cloudflare markup leaks.
+    assert "<html" not in error_lower
+    assert "<!doctype" not in error_lower
+    assert "_cf_chl_opt" not in error
+    # Still informative: the summarized 403 status survives into the field
+    # delivered downstream.
+    assert "403" in error
+    # The original page was tens of kilobytes; a summary is short.
+    assert len(error) < 500
+    assert len(error) < len(_CLOUDFLARE_CHALLENGE_HTML)
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index f2787628d4d..385a296f889 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -5813,12 +5813,126 @@ class TestAnthropicCredentialRefresh:
 
         response = SimpleNamespace(content=[])
         agent._anthropic_client = MagicMock()
-        agent._anthropic_client.messages.create.return_value = response
+        stream_cm = MagicMock()
+        stream_cm.__enter__.return_value.get_final_message.return_value = response
+        agent._anthropic_client.messages.stream.return_value = stream_cm
 
         with patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=True) as refresh:
             result = agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"})
 
         refresh.assert_called_once_with()
+        agent._anthropic_client.messages.stream.assert_called_once_with(model="claude-sonnet-4-20250514")
+        agent._anthropic_client.messages.create.assert_not_called()
+        assert result is response
+
+    def test_anthropic_messages_create_falls_back_when_stream_unavailable(self):
+        with (
+            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+        ):
+            agent = AIAgent(
+                api_key="sk-ant-oat01-current-token",
+                base_url="https://openrouter.ai/api/v1",
+                api_mode="anthropic_messages",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+
+        response = SimpleNamespace(content=[])
+        agent._anthropic_client = MagicMock()
+        agent._anthropic_client.messages.stream.side_effect = RuntimeError(
+            "stream is not supported by this provider"
+        )
+        agent._anthropic_client.messages.create.return_value = response
+
+        with patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=False):
+            result = agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"})
+
+        agent._anthropic_client.messages.stream.assert_called_once_with(model="claude-sonnet-4-20250514")
+        agent._anthropic_client.messages.create.assert_called_once_with(model="claude-sonnet-4-20250514")
+        assert result is response
+
+    def test_anthropic_messages_create_honors_disable_streaming(self):
+        with (
+            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+        ):
+            agent = AIAgent(
+                api_key="sk-ant-oat01-current-token",
+                base_url="https://openrouter.ai/api/v1",
+                api_mode="anthropic_messages",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+
+        response = SimpleNamespace(content=[])
+        agent._disable_streaming = True
+        agent._anthropic_client = MagicMock()
+        agent._anthropic_client.messages.create.return_value = response
+
+        with patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=False):
+            result = agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"})
+
+        agent._anthropic_client.messages.stream.assert_not_called()
+        agent._anthropic_client.messages.create.assert_called_once_with(model="claude-sonnet-4-20250514")
+        assert result is response
+
+    def test_anthropic_messages_create_does_not_mask_bedrock_stream_validation_errors(self):
+        with (
+            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+        ):
+            agent = AIAgent(
+                api_key="sk-ant-oat01-current-token",
+                base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
+                api_mode="anthropic_messages",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+
+        exc = RuntimeError("ValidationException: InvokeModelWithResponseStream input malformed")
+        agent._anthropic_client = MagicMock()
+        agent._anthropic_client.messages.stream.side_effect = exc
+
+        with (
+            patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=False),
+            pytest.raises(RuntimeError, match="input malformed"),
+        ):
+            agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"})
+
+        agent._anthropic_client.messages.create.assert_not_called()
+
+    def test_anthropic_messages_create_falls_back_for_bedrock_stream_access_denied(self):
+        with (
+            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+        ):
+            agent = AIAgent(
+                api_key="sk-ant-oat01-current-token",
+                base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
+                api_mode="anthropic_messages",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+
+        response = SimpleNamespace(content=[])
+        agent._anthropic_client = MagicMock()
+        agent._anthropic_client.messages.stream.side_effect = RuntimeError(
+            "User is not authorized to perform: bedrock:InvokeModelWithResponseStream"
+        )
+        agent._anthropic_client.messages.create.return_value = response
+
+        with patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=False):
+            result = agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"})
+
         agent._anthropic_client.messages.create.assert_called_once_with(model="claude-sonnet-4-20250514")
         assert result is response
 
diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py
index e4650ed5dc7..1d727132a8c 100644
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -2065,6 +2065,89 @@ class TestSessionTitle:
         assert session["ended_at"] is not None
 
 
+class TestSessionTitleLineage:
+    """Renaming a compression continuation back to its base title must succeed
+    by transferring the title off the ended, hidden predecessor.
+
+    After a context compaction the original session is ended and projected
+    behind its live tip in the session list (list_sessions_rich), so the user
+    cannot see or free it. Without lineage-aware handling, renaming the visible
+    tip back to the base name dead-ends with "already in use by <session they
+    can't find>".
+    """
+
+    def _make_compression_chain(self, db, t0, *, root="root", tip="tip"):
+        db.create_session(root, "cli")
+        db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (t0, root))
+        db._conn.execute(
+            "UPDATE sessions SET ended_at=?, end_reason='compression' WHERE id=?",
+            (t0 + 100, root),
+        )
+        db.create_session(tip, "cli", parent_session_id=root)
+        db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (t0 + 200, tip))
+        db._conn.commit()
+
+    def test_rename_continuation_back_to_base_transfers_title(self, db):
+        import time as _time
+        self._make_compression_chain(db, _time.time() - 3600)
+        db.set_session_title("root", "fingerprint-scanner")
+        db.set_session_title("tip", "fingerprint-scanner #2")
+
+        # User renames the visible tip back to the base name — must succeed.
+        assert db.set_session_title("tip", "fingerprint-scanner") is True
+        assert db.get_session("tip")["title"] == "fingerprint-scanner"
+        # Title transferred off the hidden ancestor — no duplicate titles.
+        assert db.get_session("root")["title"] is None
+
+    def test_transfer_walks_multi_level_chain(self, db):
+        import time as _time
+        t0 = _time.time() - 7200
+        # root (compression) -> mid (compression) -> tip
+        self._make_compression_chain(db, t0, root="root", tip="mid")
+        db._conn.execute(
+            "UPDATE sessions SET ended_at=?, end_reason='compression' WHERE id=?",
+            (t0 + 300, "mid"),
+        )
+        db.create_session("tip", "cli", parent_session_id="mid")
+        db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (t0 + 400, "tip"))
+        db._conn.commit()
+
+        db.set_session_title("root", "deep-dive")
+        assert db.set_session_title("tip", "deep-dive") is True
+        assert db.get_session("tip")["title"] == "deep-dive"
+        assert db.get_session("root")["title"] is None
+
+    def test_unrelated_session_still_conflicts(self, db):
+        db.create_session("a", "cli")
+        db.create_session("b", "cli")
+        db.set_session_title("a", "shared")
+        with pytest.raises(ValueError, match="already in use"):
+            db.set_session_title("b", "shared")
+        # The unrelated holder keeps its title.
+        assert db.get_session("a")["title"] == "shared"
+
+    def test_non_compression_child_still_conflicts(self, db):
+        """A child whose parent did NOT end via compression (delegate/branch
+        spawned while the parent was live) is not a continuation, so renaming it
+        to the parent's title must still raise."""
+        import time as _time
+        t0 = _time.time() - 3600
+        db.create_session("parent", "cli")
+        db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (t0, "parent"))
+        db.create_session("child", "cli", parent_session_id="parent")
+        # Child started BEFORE parent ended, and parent ended for a non-
+        # compression reason — not a continuation edge.
+        db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (t0 + 10, "child"))
+        db._conn.execute(
+            "UPDATE sessions SET ended_at=?, end_reason='user_exit' WHERE id=?",
+            (t0 + 100, "parent"),
+        )
+        db._conn.commit()
+        db.set_session_title("parent", "shared")
+        with pytest.raises(ValueError, match="already in use"):
+            db.set_session_title("child", "shared")
+
+
 class TestSanitizeTitle:
     """Tests for SessionDB.sanitize_title() validation and cleaning."""
 
diff --git a/tests/tools/test_clarify_tool.py b/tests/tools/test_clarify_tool.py
index 8659e1f13af..0c38961dd8d 100644
--- a/tests/tools/test_clarify_tool.py
+++ b/tests/tools/test_clarify_tool.py
@@ -9,6 +9,7 @@ from tools.clarify_tool import (
     check_clarify_requirements,
     MAX_CHOICES,
     CLARIFY_SCHEMA,
+    _flatten_choice,
 )
 
 
@@ -164,6 +165,70 @@ class TestCheckClarifyRequirements:
         assert check_clarify_requirements() is True
 
 
+class TestClarifyDictChoices:
+    """Dict-shaped choices must be unwrapped to user-facing text at the source.
+
+    LLMs sometimes emit [{"description": "..."}] instead of bare strings. The
+    naive str(c) coercion leaked the Python dict repr onto every surface (CLI
+    panel, Discord buttons, Telegram list) AND returned it verbatim as the
+    user's answer. _flatten_choice normalises at the one platform-agnostic
+    entry point so the whole class is fixed in one place.
+    """
+
+    def test_flatten_unwraps_label_first(self):
+        assert _flatten_choice({"label": "Short", "description": "Long"}) == "Short"
+
+    def test_flatten_unwraps_description_when_no_label(self):
+        assert _flatten_choice({"description": "A loose layout"}) == "A loose layout"
+
+    def test_flatten_unwrap_order_label_over_description(self):
+        assert _flatten_choice({"description": "verbose", "label": "tight"}) == "tight"
+
+    def test_flatten_drops_name_value_only_dict(self):
+        # name/value are component-shaped fields, not user-facing labels —
+        # picking them would leak raw enum values / short model ids.
+        assert _flatten_choice({"name": "tight", "value": "x"}) == ""
+
+    def test_flatten_prefers_canonical_key_over_name(self):
+        assert _flatten_choice({"name": "tight", "description": "Tight desc"}) == "Tight desc"
+
+    def test_flatten_drops_keyless_dict(self):
+        assert _flatten_choice({"foo": "bar", "n": 1}) == ""
+
+    def test_flatten_passthrough_string_and_scalar(self):
+        assert _flatten_choice("plain") == "plain"
+        assert _flatten_choice(7) == "7"
+        assert _flatten_choice(None) == ""
+
+    def test_dict_choices_reach_callback_as_clean_text(self):
+        """The whole point: the UI callback never sees a dict repr."""
+        seen = []
+
+        def cb(question, choices):
+            seen.extend(choices or [])
+            return choices[0]
+
+        result = json.loads(clarify_tool(
+            "Pick a layout",
+            choices=[
+                {"choice": "Tight", "description": "Tight, covers all 3 points"},
+                {"description": "Loose layout"},
+                {"name": "modelid", "value": "abc"},  # dropped, not leaked
+                "A plain string choice",
+            ],
+            callback=cb,
+        ))  # type: ignore
+        assert seen == [
+            "Tight, covers all 3 points",
+            "Loose layout",
+            "A plain string choice",
+        ]
+        # and the resolved answer is clean text, not a dict repr
+        assert result["user_response"] == "Tight, covers all 3 points"
+        assert "{" not in result["user_response"]
+        assert all("{" not in c for c in result["choices_offered"])
+
+
 class TestClarifySchema:
     """Tests for the OpenAI function-calling schema."""
 
diff --git a/tests/tools/test_tts_piper.py b/tests/tools/test_tts_piper.py
index c30b26dc9b9..78567adf9bb 100644
--- a/tests/tools/test_tts_piper.py
+++ b/tests/tools/test_tts_piper.py
@@ -8,6 +8,7 @@ without requiring the ``piper-tts`` package to actually be installed
 
 import json
 import sys
+import types
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
@@ -219,7 +220,7 @@ class TestGeneratePiperTts:
 
         # The SynthesisConfig import happens inline inside _generate_piper_tts
         # via ``from piper import SynthesisConfig``. Inject a fake piper
-        # module so that import resolves.
+        # module so that the inline import resolves.
         monkeypatch.setitem(sys.modules, "piper", FakePiperModule)
 
         config = {
@@ -239,6 +240,96 @@ class TestGeneratePiperTts:
         assert kwargs["length_scale"] == 2.0
         assert kwargs["volume"] == 0.8
 
+    def test_speaker_id_passed_through_to_synconfig(self, tmp_path, monkeypatch):
+        """speaker_id flows from config to SynthesisConfig when set."""
+        model = self._prepare_voice_files(tmp_path)
+        monkeypatch.setattr(tts_tool, "_import_piper", lambda: _StubPiperVoice)
+
+        fake_syn_cls = MagicMock()
+        monkeypatch.setitem(sys.modules, "piper", types.SimpleNamespace(SynthesisConfig=fake_syn_cls))
+
+        config = {"piper": {"voice": str(model), "speaker_id": 2}}
+        tts_tool._generate_piper_tts("hi", str(tmp_path / "out.wav"), config)
+
+        fake_syn_cls.assert_called_once()
+        assert fake_syn_cls.call_args.kwargs["speaker_id"] == 2
+
+    def test_speaker_id_alone_triggers_synconfig(self, tmp_path, monkeypatch):
+        """Setting ONLY speaker_id (no other advanced knobs) still constructs SynthesisConfig.
+
+        Regression guard: has_advanced must include speaker_id, otherwise
+        this knob gets silently dropped on the simplest configuration.
+        """
+        model = self._prepare_voice_files(tmp_path)
+        monkeypatch.setattr(tts_tool, "_import_piper", lambda: _StubPiperVoice)
+
+        fake_syn_cls = MagicMock()
+        monkeypatch.setitem(sys.modules, "piper", types.SimpleNamespace(SynthesisConfig=fake_syn_cls))
+
+        config = {"piper": {"voice": str(model), "speaker_id": 1}}
+        tts_tool._generate_piper_tts("hi", str(tmp_path / "out.wav"), config)
+
+        fake_syn_cls.assert_called_once()
+
+    def test_speaker_id_default_zero_when_unset(self, tmp_path, monkeypatch):
+        """No speaker_id in config → SynthesisConfig.speaker_id == 0 (Piper's default)."""
+        model = self._prepare_voice_files(tmp_path)
+        monkeypatch.setattr(tts_tool, "_import_piper", lambda: _StubPiperVoice)
+
+        fake_syn_cls = MagicMock()
+        monkeypatch.setitem(sys.modules, "piper", types.SimpleNamespace(SynthesisConfig=fake_syn_cls))
+
+        config = {"piper": {"voice": str(model), "length_scale": 1.5}}
+        tts_tool._generate_piper_tts("hi", str(tmp_path / "out.wav"), config)
+
+        assert fake_syn_cls.call_args.kwargs["speaker_id"] == 0
+
+    def test_speaker_id_bool_rejected_to_zero(self, tmp_path, monkeypatch):
+        """True/False would coerce to 1/0 and hide a config mistake — fall back to 0 instead."""
+        model = self._prepare_voice_files(tmp_path)
+        monkeypatch.setattr(tts_tool, "_import_piper", lambda: _StubPiperVoice)
+
+        fake_syn_cls = MagicMock()
+        monkeypatch.setitem(sys.modules, "piper", types.SimpleNamespace(SynthesisConfig=fake_syn_cls))
+
+        for bad in (True, False):
+            fake_syn_cls.reset_mock()
+            config = {"piper": {"voice": str(model), "speaker_id": bad}}
+            tts_tool._generate_piper_tts("hi", str(tmp_path / f"out-{bad}.wav"), config)
+            assert fake_syn_cls.call_args.kwargs["speaker_id"] == 0
+
+    def test_speaker_id_non_int_dropped_to_zero(self, tmp_path, monkeypatch):
+        """Unparseable config (string, list, dict, None) drops to 0 instead of raising."""
+        model = self._prepare_voice_files(tmp_path)
+        monkeypatch.setattr(tts_tool, "_import_piper", lambda: _StubPiperVoice)
+
+        fake_syn_cls = MagicMock()
+        monkeypatch.setitem(sys.modules, "piper", types.SimpleNamespace(SynthesisConfig=fake_syn_cls))
+
+        for bad in ("two", [1, 2], {"k": 1}, None):
+            fake_syn_cls.reset_mock()
+            config = {"piper": {"voice": str(model), "speaker_id": bad}}
+            tts_tool._generate_piper_tts("hi", str(tmp_path / f"out-{type(bad).__name__}.wav"), config)
+            assert fake_syn_cls.call_args.kwargs["speaker_id"] == 0
+
+    def test_speaker_id_does_not_invalidate_voice_cache(self, tmp_path, monkeypatch):
+        """Switching speaker_id between calls must NOT trigger a model reload.
+
+        PiperVoice is bound to a model, not a speaker — speaker is applied
+        per-call via syn_config.speaker_id. The voice cache should serve the
+        same PiperVoice instance for the same (model, cuda) regardless of
+        how many distinct speaker_ids the user cycles through.
+        """
+        model = self._prepare_voice_files(tmp_path)
+        monkeypatch.setattr(tts_tool, "_import_piper", lambda: _StubPiperVoice)
+
+        for speaker in (0, 1, 2, 3):
+            config = {"piper": {"voice": str(model), "speaker_id": speaker}}
+            tts_tool._generate_piper_tts("hi", str(tmp_path / f"out-{speaker}.wav"), config)
+
+        # Only one PiperVoice.load() call across four calls with different speakers.
+        assert _StubPiperVoice.loaded == [str(model)]
+
 
 # ---------------------------------------------------------------------------
 # text_to_speech_tool end-to-end (provider == "piper")
diff --git a/tests/tools/test_tts_xai_speech_tags.py b/tests/tools/test_tts_xai_speech_tags.py
index 37bde1c710a..4343a387f7a 100644
--- a/tests/tools/test_tts_xai_speech_tags.py
+++ b/tests/tools/test_tts_xai_speech_tags.py
@@ -1,8 +1,16 @@
 """Tests for xAI TTS speech-tag handling."""
 
-from unittest.mock import Mock
+from types import SimpleNamespace
+from unittest.mock import Mock, patch
 
-from tools.tts_tool import _apply_xai_auto_speech_tags, _generate_xai_tts
+import pytest
+
+from tools.tts_tool import (
+    _XAI_INLINE_SPEECH_TAGS,
+    _XAI_WRAPPING_SPEECH_TAGS,
+    _apply_xai_auto_speech_tags,
+    _generate_xai_tts,
+)
 
 
 def test_apply_xai_auto_speech_tags_adds_light_pause_after_first_sentence():
@@ -72,8 +80,20 @@ def test_apply_xai_auto_speech_tags_single_newline_still_gets_first_sentence_pau
     )
 
 
-def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypatch):
+def test_generate_xai_tts_sends_auxiliary_rewriter_output_to_api(
+    tmp_path, monkeypatch
+):
+    """auto_speech_tags=True should send the auxiliary rewriter's tagged
+    output (not the conservative local pause fallback) to the xAI TTS API.
+
+    The previous version of this test asserted on the local pause-tagged
+    text — which only happened to match because ``call_llm`` returns
+    ``None`` in the test environment and the function silently fell
+    back. With the new auxiliary-rewrite path the user-visible contract
+    is "what the LLM said wins", so this test pins that down.
+    """
     captured = {}
+    rewriter_output = "Bonjour Monsieur Talbot. [warmly] Ceci est un test. [soft laugh]"
 
     class FakeResponse:
         content = b"mp3"
@@ -88,8 +108,15 @@ def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypa
         captured["timeout"] = timeout
         return FakeResponse()
 
+    fake_response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content=rewriter_output))]
+    )
+
     monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
     monkeypatch.setattr("requests.post", fake_post)
+    monkeypatch.setattr(
+        "agent.auxiliary_client.call_llm", lambda *a, **kw: fake_response
+    )
 
     out = tmp_path / "out.mp3"
     _generate_xai_tts(
@@ -102,7 +129,178 @@ def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypa
     assert captured["url"] == "https://api.x.ai/v1/tts"
     assert captured["json"]["voice_id"] == "ara"
     assert captured["json"]["language"] == "fr"
-    assert captured["json"]["text"] == "Bonjour Monsieur Talbot. [pause] Ceci est un test."
+    assert captured["json"]["text"] == rewriter_output
+
+
+def test_auto_speech_tags_calls_auxiliary_rewriter_with_tts_audio_tags_task():
+    """When input has no explicit speech tags, the function must call the
+    auxiliary rewriter with task='tts_audio_tags' and a system prompt
+    that documents the xAI inline + wrapping tag vocabulary.
+    """
+    response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content="[warmly] Hi."))]
+    )
+
+    with patch("agent.auxiliary_client.call_llm", return_value=response) as mock_call:
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == "[warmly] Hi."
+    mock_call.assert_called_once()
+    call_kwargs = mock_call.call_args.kwargs
+    assert call_kwargs["task"] == "tts_audio_tags"
+    assert call_kwargs["temperature"] == 0.7
+
+    messages = call_kwargs["messages"]
+    assert messages[0]["role"] == "system"
+    assert messages[1]["role"] == "user"
+
+    system_prompt = messages[0]["content"]
+    # All documented inline + wrapping tag names must appear in the prompt
+    # so the auxiliary model knows what's valid. The prompt lists them
+    # comma-separated in two example lines ("Valid inline tags (use as
+    # `[tag]`): pause, long-pause, ..." and a similar line for wrapping).
+    for tag in _XAI_INLINE_SPEECH_TAGS:
+        assert tag in system_prompt, (
+            f"inline tag {tag!r} missing from system prompt"
+        )
+    for tag in _XAI_WRAPPING_SPEECH_TAGS:
+        assert tag in system_prompt, (
+            f"wrapping tag {tag!r} missing from system prompt"
+        )
+    # The prompt must explicitly show the BBCode-style closing syntax so
+    # the rewriter uses [/tag] and not <tag>...</tag>.
+    assert "[/tag]" in system_prompt
+
+    # The user message carries the locally pause-tagged transcript (the
+    # conservative fallback the rewriter is asked to enrich).
+    assert "TRANSCRIPT TO TAG" in messages[1]["content"]
+    assert "[pause]" in messages[1]["content"]
+
+
+def test_auto_speech_tags_strips_markdown_fences_from_rewriter_output():
+    """If the auxiliary model wraps its reply in ```...``` fences the
+    function must strip them before returning.
+    """
+    fenced = "```\n[warmly] Bonjour. [soft laugh]\n```"
+    response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content=fenced))]
+    )
+
+    with patch("agent.auxiliary_client.call_llm", return_value=response):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == "[warmly] Bonjour. [soft laugh]"
+
+
+def test_auto_speech_tags_strips_markdown_fence_with_language_hint():
+    """The fence regex accepts an optional language tag like ```text ...```."""
+    fenced = "```text\n[warmly] Bonjour.\n```"
+    response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content=fenced))]
+    )
+
+    with patch("agent.auxiliary_client.call_llm", return_value=response):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == "[warmly] Bonjour."
+
+
+def test_auto_speech_tags_falls_back_to_local_on_auxiliary_exception(caplog):
+    """If the auxiliary rewriter raises (timeout, network, provider error,
+    anything) the function must not propagate the error: it logs the failure
+    and falls back to the local pause-tagged text so the user still gets audio.
+    """
+    import logging
+
+    with caplog.at_level(logging.DEBUG, logger="tools.tts_tool"), patch(
+        "agent.auxiliary_client.call_llm",
+        side_effect=RuntimeError("upstream provider timed out"),
+    ):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    # Local fallback: first sentence gets a [pause] inserted, single
+    # paragraph, no other rewriter activity.
+    assert result == (
+        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+    )
+    assert "xAI TTS audio tag rewrite failed" in caplog.text
+
+
+def test_auto_speech_tags_falls_back_to_local_when_rewriter_returns_empty():
+    """A rewriter response with empty content must also fall back to local."""
+    empty_response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content=""))]
+    )
+
+    with patch(
+        "agent.auxiliary_client.call_llm", return_value=empty_response
+    ):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == (
+        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+    )
+
+
+def test_auto_speech_tags_skips_auxiliary_when_input_has_explicit_tags():
+    """If the user/model already supplied explicit speech tags we trust
+    them and never call the rewriter — that would risk the rewriter
+    overwriting intentional markup.
+    """
+    tagged = "Bonjour. [pause] <whisper>Déjà balisé.</whisper>"
+
+    with patch("agent.auxiliary_client.call_llm") as mock_call:
+        result = _apply_xai_auto_speech_tags(tagged)
+
+    mock_call.assert_not_called()
+    # The local pass is a no-op for already-tagged text (no double
+    # paragraph normalization, no first-sentence pause injection).
+    assert result == tagged
+
+
+def test_auto_speech_tags_skips_auxiliary_for_empty_input():
+    with patch("agent.auxiliary_client.call_llm") as mock_call:
+        assert _apply_xai_auto_speech_tags("") == ""
+        assert _apply_xai_auto_speech_tags("   \n  ") == "   \n  "
+
+    mock_call.assert_not_called()
+
+
+def test_auto_speech_tags_skips_auxiliary_for_whitespace_only_input():
+    """Whitespace-only input short-circuits before the rewriter runs."""
+    with patch("agent.auxiliary_client.call_llm") as mock_call:
+        assert _apply_xai_auto_speech_tags("   ") == "   "
+
+    mock_call.assert_not_called()
+
+
+@pytest.mark.parametrize("bad_response", [None, SimpleNamespace(choices=[])])
+def test_auto_speech_tags_falls_back_to_local_on_malformed_rewriter_response(
+    bad_response,
+):
+    """Both ``None`` and a response with no choices must fall back to the
+    conservative local pass rather than crash.
+    """
+    with patch(
+        "agent.auxiliary_client.call_llm", return_value=bad_response
+    ):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == (
+        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+    )
 
 
 def test_generate_xai_tts_leaves_text_plain_by_default(tmp_path, monkeypatch):
@@ -126,3 +324,207 @@ def test_generate_xai_tts_leaves_text_plain_by_default(tmp_path, monkeypatch):
     )
 
     assert captured["json"]["text"] == "Bonjour Monsieur Talbot. Ceci est un test."
+
+
+def test_generate_xai_tts_omits_speed_and_latency_by_default(tmp_path, monkeypatch):
+    """No speed / optimize_streaming_latency in the request body unless
+    the user explicitly sets them. Keeps the existing minimal-payload
+    contract for default configs.
+    """
+    captured = {}
+
+    fake_response = Mock()
+    fake_response.content = b"mp3"
+    fake_response.raise_for_status.return_value = None
+
+    def fake_post(url, headers, json, timeout):
+        captured["json"] = json
+        return fake_response
+
+    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
+    monkeypatch.setattr("requests.post", fake_post)
+
+    _generate_xai_tts(
+        "Hello world.",
+        str(tmp_path / "out.mp3"),
+        {"xai": {"voice_id": "ara", "language": "en"}},
+    )
+
+    assert "speed" not in captured["json"]
+    assert "optimize_streaming_latency" not in captured["json"]
+
+
+def test_generate_xai_tts_sends_speed_when_set(tmp_path, monkeypatch):
+    """tts.xai.speed flows into the POST body."""
+    captured = {}
+
+    fake_response = Mock()
+    fake_response.content = b"mp3"
+    fake_response.raise_for_status.return_value = None
+
+    def fake_post(url, headers, json, timeout):
+        captured["json"] = json
+        return fake_response
+
+    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
+    monkeypatch.setattr("requests.post", fake_post)
+
+    _generate_xai_tts(
+        "Hello world.",
+        str(tmp_path / "out.mp3"),
+        {"xai": {"voice_id": "ara", "language": "en", "speed": 1.5}},
+    )
+
+    assert captured["json"]["speed"] == 1.5
+
+
+def test_generate_xai_tts_speed_clamped_to_valid_range(tmp_path, monkeypatch):
+    """speed values outside xAI's 0.7..1.5 band are clamped, not sent raw."""
+    captured = {}
+
+    fake_response = Mock()
+    fake_response.content = b"mp3"
+    fake_response.raise_for_status.return_value = None
+
+    def fake_post(url, headers, json, timeout):
+        captured["json"] = json
+        return fake_response
+
+    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
+    monkeypatch.setattr("requests.post", fake_post)
+
+    # Below 0.7 -> 0.7
+    _generate_xai_tts(
+        "Hello.",
+        str(tmp_path / "out.mp3"),
+        {"xai": {"voice_id": "eve", "language": "en", "speed": 0.1}},
+    )
+    assert captured["json"]["speed"] == 0.7
+
+    # Above 1.5 -> 1.5
+    _generate_xai_tts(
+        "Hello.",
+        str(tmp_path / "out.mp3"),
+        {"xai": {"voice_id": "eve", "language": "en", "speed": 3.0}},
+    )
+    assert captured["json"]["speed"] == 1.5
+
+
+def test_generate_xai_tts_omits_speed_when_exactly_default(tmp_path, monkeypatch):
+    """speed == 1.0 is the API default; the field stays out of the payload."""
+    captured = {}
+
+    fake_response = Mock()
+    fake_response.content = b"mp3"
+    fake_response.raise_for_status.return_value = None
+
+    def fake_post(url, headers, json, timeout):
+        captured["json"] = json
+        return fake_response
+
+    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
+    monkeypatch.setattr("requests.post", fake_post)
+
+    _generate_xai_tts(
+        "Hello.",
+        str(tmp_path / "out.mp3"),
+        {"xai": {"voice_id": "eve", "language": "en", "speed": 1.0}},
+    )
+
+    assert "speed" not in captured["json"]
+
+
+def test_generate_xai_tts_sends_optimize_streaming_latency_when_set(tmp_path, monkeypatch):
+    """tts.xai.optimize_streaming_latency flows into the POST body."""
+    captured = {}
+
+    fake_response = Mock()
+    fake_response.content = b"mp3"
+    fake_response.raise_for_status.return_value = None
+
+    def fake_post(url, headers, json, timeout):
+        captured["json"] = json
+        return fake_response
+
+    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
+    monkeypatch.setattr("requests.post", fake_post)
+
+    _generate_xai_tts(
+        "Hello world.",
+        str(tmp_path / "out.mp3"),
+        {"xai": {"voice_id": "ara", "language": "en", "optimize_streaming_latency": 2}},
+    )
+
+    assert captured["json"]["optimize_streaming_latency"] == 2
+
+
+def test_generate_xai_tts_optimize_streaming_latency_omitted_at_default(tmp_path, monkeypatch):
+    """optimize_streaming_latency == 0 is the API default; field is not sent."""
+    captured = {}
+
+    fake_response = Mock()
+    fake_response.content = b"mp3"
+    fake_response.raise_for_status.return_value = None
+
+    def fake_post(url, headers, json, timeout):
+        captured["json"] = json
+        return fake_response
+
+    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
+    monkeypatch.setattr("requests.post", fake_post)
+
+    _generate_xai_tts(
+        "Hello world.",
+        str(tmp_path / "out.mp3"),
+        {"xai": {"voice_id": "ara", "language": "en", "optimize_streaming_latency": 0}},
+    )
+
+    assert "optimize_streaming_latency" not in captured["json"]
+
+
+def test_generate_xai_tts_global_speed_used_as_fallback(tmp_path, monkeypatch):
+    """Global tts.speed is the fallback when tts.xai.speed is unset."""
+    captured = {}
+
+    fake_response = Mock()
+    fake_response.content = b"mp3"
+    fake_response.raise_for_status.return_value = None
+
+    def fake_post(url, headers, json, timeout):
+        captured["json"] = json
+        return fake_response
+
+    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
+    monkeypatch.setattr("requests.post", fake_post)
+
+    _generate_xai_tts(
+        "Hello.",
+        str(tmp_path / "out.mp3"),
+        {"speed": 0.8, "xai": {"voice_id": "ara", "language": "en"}},
+    )
+
+    assert captured["json"]["speed"] == 0.8
+
+
+def test_generate_xai_tts_provider_speed_overrides_global(tmp_path, monkeypatch):
+    """tts.xai.speed wins over the global tts.speed fallback."""
+    captured = {}
+
+    fake_response = Mock()
+    fake_response.content = b"mp3"
+    fake_response.raise_for_status.return_value = None
+
+    def fake_post(url, headers, json, timeout):
+        captured["json"] = json
+        return fake_response
+
+    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
+    monkeypatch.setattr("requests.post", fake_post)
+
+    _generate_xai_tts(
+        "Hello.",
+        str(tmp_path / "out.mp3"),
+        {"speed": 1.5, "xai": {"voice_id": "ara", "language": "en", "speed": 0.7}},
+    )
+
+    assert captured["json"]["speed"] == 0.7
diff --git a/tests/tui_gateway/test_goal_command.py b/tests/tui_gateway/test_goal_command.py
index d06f5b8fbbd..cfff285f1ef 100644
--- a/tests/tui_gateway/test_goal_command.py
+++ b/tests/tui_gateway/test_goal_command.py
@@ -185,15 +185,17 @@ def test_goal_requires_session(server):
 # ── slash.exec /goal routing ──────────────────────────────────────────
 
 
-def test_slash_exec_rejects_goal_routes_to_command_dispatch(server, session):
-    """slash.exec must reject /goal with 4018 so the TUI client falls through
-    to command.dispatch. Without this, the HermesCLI slash-worker subprocess
-    would set the goal but silently drop the kickoff — the queue is in-proc."""
+def test_slash_exec_routes_goal_to_command_dispatch(server, session):
+    """slash.exec must route /goal directly to command.dispatch internally
+    instead of returning an error.  Previously the 4018 error required the
+    TUI client to retry via command.dispatch, but some clients failed the
+    fallback, leaving the command empty ("empty command")."""
     sid, _, _ = session
     r = _call(server, "slash.exec", command="goal status", session_id=sid)
-    assert "error" in r
-    assert r["error"]["code"] == 4018
-    assert "command.dispatch" in r["error"]["message"]
+    # Should succeed by routing to command.dispatch internally
+    assert "result" in r
+    assert r["result"]["type"] == "exec"
+    assert "No active goal" in r["result"]["output"]
 
 
 def test_pending_input_commands_includes_goal(server):
diff --git a/tests/tui_gateway/test_make_agent_provider.py b/tests/tui_gateway/test_make_agent_provider.py
index 9cd5b0d5f14..94b606dbd38 100644
--- a/tests/tui_gateway/test_make_agent_provider.py
+++ b/tests/tui_gateway/test_make_agent_provider.py
@@ -443,7 +443,9 @@ def test_apply_model_switch_does_not_leak_process_env():
 
     with (
         patch("hermes_cli.model_switch.parse_model_flags",
-              return_value=("glm-5.1", None, False, False)),
+              return_value=("glm-5.1", None, False, False, True)),
+        patch("hermes_cli.model_switch.resolve_persist_behavior",
+              return_value=False),
         patch("hermes_cli.model_switch.switch_model", return_value=_FakeResult()),
         patch("tui_gateway.server._emit"),
         patch("tui_gateway.server._restart_slash_worker"),
diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py
index 60d3c7a5c4f..775a07cb317 100644
--- a/tests/tui_gateway/test_protocol.py
+++ b/tests/tui_gateway/test_protocol.py
@@ -1121,20 +1121,45 @@ def test_slash_exec_plugin_handler_error_returns_output(server):
 
 
 @pytest.mark.parametrize("cmd", ["retry", "queue hello", "q hello", "steer fix the test", "plan"])
-def test_slash_exec_rejects_pending_input_commands(server, cmd):
-    """slash.exec must reject commands that use _pending_input in the CLI."""
-    sid = "test-session"
-    server._sessions[sid] = {"session_key": sid, "agent": None}
+def test_slash_exec_routes_pending_input_commands_to_dispatch(server, cmd):
+    """slash.exec must route _pending_input commands to command.dispatch
+    internally instead of returning the old 4018 "use command.dispatch"
+    fallback error (#48848). Some TUI clients failed that client-side
+    fallback, dropping the input and surfacing "empty command".
 
-    resp = server.handle_request({
+    The contract is that slash.exec produces exactly the response
+    command.dispatch would for the same command — no fragile retry hop.
+    """
+    base, _, arg = cmd.partition(" ")
+
+    def fresh_session():
+        return {"session_key": "test-session", "agent": None}
+
+    sid = "test-session"
+
+    # Response from the (new) internal routing in slash.exec.
+    server._sessions[sid] = fresh_session()
+    routed = server.handle_request({
         "id": "r1",
         "method": "slash.exec",
         "params": {"command": cmd, "session_id": sid},
     })
 
-    assert "error" in resp
-    assert resp["error"]["code"] == 4018
-    assert "pending-input command" in resp["error"]["message"]
+    # Response from calling command.dispatch directly with the parsed parts.
+    server._sessions[sid] = fresh_session()
+    direct = server.handle_request({
+        "id": "r1",
+        "method": "command.dispatch",
+        "params": {"name": base, "arg": arg, "session_id": sid},
+    })
+
+    # slash.exec must no longer emit the old client-fallback rejection.
+    if "error" in routed:
+        assert "pending-input command" not in routed["error"]["message"]
+
+    # Internal routing must yield the same payload as command.dispatch.
+    assert routed.get("result") == direct.get("result")
+    assert routed.get("error") == direct.get("error")
 
 
 def test_command_dispatch_queue_sends_message(server):
diff --git a/tools/checkpoint_manager.py b/tools/checkpoint_manager.py
index f0b47734cea..720973b67e0 100644
--- a/tools/checkpoint_manager.py
+++ b/tools/checkpoint_manager.py
@@ -272,6 +272,28 @@ def _git_env(
     return env
 
 
+def _repair_bare_repo_dirs(store: Path) -> None:
+    """Recreate ``refs/heads/`` and ``branches/`` if ``git gc`` removed them.
+
+    ``git gc --prune=now`` on a bare repo with only packed refs can remove
+    the empty ``refs/heads/`` directory.  Git 2.34+ requires ``refs/`` (and
+    some versions require ``branches/``) to exist even when all refs are
+    packed; without them ``git add -A`` returns ``fatal: not a git
+    repository`` and all checkpoint operations fail silently.  Best-effort:
+    errors are logged, not raised; a missing *store* is left untouched.
+    """
+    if not store.is_dir():
+        return
+    for subdir in ("refs/heads", "branches"):
+        path = store / subdir
+        if not path.exists():
+            try:
+                path.mkdir(parents=True, exist_ok=True)
+                logger.debug("Repaired missing %s in checkpoint store", subdir)
+            except OSError as exc:
+                logger.warning("Cannot create %s in checkpoint store: %s", subdir, exc)
+
+
 def _run_git(
     args: List[str],
     store: Path,
@@ -1086,6 +1108,7 @@ class CheckpointManager:
             ["gc", "--prune=now", "--quiet"],
             store, working_dir, timeout=_GIT_TIMEOUT * 3,
         )
+        _repair_bare_repo_dirs(store)
 
     def _enforce_size_cap(self, store: Path) -> None:
         """If total store size exceeds ``max_total_size_mb``, drop oldest
@@ -1173,6 +1196,7 @@ class CheckpointManager:
             ["gc", "--prune=now", "--quiet"],
             store, str(store.parent), timeout=_GIT_TIMEOUT * 3,
         )
+        _repair_bare_repo_dirs(store)
 
 
 def format_checkpoint_list(checkpoints: List[Dict], directory: str) -> str:
@@ -1384,6 +1408,7 @@ def prune_checkpoints(
             ["gc", "--prune=now", "--quiet"],
             store, str(base), timeout=_GIT_TIMEOUT * 3,
         )
+        _repair_bare_repo_dirs(store)
 
         # Size-cap pass across remaining projects.
         if max_total_size_mb > 0:
@@ -1455,6 +1480,7 @@ def prune_checkpoints(
                 ["gc", "--prune=now", "--quiet"],
                 store, str(base), timeout=_GIT_TIMEOUT * 3,
             )
+            _repair_bare_repo_dirs(store)
 
     size_after = _dir_size_bytes(base)
     delta = size_before - size_after
diff --git a/tools/clarify_tool.py b/tools/clarify_tool.py
index c44787554cc..e831d38fb4d 100644
--- a/tools/clarify_tool.py
+++ b/tools/clarify_tool.py
@@ -20,6 +20,39 @@ from typing import List, Optional, Callable
 MAX_CHOICES = 4
 
 
+def _flatten_choice(c) -> str:
+    """Coerce a single choice into its user-facing display string.
+
+    The schema declares choices as bare strings, but LLMs sometimes emit
+    dict-shaped choices like ``[{"description": "..."}]``. A naive ``str(c)``
+    turns the whole dict into its Python repr, which then leaks onto every
+    surface that renders the choice (CLI panel, Discord buttons, Telegram
+    list) AND is returned verbatim as the user's answer. Normalising here,
+    at the one platform-agnostic entry point, fixes it for every adapter.
+
+    Dicts unwrap via the first non-blank string among ``label`` →
+    ``description`` → ``text`` → ``title``. ``name`` and ``value`` are
+    deliberately excluded — they may carry raw enum values or identifiers,
+    not human-readable labels; a dict with none of the keys returns "".
+    Lists/tuples are flattened recursively and joined with single spaces,
+    skipping parts that flatten to "" so no doubled spaces appear. ``None``
+    returns ""; any other value falls back to ``str(c).strip()``.
+    """
+    if c is None:
+        return ""
+    if isinstance(c, str):
+        return c.strip()
+    if isinstance(c, dict):
+        for key in ("label", "description", "text", "title"):
+            v = c.get(key)
+            if isinstance(v, str) and v.strip():
+                return v.strip()
+        return ""
+    if isinstance(c, (list, tuple)):
+        return " ".join(filter(None, map(_flatten_choice, c)))
+    return str(c).strip()
+
+
 def clarify_tool(
     question: str,
     choices: Optional[List[str]] = None,
@@ -48,7 +81,12 @@ def clarify_tool(
     if choices is not None:
         if not isinstance(choices, list):
             return tool_error("choices must be a list of strings.")
-        choices = [str(c).strip() for c in choices if str(c).strip()]
+        # LLMs sometimes emit dict-shaped choices (e.g. [{"description": "..."}])
+        # instead of bare strings. _flatten_choice unwraps them to their
+        # user-facing text here — the single platform-agnostic entry point —
+        # so the CLI panel, Discord buttons, and Telegram list all render clean
+        # text and the resolved answer is never a raw Python dict repr.
+        choices = [s for s in (_flatten_choice(c) for c in choices) if s]
         if len(choices) > MAX_CHOICES:
             choices = choices[:MAX_CHOICES]
         if not choices:
@@ -93,6 +131,12 @@ CLARIFY_SCHEMA = {
         "or types their own answer via a 5th 'Other' option.\n"
         "2. **Open-ended** — omit choices entirely. The user types a free-form "
         "response.\n\n"
+        "CRITICAL: when you are offering options, put each option ONLY in the "
+        "`choices` array — NEVER enumerate the options inside the `question` "
+        "text. The UI renders `choices` as selectable rows; options written "
+        "into the question string render as dead prose the user can't pick. "
+        "Right: question='Which deployment target?', choices=['staging', "
+        "'prod']. Wrong: question='Which target? 1) staging 2) prod', choices=[].\n\n"
         "Use this tool when:\n"
         "- The task is ambiguous and you need the user to choose an approach\n"
         "- You want post-task feedback ('How did that work out?')\n"
@@ -107,16 +151,22 @@ CLARIFY_SCHEMA = {
         "properties": {
             "question": {
                 "type": "string",
-                "description": "The question to present to the user.",
+                "description": (
+                    "The question itself, and ONLY the question (e.g. 'Which "
+                    "deployment target?'). Do NOT embed the answer options here "
+                    "— pass them as separate elements in `choices`."
+                ),
             },
             "choices": {
                 "type": "array",
                 "items": {"type": "string"},
                 "maxItems": MAX_CHOICES,
                 "description": (
-                    "Up to 4 answer choices. Omit this parameter entirely to "
-                    "ask an open-ended question. When provided, the UI "
-                    "automatically appends an 'Other (type your answer)' option."
+                    "REQUIRED whenever you are presenting selectable options: "
+                    "each distinct option is its own array element (up to 4). "
+                    "The UI renders these as pickable rows and auto-appends an "
+                    "'Other (type your answer)' option. Omit this parameter "
+                    "entirely ONLY for a genuinely open-ended free-text question."
                 ),
             },
         },
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py
index 98bacbf42a0..4e2159a1a02 100644
--- a/tools/lazy_deps.py
+++ b/tools/lazy_deps.py
@@ -178,7 +178,7 @@ LAZY_DEPS: dict[str, tuple[str, ...]] = {
         "fastapi==0.133.1",
         "uvicorn[standard]==0.41.0",
         "starlette==1.0.1",  # CVE-2026-48710 (BadHost) — keep lazy-install in sync with pyproject [web]
-        "python-multipart==0.0.20",  # FastAPI UploadFile/Form for streaming uploads (NS-501)
+        "python-multipart==0.0.27",  # FastAPI UploadFile/Form for streaming uploads (NS-501)
     ),
     # Vision image-resize recovery (Pillow). Pillow is now a CORE dependency
     # (pyproject `dependencies`), so this entry is a belt-and-suspenders fallback
diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py
index db419196a47..2c5a1be5975 100644
--- a/tools/mcp_tool.py
+++ b/tools/mcp_tool.py
@@ -2662,10 +2662,19 @@ def _interrupted_call_result() -> str:
 # ---------------------------------------------------------------------------
 
 def _interpolate_env_vars(value):
-    """Recursively resolve ``${VAR}`` placeholders from ``os.environ``."""
+    """Recursively resolve ``${VAR}`` placeholders.
+
+    Resolves from the active profile's secret scope when multiplexing is on
+    (so an MCP server config's ``${API_KEY}`` picks up the routed profile's
+    value, not the process-global ``os.environ`` which may hold another
+    profile's), falling back to ``os.environ`` otherwise. Unset vars keep the
+    literal ``${VAR}`` placeholder, as before; empty values now keep it too.
+    """
+    from agent.secret_scope import get_secret as _get_secret
+
     if isinstance(value, str):
         def _replace(m):
-            return os.environ.get(m.group(1), m.group(0))
+            return _get_secret(m.group(1), m.group(0)) or m.group(0)
         return _ENV_VAR_PATTERN.sub(_replace, value)
     if isinstance(value, dict):
         return {k: _interpolate_env_vars(v) for k, v in value.items()}
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 71907a3a3cc..26d0f425c56 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -2058,6 +2058,29 @@ def terminal_tool(
                         env = new_env
                     logger.info("%s environment ready for task %s", env_type, effective_task_id[:8])
 
+        # Hard-block: gateway lifecycle commands (systemctl/launchctl/hermes
+        # restart|stop targeting hermes-gateway) must never run inside the
+        # gateway process itself. The restart would SIGTERM the gateway, which
+        # kills this very subprocess before it can complete — the service may
+        # never restart. This mirrors the `hermes gateway restart` guard in
+        # hermes_cli/gateway.py and the cron-path guard in hermes_cli/cron.py,
+        # but applies unconditionally (force=True cannot help here).
+        if os.environ.get("_HERMES_GATEWAY") == "1":
+            from hermes_cli.cron import _contains_gateway_lifecycle_command
+            if _contains_gateway_lifecycle_command(command):
+                return json.dumps({
+                    "output": "",
+                    "exit_code": 1,
+                    "error": (
+                        "Blocked: cannot restart or stop the gateway from inside the "
+                        "gateway process. The gateway would kill this command before "
+                        "it could complete (SIGTERM propagates to child processes). "
+                        "Run `hermes gateway restart` from a separate shell outside "
+                        "the running gateway."
+                    ),
+                    "status": "error",
+                }, ensure_ascii=False)
+
         # Pre-exec security checks (tirith + dangerous command detection)
         # Skip check if force=True (user has confirmed they want to run it)
         approval_note = None
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index c6e7c22de0f..d803086983e 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -187,6 +187,13 @@ DEFAULT_XAI_SAMPLE_RATE = 24000
 DEFAULT_XAI_BIT_RATE = 128000
 DEFAULT_XAI_AUTO_SPEECH_TAGS = False
 DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
+# xAI TTS `speed` accepts 0.7..1.5; 1.0 is the API default (omitted => default).
+DEFAULT_XAI_SPEED_MIN = 0.7
+DEFAULT_XAI_SPEED_MAX = 1.5
+DEFAULT_XAI_SPEED_DEFAULT = 1.0
+# xAI TTS `optimize_streaming_latency` accepts 0, 1, or 2; 0 (best quality) is
+# the API default (omitted => default). Values >0 trade quality for time-to-first-audio.
+DEFAULT_XAI_OPTIMIZE_STREAMING_LATENCY_DEFAULT = 0
 DEFAULT_GEMINI_TTS_MODEL = "gemini-2.5-flash-preview-tts"
 DEFAULT_GEMINI_TTS_VOICE = "Kore"
 DEFAULT_GEMINI_TTS_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
@@ -1092,22 +1099,71 @@ def _xai_bool_config(value: Any, default: bool = False) -> bool:
 
 
 def _apply_xai_auto_speech_tags(text: str) -> str:
-    """Add light xAI speech tags for more natural voice-mode replies.
+    """Add xAI speech tags for more natural voice-mode replies.
 
-    The transform is intentionally conservative: it only inserts pauses. It
-    never fabricates laughter or whispering, and it leaves explicit user/model
-    speech tags untouched.
+    First applies a conservative local transform (inserts [pause] between
+    paragraphs and after the first sentence). Then, if the original input had
+    no explicit user/model speech tags, asks the configured auxiliary model
+    to rewrite the transcript with a richer set of xAI-supported tags
+    (laughs, sighs, whispers, soft/loud, slow/fast, etc.) so the voice
+    output sounds more expressive. Falls back to the local result on any
+    auxiliary-model failure.
     """
     clean = text.strip()
-    if not clean or _XAI_SPEECH_TAG_RE.search(clean):
+    if not clean:
         return text
 
-    clean = re.sub(r"\n\s*\n+", " [pause] ", clean)
-    clean = re.sub(r"\s*\n\s*", " ", clean)
-    if not _XAI_SPEECH_TAG_RE.search(clean):
-        clean = _XAI_FIRST_SENTENCE_RE.sub(r"\1 [pause] ", clean, count=1)
-    clean = re.sub(r"\s{2,}", " ", clean).strip()
-    return clean
+    # Local conservative pass: pauses only.
+    local = clean
+    local = re.sub(r"\n\s*\n+", " [pause] ", local)
+    local = re.sub(r"\s*\n\s*", " ", local)
+    if not _XAI_SPEECH_TAG_RE.search(local):
+        local = _XAI_FIRST_SENTENCE_RE.sub(r"\1 [pause] ", local, count=1)
+    local = re.sub(r"\s{2,}", " ", local).strip()
+
+    # If the user/model already supplied explicit speech tags, trust them
+    # and don't re-rewrite.
+    if _XAI_SPEECH_TAG_RE.search(clean):
+        return local
+
+    # Auxiliary rewrite for richer emotion tags (mirrors the Gemini path).
+    inline = ", ".join(_XAI_INLINE_SPEECH_TAGS)
+    wrapping = ", ".join(_XAI_WRAPPING_SPEECH_TAGS)
+    system_prompt = (
+        "You rewrite transcripts for the xAI /v1/tts endpoint by inserting "
+        "expressive speech tags.\n\n"
+        "Valid inline tags (use as `[tag]`): " + inline + ".\n"
+        "Valid wrapping tags (use as `[tag]...[/tag]`): " + wrapping + ".\n\n"
+        "Rules:\n"
+        "- Preserve the spoken words, order, and meaning.\n"
+        "- Do not add new spoken sentences or remove existing spoken words.\n"
+        "- Use inline `[tag]` for short modifiers (laughs, sighs, pause, etc.).\n"
+        "- Use wrapping `[tag]...[/tag]` for sustained effects (whisper, soft, slow, fast, loud, etc.).\n"
+        "- Do not use angle-bracket tags like `<tag>...</tag>` — xAI uses BBCode-style closing tags with `[/tag]`.\n"
+        "- Do not use SSML.\n"
+        "- Do not explain or comment.\n"
+        "- Return only the tagged TTS script."
+    )
+    try:
+        from agent.auxiliary_client import call_llm
+
+        response = call_llm(
+            task="tts_audio_tags",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": f"TRANSCRIPT TO TAG:\n{local}"},
+            ],
+            temperature=0.7,
+        )
+        tagged = _extract_auxiliary_message_content(response).strip()
+        # Strip markdown fences if the LLM wrapped the response.
+        fence = re.fullmatch(r"```(?:[A-Za-z0-9_-]+)?\s*(.*?)\s*```", tagged, flags=re.DOTALL)
+        if fence:
+            tagged = fence.group(1).strip()
+        return tagged or local
+    except Exception as exc:
+        logger.debug("xAI TTS audio tag rewrite failed; using locally-tagged text: %s", exc)
+        return local
 
 
 def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -> str:
@@ -1135,6 +1191,31 @@ def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -
         xai_config.get("auto_speech_tags", xai_config.get("speech_tags")),
         DEFAULT_XAI_AUTO_SPEECH_TAGS,
     )
+    # ``tts.xai.speed`` overrides global ``tts.speed``; the xAI TTS API
+    # accepts 0.7..1.5 (1.0 = normal). Out-of-range values are clamped so a
+    # misconfigured agent can't 400 the request; blank ("") or unparseable
+    # values are treated as unset instead of crashing the clamp below.
+    speed = xai_config.get("speed", tts_config.get("speed"))
+    if speed == "":
+        speed = None
+    if speed is not None:
+        try:
+            speed = max(DEFAULT_XAI_SPEED_MIN, min(DEFAULT_XAI_SPEED_MAX, float(speed)))
+        except (TypeError, ValueError, OverflowError):
+            speed = None
+    # ``tts.xai.optimize_streaming_latency`` is 0, 1, or 2 (xAI-specific;
+    # trades chunk-boundary quality for time-to-first-audio).
+    optimize_streaming_latency = xai_config.get(
+        "optimize_streaming_latency",
+        tts_config.get("optimize_streaming_latency"),
+    )
+    if optimize_streaming_latency == "":
+        optimize_streaming_latency = None
+    if optimize_streaming_latency is not None:
+        try:
+            optimize_streaming_latency = max(0, min(2, int(optimize_streaming_latency)))
+        except (TypeError, ValueError, OverflowError):
+            optimize_streaming_latency = None
     if auto_speech_tags:
         text = _apply_xai_auto_speech_tags(text)
     base_url = str(
@@ -1163,6 +1244,18 @@ def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -
         if codec == "mp3" and bit_rate:
             output_format["bit_rate"] = bit_rate
         payload["output_format"] = output_format
+    # Only attach `speed` when the caller asked for something other than the
+    # API default (1.0). Keeps the existing minimal-payload contract for
+    # users who never touch the knob.
+    if speed is not None and speed != DEFAULT_XAI_SPEED_DEFAULT:
+        payload["speed"] = speed
+    # Only attach `optimize_streaming_latency` when the caller explicitly
+    # opts in to a non-default value (anything other than 0).
+    if (
+        optimize_streaming_latency is not None
+        and optimize_streaming_latency != DEFAULT_XAI_OPTIMIZE_STREAMING_LATENCY_DEFAULT
+    ):
+        payload["optimize_streaming_latency"] = optimize_streaming_latency
 
     response = requests.post(
         f"{base_url}/tts",
@@ -1889,6 +1982,18 @@ def _generate_piper_tts(text: str, output_path: str, tts_config: Dict[str, Any])
 
     model_path = _resolve_piper_voice_path(voice_name, download_dir)
 
+    # Strict speaker_id parse: anything that is not a plain int (strings,
+    # floats, lists, dicts) drops to 0 (Piper's own default). Booleans are
+    # rejected too — True/False would silently coerce to 1/0 and hide a mistake.
+    _raw_speaker = piper_config.get("speaker_id", 0)
+    if isinstance(_raw_speaker, bool) or not isinstance(_raw_speaker, int):
+        speaker_id = 0
+    else:
+        speaker_id = _raw_speaker
+
+    # speaker_id is applied per-call via syn_config.speaker_id — the same
+    # PiperVoice instance serves all speakers, so it stays out of the cache
+    # key. Multi-speaker workflows share one model load.
     cache_key = f"{model_path}::cuda={use_cuda}"
     global _piper_voice_cache
     if cache_key not in _piper_voice_cache:
@@ -1903,7 +2008,14 @@ def _generate_piper_tts(text: str, output_path: str, tts_config: Dict[str, Any])
     syn_config = None
     has_advanced = any(
         k in piper_config
-        for k in ("length_scale", "noise_scale", "noise_w_scale", "volume", "normalize_audio")
+        for k in (
+            "length_scale",
+            "noise_scale",
+            "noise_w_scale",
+            "volume",
+            "normalize_audio",
+            "speaker_id",
+        )
     )
     if has_advanced:
         try:
@@ -1914,6 +2026,7 @@ def _generate_piper_tts(text: str, output_path: str, tts_config: Dict[str, Any])
                 noise_w_scale=float(piper_config.get("noise_w_scale", 0.8)),
                 volume=float(piper_config.get("volume", 1.0)),
                 normalize_audio=bool(piper_config.get("normalize_audio", True)),
+                speaker_id=speaker_id,
             )
         except ImportError:
             logger.warning(
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 1b92831df3d..1ea3331b880 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -2139,14 +2139,25 @@ def _apply_model_switch(
     *,
     confirm_expensive_model: bool = False,
     pin_session_override: bool = True,
-    parsed_flags: tuple[str, str, bool, bool] | None = None,
+    parsed_flags: tuple[str, str, bool, bool, bool] | None = None,
 ) -> dict:
-    from hermes_cli.model_switch import parse_model_flags, switch_model
+    from hermes_cli.model_switch import (
+        parse_model_flags,
+        resolve_persist_behavior,
+        switch_model,
+    )
     from hermes_cli.runtime_provider import resolve_runtime_provider
 
     if parsed_flags is None:
         parsed_flags = parse_model_flags(raw_input)
-    model_input, explicit_provider, persist_global, _force_refresh = parsed_flags
+    (
+        model_input,
+        explicit_provider,
+        is_global_flag,
+        _force_refresh,
+        is_session,
+    ) = parsed_flags
+    persist_global = resolve_persist_behavior(is_global_flag, is_session)
     if not model_input:
         raise ValueError("model value required")
 
@@ -7596,7 +7607,7 @@ def _(rid, params: dict) -> dict:
                 from hermes_cli.model_switch import parse_model_flags
 
                 parsed_flags = parse_model_flags(value)
-                _model_input, explicit_provider, _persist_global, _force_refresh = parsed_flags
+                _model_input, explicit_provider, _persist_global, _force_refresh, _is_session = parsed_flags
                 if session.get("agent") is None and not explicit_provider.strip():
                     session_id = params.get("session_id", "")
                     _start_agent_build(session_id, session)
@@ -8462,7 +8473,9 @@ _TUI_EXTRA: list[tuple[str, str, str]] = [
 
 # Commands that queue messages onto _pending_input in the CLI.
 # In the TUI the slash worker subprocess has no reader for that queue,
-# so slash.exec rejects them → TUI falls through to command.dispatch.
+# so slash.exec routes them to command.dispatch internally (which handles
+# them and returns a structured payload) instead of erroring out and
+# relying on a client-side fallback. See #48848.
 _PENDING_INPUT_COMMANDS: frozenset[str] = frozenset(
     {
         "retry",
@@ -9729,8 +9742,16 @@ def _(rid, params: dict) -> dict:
     _cmd_arg = _cmd_parts[1] if len(_cmd_parts) > 1 else ""
 
     if _cmd_base in _PENDING_INPUT_COMMANDS:
-        return _err(
-            rid, 4018, f"pending-input command: use command.dispatch for /{_cmd_base}"
+        # Route directly to command.dispatch instead of returning an error
+        # that requires the frontend to retry.  Some TUI clients fail the
+        # fallback, leaving the command empty and showing "empty command".
+        return _methods["command.dispatch"](
+            rid,
+            {
+                "name": _cmd_base,
+                "arg": _cmd_arg,
+                "session_id": params.get("session_id", ""),
+            },
         )
 
     if _cmd_base in _WORKER_BLOCKED_COMMANDS:
diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts
index a671063e5e9..8f49dd9a513 100644
--- a/ui-tui/src/__tests__/createSlashHandler.test.ts
+++ b/ui-tui/src/__tests__/createSlashHandler.test.ts
@@ -2,13 +2,30 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'
 
 import { createSlashHandler } from '../app/createSlashHandler.js'
 import { getOverlayState, resetOverlayState } from '../app/overlayStore.js'
+import { DASHBOARD_EXIT_DISABLED_MESSAGE, DASHBOARD_UPDATE_DISABLED_MESSAGE } from '../app/slash/commands/core.js'
 import { getUiState, patchUiState, resetUiState } from '../app/uiStore.js'
 import { TUI_SESSION_MODEL_FLAG } from '../domain/slash.js'
 
+// DASHBOARD_TUI_MODE resolves once at module load from HERMES_TUI_DASHBOARD,
+// so toggling process.env in a test body can't move it. Mock just that one
+// export (everything else stays real) and flip the holder per test.
+const envState = { dashboardTuiMode: false }
+vi.mock('../config/env.js', async importActual => {
+  const actual = await importActual<typeof import('../config/env.js')>()
+
+  return {
+    ...actual,
+    get DASHBOARD_TUI_MODE() {
+      return envState.dashboardTuiMode
+    }
+  }
+})
+
 describe('createSlashHandler', () => {
   beforeEach(() => {
     resetOverlayState()
     resetUiState()
+    envState.dashboardTuiMode = false
   })
 
   it('opens the unified sessions overlay for /resume', () => {
@@ -68,6 +85,24 @@ describe('createSlashHandler', () => {
     expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
   })
 
+  it('keeps hosted dashboard chat alive for /exit', () => {
+    envState.dashboardTuiMode = true
+    const ctx = buildCtx()
+
+    expect(createSlashHandler(ctx)('/exit')).toBe(true)
+    expect(ctx.session.die).not.toHaveBeenCalled()
+    expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
+    expect(ctx.transcript.sys).toHaveBeenCalledWith(DASHBOARD_EXIT_DISABLED_MESSAGE)
+  })
+
+  it('keeps /quit available outside hosted dashboard chat', () => {
+    envState.dashboardTuiMode = false
+    const ctx = buildCtx()
+
+    expect(createSlashHandler(ctx)('/quit')).toBe(true)
+    expect(ctx.session.die).toHaveBeenCalledTimes(1)
+  })
+
   it('handles /update locally and exits with code 42 via dieWithCode', () => {
     vi.useFakeTimers()
     const ctx = buildCtx()
@@ -83,6 +118,22 @@ describe('createSlashHandler', () => {
     vi.useRealTimers()
   })
 
+  it('refuses /update in hosted dashboard chat instead of killing the PTY', () => {
+    vi.useFakeTimers()
+    envState.dashboardTuiMode = true
+    const ctx = buildCtx()
+
+    expect(createSlashHandler(ctx)('/update')).toBe(true)
+    expect(ctx.session.dieWithCode).not.toHaveBeenCalled()
+    expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
+    expect(ctx.transcript.sys).toHaveBeenCalledWith(DASHBOARD_UPDATE_DISABLED_MESSAGE)
+
+    vi.advanceTimersByTime(150)
+    expect(ctx.session.dieWithCode).not.toHaveBeenCalled()
+
+    vi.useRealTimers()
+  })
+
   it('routes /status to live session.status instead of slash worker', async () => {
     patchUiState({ sid: 'sid-abc' })
     const rpc = vi.fn(() => Promise.resolve({ output: 'Hermes TUI Status' }))
diff --git a/ui-tui/src/__tests__/gatewayClient.test.ts b/ui-tui/src/__tests__/gatewayClient.test.ts
index a872a008ddb..43d96add35a 100644
--- a/ui-tui/src/__tests__/gatewayClient.test.ts
+++ b/ui-tui/src/__tests__/gatewayClient.test.ts
@@ -187,6 +187,46 @@ describe('GatewayClient websocket attach mode', () => {
     gw.kill()
   })
 
+  it('publishes local dashboard-control events to the sidecar websocket', async () => {
+    process.env.HERMES_TUI_GATEWAY_URL = 'ws://gateway.test/api/ws?token=abc'
+    process.env.HERMES_TUI_SIDECAR_URL = 'ws://gateway.test/api/pub?token=abc&channel=demo'
+
+    const gw = new GatewayClient()
+    const seen: string[] = []
+
+    gw.on('event', ev => seen.push(ev.type))
+    gw.start()
+
+    const gatewaySocket = FakeWebSocket.instances[0]!
+
+    gatewaySocket.open()
+    await vi.waitFor(() => expect(FakeWebSocket.instances).toHaveLength(2))
+
+    const sidecarSocket = FakeWebSocket.instances[1]!
+
+    sidecarSocket.open()
+    gw.drain()
+
+    gw.publishLocalEvent({
+      payload: { reason: 'idle_exit_hotkey' },
+      session_id: 'sid-old',
+      type: 'dashboard.new_session_requested'
+    })
+
+    expect(seen).toContain('dashboard.new_session_requested')
+    expect(JSON.parse(sidecarSocket.sent.at(-1) ?? '{}')).toEqual({
+      jsonrpc: '2.0',
+      method: 'event',
+      params: {
+        payload: { reason: 'idle_exit_hotkey' },
+        session_id: 'sid-old',
+        type: 'dashboard.new_session_requested'
+      }
+    })
+
+    gw.kill()
+  })
+
   it('emits exit when attached websocket closes', () => {
     process.env.HERMES_TUI_GATEWAY_URL = 'ws://gateway.test/api/ws?token=abc'
     const gw = new GatewayClient()
diff --git a/ui-tui/src/__tests__/gracefulExit.test.ts b/ui-tui/src/__tests__/gracefulExit.test.ts
new file mode 100644
index 00000000000..6c805dfce7c
--- /dev/null
+++ b/ui-tui/src/__tests__/gracefulExit.test.ts
@@ -0,0 +1,11 @@
+import { describe, expect, it } from 'vitest'
+
+import { shouldExitForSignal } from '../lib/gracefulExit.js'
+
+describe('shouldExitForSignal', () => {
+  it('ignores only the signals explicitly disabled for embedded dashboard chat', () => {
+    expect(shouldExitForSignal('SIGINT', ['SIGINT'])).toBe(false)
+    expect(shouldExitForSignal('SIGTERM', ['SIGINT'])).toBe(true)
+    expect(shouldExitForSignal('SIGHUP', ['SIGINT'])).toBe(true)
+  })
+})
diff --git a/ui-tui/src/__tests__/textInputFastEcho.test.ts b/ui-tui/src/__tests__/textInputFastEcho.test.ts
index 6221314a062..98928d1baf1 100644
--- a/ui-tui/src/__tests__/textInputFastEcho.test.ts
+++ b/ui-tui/src/__tests__/textInputFastEcho.test.ts
@@ -178,6 +178,43 @@ describe('supportsFastEchoTerminal', () => {
     expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'Apple_Terminal' } as NodeJS.ProcessEnv)).toBe(false)
   })
 
+  it('disables fast-echo inside tmux', () => {
+    expect(supportsFastEchoTerminal({ TMUX: '/tmp/tmux-1000/default,1234,0' } as NodeJS.ProcessEnv)).toBe(false)
+    expect(supportsFastEchoTerminal({ TMUX: '/private/tmp/tmux-501/default' } as NodeJS.ProcessEnv)).toBe(false)
+  })
+
+  it('tmux wins over Termux fast-echo opt-in', () => {
+    expect(
+      supportsFastEchoTerminal({
+        TMUX: '/tmp/tmux-1000/default,1234,0',
+        HERMES_TUI_TERMUX_FAST_ECHO: '1',
+        TERMUX_VERSION: '0.118.0'
+      } as NodeJS.ProcessEnv)
+    ).toBe(false)
+  })
+
+  it('keeps fast-echo enabled when TMUX is empty or unset', () => {
+    expect(supportsFastEchoTerminal({ TMUX: '' } as NodeJS.ProcessEnv)).toBe(true)
+    expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv)).toBe(true)
+  })
+
+  it('disables fast-echo when only a tmux-flavored TERM is present (SSH from tmux, no TMUX forwarded)', () => {
+    // OpenSSH forwards TERM but not TMUX, so a TUI on a remote host launched
+    // from inside local tmux sees TERM=tmux-256color with no TMUX var. The
+    // cursor-drift bug still applies, so fast-echo must stay off.
+    expect(supportsFastEchoTerminal({ TERM: 'tmux' } as NodeJS.ProcessEnv)).toBe(false)
+    expect(supportsFastEchoTerminal({ TERM: 'tmux-256color' } as NodeJS.ProcessEnv)).toBe(false)
+  })
+
+  it('does NOT disable fast-echo for screen-flavored TERM (GNU screen out of scope, no reported drift)', () => {
+    // GNU screen sets TERM=screen/screen-256color and has no reported drift.
+    // We must not widen the tmux guard to screen* and regress its perf.
+    expect(supportsFastEchoTerminal({ TERM: 'screen' } as NodeJS.ProcessEnv)).toBe(true)
+    expect(supportsFastEchoTerminal({ TERM: 'screen-256color' } as NodeJS.ProcessEnv)).toBe(true)
+    // And an unrelated 256color TERM must stay enabled.
+    expect(supportsFastEchoTerminal({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true)
+  })
+
   it('disables fast-echo by default in Termux mode', () => {
     expect(
       supportsFastEchoTerminal({ TERMUX_VERSION: '0.118.0', PREFIX: '/data/data/com.termux/files/usr' } as NodeJS.ProcessEnv)
diff --git a/ui-tui/src/__tests__/useInputHandlers.test.ts b/ui-tui/src/__tests__/useInputHandlers.test.ts
index 0d3fd69c1ed..fa9372d5356 100644
--- a/ui-tui/src/__tests__/useInputHandlers.test.ts
+++ b/ui-tui/src/__tests__/useInputHandlers.test.ts
@@ -1,6 +1,11 @@
 import { describe, expect, it, vi } from 'vitest'
 
-import { applyVoiceRecordResponse, shouldFallThroughForScroll } from '../app/useInputHandlers.js'
+import {
+  applyVoiceRecordResponse,
+  handleIdleHotkeyExit,
+  shouldAllowIdleHotkeyExit,
+  shouldFallThroughForScroll
+} from '../app/useInputHandlers.js'
 
 const baseKey = {
   downArrow: false,
@@ -42,6 +47,38 @@ describe('shouldFallThroughForScroll — keep transcript scrolling alive during
   })
 })
 
+describe('shouldAllowIdleHotkeyExit', () => {
+  it('keeps idle exit hotkeys enabled in normal terminals', () => {
+    expect(shouldAllowIdleHotkeyExit(false)).toBe(true)
+  })
+
+  it('disables idle exit hotkeys in dashboard chat', () => {
+    expect(shouldAllowIdleHotkeyExit(true)).toBe(false)
+  })
+})
+
+describe('handleIdleHotkeyExit', () => {
+  it('exits in normal terminals', () => {
+    const actions = { die: vi.fn(), sys: vi.fn() }
+
+    handleIdleHotkeyExit(actions, false)
+
+    expect(actions.die).toHaveBeenCalledTimes(1)
+    expect(actions.sys).not.toHaveBeenCalled()
+  })
+
+  it('asks the dashboard for a fresh chat instead of leaving a ghost session', () => {
+    const actions = { die: vi.fn(), sys: vi.fn() }
+    const requestDashboardNewSession = vi.fn()
+
+    handleIdleHotkeyExit(actions, true, requestDashboardNewSession)
+
+    expect(actions.die).not.toHaveBeenCalled()
+    expect(requestDashboardNewSession).toHaveBeenCalledTimes(1)
+    expect(actions.sys).toHaveBeenCalledWith('starting a fresh dashboard chat...')
+  })
+})
+
 describe('applyVoiceRecordResponse', () => {
   it('reverts optimistic REC state when the gateway reports voice busy', () => {
     const setProcessing = vi.fn()
diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts
index 5c021dbcdf9..5c74eb3eb42 100644
--- a/ui-tui/src/app/slash/commands/core.ts
+++ b/ui-tui/src/app/slash/commands/core.ts
@@ -1,6 +1,6 @@
 import { forceRedraw, type MouseTrackingMode } from '@hermes/ink'
 
-import { NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
+import { DASHBOARD_TUI_MODE, NO_CONFIRM_DESTRUCTIVE } from '../../../config/env.js'
 import { dailyFortune, randomFortune } from '../../../content/fortunes.js'
 import { HOTKEYS } from '../../../content/hotkeys.js'
 import { isSectionName, nextDetailsMode, parseDetailsMode, SECTION_NAMES } from '../../../domain/details.js'
@@ -76,6 +76,14 @@ const DETAILS_USAGE =
 
 const DETAILS_SECTION_USAGE = 'usage: /details <section> [hidden|collapsed|expanded|reset]'
 
+// Shown when /exit or /quit is refused in the hosted dashboard chat. Kept as a
+// constant so the test asserts against the same source of truth as production.
+export const DASHBOARD_EXIT_DISABLED_MESSAGE =
+  'exit is disabled in hosted dashboard chat — use /new to start a fresh session'
+
+export const DASHBOARD_UPDATE_DISABLED_MESSAGE =
+  'update is disabled in hosted dashboard chat — the hosted environment is managed separately'
+
 export const coreCommands: SlashCommand[] = [
   {
     help: 'list commands + hotkeys',
@@ -113,13 +121,34 @@ export const coreCommands: SlashCommand[] = [
     aliases: ['exit'],
     help: 'exit hermes',
     name: 'quit',
-    run: (_arg, ctx) => ctx.session.die()
+    run: (_arg, ctx) => {
+      // In the hosted dashboard chat there is no in-page restart path after
+      // the PTY child exits, so quitting bricks the tab until a refresh. The
+      // keyboard idle-exit (Ctrl+C / Ctrl+D) and SIGINT handling already refuse
+      // to die in this mode (see useInputHandlers + entry.tsx); gate /exit and
+      // /quit on the same DASHBOARD_TUI_MODE flag. Unlike the keyboard path
+      // (which auto-starts a fresh chat), the explicit quit command refuses and
+      // instructs the user to run /new themselves.
+      if (DASHBOARD_TUI_MODE) {
+        ctx.transcript.sys(DASHBOARD_EXIT_DISABLED_MESSAGE)
+
+        return
+      }
+
+      ctx.session.die()
+    }
   },
 
   {
     help: 'update Hermes Agent to the latest version (exits TUI)',
     name: 'update',
     run: (_arg, ctx) => {
+      if (DASHBOARD_TUI_MODE) {
+        ctx.transcript.sys(DASHBOARD_UPDATE_DISABLED_MESSAGE)
+
+        return
+      }
+
       ctx.transcript.sys('exiting TUI to run update...')
       // Exit code 42 signals the Python wrapper to exec `hermes update`.
       // Use dieWithCode for proper cleanup (gateway kill + Ink unmount).
diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts
index 20d3493f547..f19cccfe5b5 100644
--- a/ui-tui/src/app/useInputHandlers.ts
+++ b/ui-tui/src/app/useInputHandlers.ts
@@ -2,6 +2,7 @@ import { forceRedraw, useInput } from '@hermes/ink'
 import { useStore } from '@nanostores/react'
 import { useEffect, useRef } from 'react'
 
+import { DASHBOARD_TUI_MODE } from '../config/env.js'
 import { TYPING_IDLE_MS } from '../config/timing.js'
 import type {
   ApprovalRespondResponse,
@@ -15,13 +16,30 @@ import { computePrecisionWheelStep, initPrecisionWheel } from '../lib/precisionW
 import { computeWheelStep, initWheelAccelForHost } from '../lib/wheelAccel.js'
 
 import { getInputSelection } from './inputSelectionStore.js'
-import type { InputHandlerContext, InputHandlerResult } from './interfaces.js'
+import type { InputHandlerActions, InputHandlerContext, InputHandlerResult } from './interfaces.js'
 import { $isBlocked, $overlayState, patchOverlayState } from './overlayStore.js'
 import { turnController } from './turnController.js'
 import { patchTurnState } from './turnStore.js'
 import { getUiState } from './uiStore.js'
 
 const isCtrl = (key: { ctrl: boolean }, ch: string, target: string) => key.ctrl && ch.toLowerCase() === target
+const DASHBOARD_NEW_SESSION_MESSAGE = 'starting a fresh dashboard chat...'
+
+export const shouldAllowIdleHotkeyExit = (dashboardTuiMode = DASHBOARD_TUI_MODE) => !dashboardTuiMode
+
+export function handleIdleHotkeyExit(
+  actions: Pick<InputHandlerActions, 'die' | 'sys'>,
+  dashboardTuiMode = DASHBOARD_TUI_MODE,
+  requestDashboardNewSession?: () => void
+) {
+  if (!shouldAllowIdleHotkeyExit(dashboardTuiMode)) {
+    requestDashboardNewSession?.()
+
+    return actions.sys(DASHBOARD_NEW_SESSION_MESSAGE)
+  }
+
+  return actions.die()
+}
 
 /**
  * Approval / clarify / confirm overlays mount their own `useInput` handlers
@@ -505,11 +523,23 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
         return cActions.clearIn()
       }
 
-      return actions.die()
+      return handleIdleHotkeyExit(actions, DASHBOARD_TUI_MODE, () => {
+        gateway.gw.publishLocalEvent({
+          payload: { reason: 'idle_exit_hotkey' },
+          session_id: live.sid ?? undefined,
+          type: 'dashboard.new_session_requested'
+        })
+      })
     }
 
     if (isAction(key, ch, 'd')) {
-      return actions.die()
+      return handleIdleHotkeyExit(actions, DASHBOARD_TUI_MODE, () => {
+        gateway.gw.publishLocalEvent({
+          payload: { reason: 'idle_exit_hotkey' },
+          session_id: live.sid ?? undefined,
+          type: 'dashboard.new_session_requested'
+        })
+      })
     }
 
     if (isAction(key, ch, 'l')) {
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index 564484999f6..deb22914695 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -359,6 +359,22 @@ export function supportsFastEchoTerminal(env: NodeJS.ProcessEnv = process.env):
     return false
   }
 
+  // tmux adds a PTY multiplexing layer that desyncs stdout.write() cursor
+  // advances from its internal cursor model, causing cursor drift and ghost
+  // whitespace under the fast-echo bypass path.
+  //
+  // `TMUX` catches the local case. It is NOT forwarded over SSH, so when the
+  // TUI runs on a remote host launched from inside local tmux we only see a
+  // tmux-flavored `TERM` (tmux sets `tmux`/`tmux-256color`); match that too so
+  // remote-over-tmux sessions still fall back to the safe render path. We
+  // deliberately do NOT match `screen*`: that is the TERM GNU screen sets, and
+  // screen has no reported drift, so widening to it would disable the
+  // optimization for those users with no evidence of a bug.
+  const term = (env.TERM ?? '').trim().toLowerCase()
+  if ((env.TMUX ?? '').trim().length > 0 || term === 'tmux' || term.startsWith('tmux-')) {
+    return false
+  }
+
   // Termux terminals are especially sensitive to bypass-path cursor drift and
   // stale paints at soft-wrap boundaries on tall/narrow viewports. Keep this
   // off by default in Termux mode; allow explicit opt-in for local debugging.
diff --git a/ui-tui/src/config/env.ts b/ui-tui/src/config/env.ts
index 3b5b9bee4d4..843512ed76a 100644
--- a/ui-tui/src/config/env.ts
+++ b/ui-tui/src/config/env.ts
@@ -1,4 +1,5 @@
 import type { MouseTrackingMode } from '@hermes/ink'
+
 import { isTermuxTuiMode } from '../lib/termux.js'
 
 const truthy = (v?: string) => /^(?:1|true|yes|on)$/i.test((v ?? '').trim())
@@ -43,12 +44,19 @@ export const STARTUP_IMAGE = (process.env.HERMES_TUI_IMAGE ?? '').trim()
 //   behavior.
 const mouseTrackingOverride = parseToggle(process.env.HERMES_TUI_MOUSE_TRACKING)
 const mouseTrackingDisabledLegacy = truthy(process.env.HERMES_TUI_DISABLE_MOUSE)
+
 const resolvedBootMouseEnabled =
   mouseTrackingOverride ?? (TERMUX_TUI_MODE ? false : !mouseTrackingDisabledLegacy)
+
 export const MOUSE_TRACKING: MouseTrackingMode = resolvedBootMouseEnabled ? 'all' : 'off'
 
 export const NO_CONFIRM_DESTRUCTIVE = truthy(process.env.HERMES_TUI_NO_CONFIRM)
 
+// Set by the dashboard PTY launcher. This is intentionally narrower than
+// INLINE_MODE: users can opt into inline terminal rendering locally, but the
+// browser-embedded TUI has no healthy restart path after an idle exit.
+export const DASHBOARD_TUI_MODE = truthy(process.env.HERMES_TUI_DASHBOARD)
+
 // HERMES_DEV_CREDITS — dev-only live-spend readout (Δ status segment + "(dev credits)"
 // banner). Throwaway dev scaffolding; the whole readout gates on this one flag.
 export const DEV_CREDITS_MODE = truthy(process.env.HERMES_DEV_CREDITS)
diff --git a/ui-tui/src/entry.tsx b/ui-tui/src/entry.tsx
index 22fee6bccbd..de60d966760 100644
--- a/ui-tui/src/entry.tsx
+++ b/ui-tui/src/entry.tsx
@@ -5,7 +5,7 @@ import './lib/forceTruecolor.js'
 
 import type { FrameEvent } from '@hermes/ink'
 
-import { TERMUX_TUI_MODE } from './config/env.js'
+import { DASHBOARD_TUI_MODE, TERMUX_TUI_MODE } from './config/env.js'
 import { GatewayClient } from './gatewayClient.js'
 import { setupGracefulExit } from './lib/gracefulExit.js'
 import { formatBytes, type HeapDumpResult, performHeapDump } from './lib/memory.js'
@@ -76,7 +76,12 @@ setupGracefulExit({
     recordParentLifecycle(`graceful-exit received signal=${signal} → killing gateway`)
     resetTerminalModes()
     process.stderr.write(`hermes-tui lifecycle: received ${signal}\n`)
-  }
+  },
+  // The dashboard chat tab has no in-page restart path after the PTY child
+  // exits. Ignore SIGINT there so Ctrl+C cannot kill the embedded TUI if raw
+  // mode briefly drops and the terminal driver turns the keystroke into a
+  // signal instead of input bytes. SIGTERM/SIGHUP still cleanly shut down.
+  ignoredSignals: DASHBOARD_TUI_MODE ? ['SIGINT'] : []
 })
 
 const stopMemoryMonitor = startMemoryMonitor({
diff --git a/ui-tui/src/gatewayClient.ts b/ui-tui/src/gatewayClient.ts
index 5dfbe880fb1..88ddc0fcdc3 100644
--- a/ui-tui/src/gatewayClient.ts
+++ b/ui-tui/src/gatewayClient.ts
@@ -307,6 +307,13 @@ export class GatewayClient extends EventEmitter {
     }
   }
 
+  publishLocalEvent(ev: GatewayEvent) {
+    const frame = JSON.stringify({ jsonrpc: '2.0', method: 'event', params: ev })
+
+    this.mirrorEventToSidecar(frame)
+    this.publish(ev)
+  }
+
   private handleWebSocketFrame(raw: unknown) {
     const text = asWireText(raw)
 
diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts
index 016171008c1..74a6f7627d1 100644
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -634,6 +634,7 @@ export type GatewayEvent =
     }
   | { payload?: { state?: 'idle' | 'listening' | 'transcribing' }; session_id?: string; type: 'voice.status' }
   | { payload?: { no_speech_limit?: boolean; text?: string }; session_id?: string; type: 'voice.transcript' }
+  | { payload?: { reason?: string }; session_id?: string; type: 'dashboard.new_session_requested' }
   | { payload: { line: string }; session_id?: string; type: 'gateway.stderr' }
   | {
       payload?: { level?: 'info' | 'warn' | 'error'; message?: string }
diff --git a/ui-tui/src/lib/gracefulExit.ts b/ui-tui/src/lib/gracefulExit.ts
index 2896fd12651..089269ac1ae 100644
--- a/ui-tui/src/lib/gracefulExit.ts
+++ b/ui-tui/src/lib/gracefulExit.ts
@@ -1,11 +1,16 @@
 interface SetupOptions {
   cleanups?: (() => Promise<void> | void)[]
   failsafeMs?: number
+  ignoredSignals?: GracefulSignal[]
   onError?: (scope: 'uncaughtException' | 'unhandledRejection', err: unknown) => void
   onSignal?: (signal: NodeJS.Signals) => void
 }
 
-const SIGNAL_EXIT_CODE: Record<'SIGHUP' | 'SIGINT' | 'SIGTERM', number> = {
+export type GracefulSignal = 'SIGHUP' | 'SIGINT' | 'SIGTERM'
+
+const SIGNALS: readonly GracefulSignal[] = ['SIGINT', 'SIGTERM', 'SIGHUP']
+
+const SIGNAL_EXIT_CODE: Record<GracefulSignal, number> = {
   SIGHUP: 129,
   SIGINT: 130,
   SIGTERM: 143
@@ -13,7 +18,16 @@ const SIGNAL_EXIT_CODE: Record<'SIGHUP' | 'SIGINT' | 'SIGTERM', number> = {
 
 let wired = false
 
-export function setupGracefulExit({ cleanups = [], failsafeMs = 4000, onError, onSignal }: SetupOptions = {}) {
+export const shouldExitForSignal = (signal: GracefulSignal, ignoredSignals: readonly GracefulSignal[] = []) =>
+  !ignoredSignals.includes(signal)
+
+export function setupGracefulExit({
+  cleanups = [],
+  failsafeMs = 4000,
+  ignoredSignals = [],
+  onError,
+  onSignal
+}: SetupOptions = {}) {
   if (wired) {
     return
   }
@@ -38,8 +52,14 @@ export function setupGracefulExit({ cleanups = [], failsafeMs = 4000, onError, o
     void Promise.allSettled(cleanups.map(fn => Promise.resolve().then(fn))).finally(() => process.exit(code))
   }
 
-  for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP'] as const) {
-    process.on(sig, () => exit(SIGNAL_EXIT_CODE[sig], sig))
+  for (const sig of SIGNALS) {
+    process.on(sig, () => {
+      if (!shouldExitForSignal(sig, ignoredSignals)) {
+        return
+      }
+
+      exit(SIGNAL_EXIT_CODE[sig], sig)
+    })
   }
 
   process.on('uncaughtException', err => onError?.('uncaughtException', err))
diff --git a/uv.lock b/uv.lock
index fc340bdbe89..095b7563311 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1713,7 +1713,7 @@ requires-dist = [
     { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==1.3.0" },
     { name = "python-dotenv", specifier = "==1.2.2" },
     { name = "python-multipart", specifier = ">=0.0.9,<1" },
-    { name = "python-multipart", marker = "extra == 'web'", specifier = "==0.0.20" },
+    { name = "python-multipart", marker = "extra == 'web'", specifier = "==0.0.27" },
     { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'messaging'", specifier = "==22.6" },
     { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'termux'", specifier = "==22.6" },
     { name = "pywinpty", marker = "sys_platform == 'win32'", specifier = ">=2.0.0,<3" },
@@ -3317,11 +3317,11 @@ wheels = [
 
 [[package]]
 name = "python-multipart"
-version = "0.0.20"
+version = "0.0.27"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158, upload-time = "2024-12-16T19:45:46.972Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/69/9b/f23807317a113dc36e74e75eb265a02dd1a4d9082abc3c1064acd22997c4/python_multipart-0.0.27.tar.gz", hash = "sha256:9870a6a8c5a20a5bf4f07c017bd1489006ff8836cff097b6933355ee2b49b602", size = 44043, upload-time = "2026-04-27T10:51:26.649Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" },
+    { url = "https://files.pythonhosted.org/packages/99/78/4126abcbdbd3c559d43e0db7f7b9173fc6befe45d39a2856cc0b8ec2a5a6/python_multipart-0.0.27-py3-none-any.whl", hash = "sha256:6fccfad17a27334bd0193681b369f476eda3409f17381a2d65aa7df3f7275645", size = 29254, upload-time = "2026-04-27T10:51:24.997Z" },
 ]
 
 [[package]]
diff --git a/web/package.json b/web/package.json
index 665a780c71d..6666773c737 100644
--- a/web/package.json
+++ b/web/package.json
@@ -8,7 +8,8 @@
     "build": "tsc -b && vite build",
     "lint": "eslint .",
     "preview": "vite preview",
-    "typecheck": "tsc -p . --noEmit"
+    "typecheck": "tsc -p . --noEmit",
+    "test": "vitest run"
   },
   "dependencies": {
     "@nous-research/ui": "0.18.2",
@@ -48,6 +49,7 @@
     "three": "^0.180.0",
     "typescript": "^6.0.3",
     "typescript-eslint": "^8.56.1",
-    "vite": "^8.0.16"
+    "vite": "^8.0.16",
+    "vitest": "^4.1.5"
   }
 }
diff --git a/web/src/components/ChatSessionList.tsx b/web/src/components/ChatSessionList.tsx
new file mode 100644
index 00000000000..c1988681f35
--- /dev/null
+++ b/web/src/components/ChatSessionList.tsx
@@ -0,0 +1,260 @@
+/**
+ * ChatSessionList — a ChatGPT-style conversation switcher that sits beside
+ * the embedded TUI on the dashboard Chat tab.
+ *
+ * It lists the most recent sessions for the active management profile and
+ * lets the user swap between them without leaving the Chat page. Selecting
+ * a row sets `/chat?resume=<id>`; ChatPage treats the resume target as part
+ * of the PTY identity, so the change tears down the current terminal child
+ * and respawns it resuming that conversation (see ChatPage.tsx). The
+ * "New chat" action clears the resume param, which spawns a fresh PTY.
+ *
+ * Best-effort, like ChatSidebar: a failed fetch surfaces a small inline
+ * error with a retry affordance and the terminal pane keeps working.
+ *
+ * This is a navigation surface, NOT a session-management one — delete,
+ * rename, export, and bulk actions live on the Sessions page. Keeping this
+ * panel read-only (plus select / new) avoids duplicating that machinery and
+ * keeps the chat context focused on switching conversations quickly.
+ */
+
+import { Button } from "@nous-research/ui/ui/components/button";
+import { ListItem } from "@nous-research/ui/ui/components/list-item";
+import { Spinner } from "@nous-research/ui/ui/components/spinner";
+import { AlertCircle, MessageSquarePlus, RefreshCw } from "lucide-react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import { useSearchParams } from "react-router-dom";
+
+import { useI18n } from "@/i18n";
+import { api, type SessionInfo } from "@/lib/api";
+import { cn, timeAgo } from "@/lib/utils";
+
+const SESSION_LIMIT = 30;
+interface ChatSessionListProps {
+  /** Active resume target (the session currently shown in the terminal). */
+  activeSessionId: string | null;
+  /** Management profile from the dashboard switcher — scopes the listing. */
+  profile?: string;
+  className?: string;
+  /** Optional callback fired after a row is picked (e.g. close mobile sheet). */
+  onPicked?: () => void;
+  /**
+   * Starts a fresh chat. ChatPage supplies its `startFreshDashboardChat`,
+   * which clears `?resume` AND bumps the reconnect nonce so a brand-new PTY
+   * spawns even when the user is already on an unsaved fresh session. When
+   * omitted, we fall back to clearing the resume param ourselves.
+   */
+  onNewChat?: () => void;
+}
+
+function rowLabel(session: SessionInfo, untitled: string): string {
+  // Precedence: real title, then preview text, then the caller's fallback.
+  // The literal "Untitled" is treated the same as a missing title.
+  const named = session.title?.trim();
+  const text = named && named !== "Untitled" ? named : session.preview?.trim();
+  return text || untitled;
+}
+
+export function ChatSessionList({
+  activeSessionId,
+  profile,
+  className,
+  onPicked,
+  onNewChat,
+}: ChatSessionListProps) {
+  const { t } = useI18n();
+  const [, setSearchParams] = useSearchParams();
+  const [sessions, setSessions] = useState<SessionInfo[] | null>(null);
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+  // Bumped by `reload` to force a refetch (Refresh button, error Retry).
+  const [reloadNonce, setReloadNonce] = useState(0);
+
+  // `profile` is read inside the fetch; it's part of the scope key so a
+  // profile switch refetches. The empty-string fallback keeps the dep
+  // stable when no profile is selected (default profile).
+  const scopeKey = profile ?? "";
+
+  // Monotonic request token: only the most recent fetch is allowed to
+  // commit state, so a fast profile switch (or Refresh spam) can't land a
+  // stale list out of order.
+  const reqRef = useRef(0);
+
+  const load = useCallback(() => {
+    const myReq = ++reqRef.current;
+    setLoading(true);
+    setError(null);
+    api
+      .getSessions(SESSION_LIMIT, 0, scopeKey)
+      .then((res) => {
+        if (reqRef.current !== myReq) return;
+        setSessions(res.sessions);
+      })
+      .catch((e: unknown) => {
+        if (reqRef.current !== myReq) return;
+        setError((e as Error | null)?.message || "failed to load sessions");
+      })
+      .finally(() => {
+        if (reqRef.current === myReq) setLoading(false);
+      });
+  }, [scopeKey]);
+
+  useEffect(() => {
+    // Dashboard data surfaces fetch from an effect on mount + scope change;
+    // keep this local and explicit until the shared lint profile is updated
+    // for async loaders (matches FilesPage).
+    // eslint-disable-next-line react-hooks/set-state-in-effect
+    load();
+    // `reloadNonce` is a manual refetch trigger (Refresh button / Retry).
+  }, [load, reloadNonce]);
+
+  const reload = useCallback(() => setReloadNonce((n) => n + 1), []);
+
+  // Picking a row sets `/chat?resume=<id>`. Re-picking the row already in
+  // the terminal is a no-op (avoids a needless PTY teardown).
+  const pick = useCallback(
+    (id: string) => {
+      onPicked?.();
+      if (id === activeSessionId) return;
+      setSearchParams(
+        (prev) => {
+          const next = new URLSearchParams(prev);
+          next.set("resume", id);
+          return next;
+        },
+        { replace: false },
+      );
+    },
+    [activeSessionId, onPicked, setSearchParams],
+  );
+
+  // "New chat" prefers ChatPage's robust handler (clears resume + forces a
+  // PTY respawn even from an already-fresh session). Fallback: clear the
+  // resume param ourselves, which spawns a fresh PTY whenever one was being
+  // resumed. Session management (delete/rename/export) lives on the Sessions
+  // page; this panel only switches and starts conversations.
+  const startNew = useCallback(() => {
+    onPicked?.();
+    if (onNewChat) {
+      onNewChat();
+      return;
+    }
+    setSearchParams(
+      (prev) => {
+        const next = new URLSearchParams(prev);
+        next.delete("resume");
+        return next;
+      },
+      { replace: false },
+    );
+  }, [onNewChat, onPicked, setSearchParams]);
+
+  const content = useMemo(() => {
+    if (loading && sessions === null) {
+      return (
+        <div className="flex items-center justify-center gap-2 px-2 py-6 text-xs text-text-secondary">
+          <Spinner /> {t.common.loading}
+        </div>
+      );
+    }
+    if (error) {
+      return (
+        <div className="flex flex-col items-start gap-2 px-2 py-4 text-xs">
+          <div className="flex items-start gap-2 text-destructive">
+            <AlertCircle className="mt-0.5 h-3.5 w-3.5 shrink-0" />
+            <span className="wrap-break-word">{error}</span>
+          </div>
+          <Button size="sm" outlined onClick={reload} prefix={<RefreshCw />}>
+            {t.common.retry}
+          </Button>
+        </div>
+      );
+    }
+    if (!sessions || sessions.length === 0) {
+      return (
+        <div className="px-2 py-6 text-center text-xs text-text-secondary">
+          {t.sessions.noSessions}
+        </div>
+      );
+    }
+    return (
+      <div className="flex flex-col gap-0.5">
+        {sessions.map((s) => {
+          const isActive = s.id === activeSessionId;
+          return (
+            <ListItem
+              key={s.id}
+              onClick={() => pick(s.id)}
+              aria-current={isActive ? "true" : undefined}
+              className={cn(
+                "flex-col items-start gap-0.5 rounded px-2 py-1.5",
+                "normal-case tracking-normal",
+                isActive
+                  ? "bg-primary/10 text-foreground border-l-2 border-primary"
+                  : "text-text-secondary hover:bg-midground/5 hover:text-foreground",
+              )}
+            >
+              <span className="w-full truncate text-sm font-medium">
+                {rowLabel(s, t.sessions.untitledSession)}
+              </span>
+              <span className="flex w-full items-center gap-1.5 text-[0.6875rem] text-text-tertiary">
+                <span>{timeAgo(s.last_active)}</span>
+                {s.message_count > 0 && (
+                  <>
+                    <span aria-hidden>·</span>
+                    <span>{s.message_count} msgs</span>
+                  </>
+                )}
+                {s.source && s.source !== "cli" && (
+                  <>
+                    <span aria-hidden>·</span>
+                    <span className="truncate">{s.source}</span>
+                  </>
+                )}
+              </span>
+            </ListItem>
+          );
+        })}
+      </div>
+    );
+  }, [activeSessionId, error, loading, pick, reload, sessions, t]);
+
+  return (
+    <aside
+      className={cn(
+        "flex h-full w-full min-w-0 shrink-0 flex-col overflow-hidden",
+        className,
+      )}
+    >
+      <div className="flex items-center justify-between gap-2 px-2 pb-2">
+        <span className="text-display text-xs tracking-wider text-text-tertiary">
+          {t.sessions.title}
+        </span>
+        <Button
+          ghost
+          size="icon"
+          onClick={reload}
+          aria-label={t.common.refresh}
+          title={t.common.refresh}
+          className="text-text-secondary hover:text-foreground"
+        >
+          <RefreshCw className={cn(loading && "animate-spin")} />
+        </Button>
+      </div>
+
+      <Button
+        outlined
+        size="sm"
+        onClick={startNew}
+        prefix={<MessageSquarePlus />}
+        className="mx-2 mb-2 justify-center"
+      >
+        {t.sessions.newChat}
+      </Button>
+
+      <div className="min-h-0 flex-1 overflow-y-auto overflow-x-hidden px-1 pb-1">
+        {content}
+      </div>
+    </aside>
+  );
+}
diff --git a/web/src/components/ChatSidebar.tsx b/web/src/components/ChatSidebar.tsx
index 1a53741d8fd..8d7d5505e6c 100644
--- a/web/src/components/ChatSidebar.tsx
+++ b/web/src/components/ChatSidebar.tsx
@@ -4,12 +4,13 @@
  *
  * Two WebSockets, one per concern:
  *
- *   1. **JSON-RPC sidecar** (`GatewayClient` → /api/ws) — drives the
- *      sidebar's own slot of the dashboard's in-process gateway.  Owns
- *      the model badge / picker / connection state / error banner.
- *      Independent of the PTY pane's session by design — those are the
- *      pieces the sidebar needs to be able to drive directly (model
- *      switch via slash.exec, etc.).
+ *   1. **JSON-RPC sidecar** (`GatewayClient` → /api/ws) — a lightweight
+ *      session used only for connection state (the "live" badge) and
+ *      credential warnings. Independent of the PTY pane's session by
+ *      design. The model badge does NOT come from here: it reads the
+ *      effective config model over REST (`/api/model/info`), and the model
+ *      picker writes config over REST (`/api/model/set`) then offers a
+ *      dashboard reload so the running chat adopts the new model.
  *
  *   2. **Event subscriber** (/api/events?channel=…) — passive, receives
  *      every dispatcher emit from the PTY-side `tui_gateway.entry` that
@@ -28,9 +29,10 @@ import { Badge } from "@nous-research/ui/ui/components/badge";
 import { Card } from "@nous-research/ui/ui/components/card";
 
 import { ModelPickerDialog } from "@/components/ModelPickerDialog";
+import { ModelReloadConfirm } from "@/components/ModelReloadConfirm";
 import { ToolCall, type ToolEntry } from "@/components/ToolCall";
 import { GatewayClient, type ConnectionState } from "@/lib/gatewayClient";
-import { HERMES_BASE_PATH, buildWsAuthParam } from "@/lib/api";
+import { api, HERMES_BASE_PATH, buildWsAuthParam } from "@/lib/api";
 
 import { cn } from "@/lib/utils";
 import { AlertCircle, ChevronDown, RefreshCw } from "lucide-react";
@@ -74,9 +76,22 @@ interface ChatSidebarProps {
   /** Management profile from the dashboard switcher — scopes session.create. */
   profile?: string;
   className?: string;
+  onDashboardNewSessionRequest?: () => void;
+  /**
+   * Render the tool-call activity card. Defaults to true. The dashboard Chat
+   * tab sets this false so the right rail stays a thin model + session-list
+   * column; the model picker and its event plumbing are unaffected.
+   */
+  showTools?: boolean;
 }
 
-export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
+export function ChatSidebar({
+  channel,
+  profile,
+  className,
+  onDashboardNewSessionRequest,
+  showTools = true,
+}: ChatSidebarProps) {
   // `version` bumps on reconnect; gw is derived so we never call setState
   // for it inside an effect (React 19's set-state-in-effect rule). The
   // counter is the dependency on purpose — it's not read in the memo body,
@@ -86,11 +101,37 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
   const gw = useMemo(() => new GatewayClient(), [version]);
 
   const [state, setState] = useState<ConnectionState>("idle");
-  const [sessionId, setSessionId] = useState<string | null>(null);
   const [info, setInfo] = useState<SessionInfo>({});
   const [tools, setTools] = useState<ToolEntry[]>([]);
   const [modelOpen, setModelOpen] = useState(false);
   const [error, setError] = useState<string | null>(null);
+  // The badge shows config.yaml's main model (`model.default`) via
+  // `/api/model/info` — the same value the Models page writes and a new chat
+  // session boots from. We deliberately don't use the sidecar's `session.info`
+  // model: that's a one-time snapshot of the throwaway sidecar agent taken when
+  // its session is created, and it never updates when the model is changed
+  // elsewhere, so the badge would go stale. `/api/model/info` is profile-scoped
+  // by `fetchJSON`, so it reads the same profile this sidebar is scoped to.
+  const [effectiveModel, setEffectiveModel] = useState("");
+  // Set after the picker saves a model and the user declines the reload: config
+  // is updated but the running session keeps its model until rebuilt.
+  const [modelNotice, setModelNotice] = useState<string | null>(null);
+  // Short name of a just-saved model awaiting confirm to reload (a fresh chat
+  // session is how the running chat adopts it; we confirm before discarding it).
+  const [pendingReloadModel, setPendingReloadModel] = useState<string | null>(
+    null,
+  );
+
+  const refreshEffectiveModel = useCallback(() => {
+    void api
+      .getModelInfo()
+      .then((r) => {
+        if (r?.model) setEffectiveModel(String(r.model));
+      })
+      .catch(() => {
+        // Best-effort: keep the last known label rather than blanking it.
+      });
+  }, []);
 
   // Profile or PTY channel change tears down both WebSockets. Bump `version`
   // (same path as the manual Reconnect button) so the gateway client is
@@ -112,16 +153,14 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
 
   useEffect(() => {
     let cancelled = false;
-    setSessionId(null);
-    setInfo({});
-    setError(null);
+    queueMicrotask(() => {
+      if (cancelled) return;
+      setInfo({});
+      setError(null);
+    });
     const offState = gw.onState(setState);
 
     const offSessionInfo = gw.on<SessionInfo>("session.info", (ev) => {
-      if (ev.session_id) {
-        setSessionId(ev.session_id);
-      }
-
       if (ev.payload) {
         setInfo((prev) => ({ ...prev, ...ev.payload }));
       }
@@ -135,9 +174,10 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
       }
     });
 
-    // Adopt whichever session the gateway hands us. session.create on the
-    // sidecar is independent of the PTY pane's session by design — we
-    // only need a sid to drive the model picker's slash.exec calls.
+    // Create the sidecar session so the gateway surfaces session-scoped
+    // signals (connection state, credential warnings). It's independent of the
+    // PTY pane's session by design. The model picker no longer rides this
+    // session — it writes config.yaml over REST — so we don't track its id.
     gw.connect()
       .then(() => {
         if (cancelled) {
@@ -150,12 +190,6 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
           ...(profile ? { profile } : {}),
         });
       })
-      .then((created) => {
-        if (cancelled || !created?.session_id) {
-          return;
-        }
-        setSessionId(created.session_id);
-      })
       .catch((e: Error) => {
         if (!cancelled) {
           setError(e.message);
@@ -233,7 +267,9 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
 
       const { type, payload } = frame.params;
 
-      if (type === "tool.start") {
+      if (type === "dashboard.new_session_requested") {
+        onDashboardNewSessionRequest?.();
+      } else if (type === "tool.start") {
         const p = payload as
           | { tool_id?: string; name?: string; context?: string }
           | undefined;
@@ -309,22 +345,32 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
       unmounting = true;
       ws?.close();
     };
-  }, [channel, version]);
+  }, [channel, onDashboardNewSessionRequest, version]);
+
+  // Seed the badge on mount and re-read it whenever the sockets are rebuilt
+  // (a profile/channel switch bumps `version`).
+  useEffect(() => {
+    refreshEffectiveModel();
+  }, [refreshEffectiveModel, version]);
 
   const reconnect = useCallback(() => {
     setError(null);
     setTools([]);
+    setModelNotice(null);
+    setPendingReloadModel(null);
     setVersion((v) => v + 1);
   }, []);
 
-  const canPickModel = state === "open" && !!sessionId;
-  const modelLabel = (info.model ?? "—").split("/").slice(-1)[0] ?? "—";
+  // The picker writes config.yaml over REST and reloads — it doesn't ride the
+  // sidecar gateway session, so it's available whenever the sidebar is mounted.
+  const modelName = effectiveModel || info.model || "—";
+  const modelLabel = modelName.split("/").slice(-1)[0] ?? "—";
   const banner = error ?? info.credential_warning ?? null;
 
   return (
     <aside
       className={cn(
-        "flex h-full w-full min-w-0 shrink-0 flex-col gap-3 overflow-y-auto overflow-x-hidden pr-1 lg:w-80",
+        "flex h-full w-full min-w-0 shrink-0 flex-col gap-3 overflow-y-auto overflow-x-hidden pr-1",
         className,
       )}
     >
@@ -337,21 +383,18 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
           <Button
             ghost
             size="sm"
-            disabled={!canPickModel}
             onClick={() => setModelOpen(true)}
             className={cn(
               "max-w-full min-w-0 px-0 py-0",
               "self-start normal-case tracking-normal text-sm font-medium",
               "hover:underline disabled:no-underline",
             )}
-            title={info.model ?? "switch model"}
+            title={modelName === "—" ? "switch model" : modelName}
           >
             <span className="flex min-w-0 max-w-full items-center gap-1">
               <span className="truncate">{modelLabel}</span>
 
-              {canPickModel ? (
-                <ChevronDown className="size-3.5 shrink-0 text-text-secondary" />
-              ) : null}
+              <ChevronDown className="size-3.5 shrink-0 text-text-secondary" />
             </span>
           </Button>
         </div>
@@ -361,6 +404,16 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
         </Badge>
       </Card>
 
+      {modelNotice && (
+        <Card className="flex items-start gap-2 border-warning/40 bg-warning/5 px-3 py-2 text-xs">
+          <AlertCircle className="mt-0.5 h-3.5 w-3.5 shrink-0 text-warning" />
+
+          <div className="wrap-break-word min-w-0 flex-1 text-text-secondary">
+            {modelNotice}
+          </div>
+        </Card>
+      )}
+
       {banner && (
         <Card className="flex items-start gap-2 border-destructive/40 bg-destructive/5 px-3 py-2 text-xs">
           <AlertCircle className="mt-0.5 h-3.5 w-3.5 shrink-0 text-destructive" />
@@ -383,29 +436,66 @@ export function ChatSidebar({ channel, profile, className }: ChatSidebarProps) {
         </Card>
       )}
 
-      <Card className="flex min-h-0 flex-none flex-col px-2 py-2">
-        <div className="text-display px-1 pb-2 text-xs tracking-wider text-text-tertiary">
-          tools
-        </div>
+      {showTools && (
+        <Card className="flex min-h-0 flex-none flex-col px-2 py-2">
+          <div className="text-display px-1 pb-2 text-xs tracking-wider text-text-tertiary">
+            tools
+          </div>
 
-        <div className="flex min-h-0 flex-col gap-1.5">
-          {tools.length === 0 ? (
-            <div className="px-2 py-4 text-center text-xs text-text-secondary">
-              no tool calls yet
-            </div>
-          ) : (
-            tools.map((t) => <ToolCall key={t.id} tool={t} />)
-          )}
-        </div>
-      </Card>
+          <div className="flex min-h-0 flex-col gap-1.5">
+            {tools.length === 0 ? (
+              <div className="px-2 py-4 text-center text-xs text-text-secondary">
+                no tool calls yet
+              </div>
+            ) : (
+              tools.map((t) => <ToolCall key={t.id} tool={t} />)
+            )}
+          </div>
+        </Card>
+      )}
 
-      {modelOpen && canPickModel && sessionId && (
+      {modelOpen && (
         <ModelPickerDialog
-          gw={gw}
-          sessionId={sessionId}
-          onClose={() => setModelOpen(false)}
+          // Same path the Models page uses (REST /api/model/set), not the
+          // sidecar config.set RPC, which didn't reliably land in the
+          // config.yaml the agent boots from. Always persisted (alwaysGlobal).
+          loader={api.getModelOptions}
+          alwaysGlobal
+          onApply={async ({ provider, model, confirmExpensiveModel }) => {
+            setModelNotice(null);
+            setPendingReloadModel(null);
+            const result = await api.setModelAssignment({
+              confirm_expensive_model: confirmExpensiveModel,
+              scope: "main",
+              provider,
+              model,
+            });
+            // confirm_required => the dialog shows the expensive-model prompt
+            // and calls back; don't announce until the user confirms.
+            if (!result.confirm_required) {
+              refreshEffectiveModel();
+              // Ask before reloading: applying the model starts a fresh chat.
+              setPendingReloadModel(model.split("/").slice(-1)[0]);
+            }
+            return result;
+          }}
+          onClose={() => {
+            setModelOpen(false);
+            refreshEffectiveModel();
+          }}
         />
       )}
+
+      <ModelReloadConfirm
+        model={pendingReloadModel}
+        onCancel={() => {
+          const m = pendingReloadModel;
+          setPendingReloadModel(null);
+          setModelNotice(
+            `Model set to ${m}. Run /new or refresh the page to apply it to this chat.`,
+          );
+        }}
+      />
     </aside>
   );
 }
diff --git a/web/src/components/ModelReloadConfirm.tsx b/web/src/components/ModelReloadConfirm.tsx
new file mode 100644
index 00000000000..3b5d27d615b
--- /dev/null
+++ b/web/src/components/ModelReloadConfirm.tsx
@@ -0,0 +1,40 @@
+import { ConfirmDialog } from "@/components/ConfirmDialog";
+
+/**
+ * Confirm + full-page reload after a model change.
+ *
+ * Changing the main model persists to config.yaml, but the RUNNING chat keeps
+ * its model until its session is rebuilt. A full reload (fresh PTY session that
+ * boots its agent from the just-saved config) is the reliable way to apply it —
+ * the in-place hot-swap and partial remount both proved unreliable. We confirm
+ * first because the reload starts a fresh chat (the current one stays resumable
+ * in Sessions and the agent's memory is kept).
+ *
+ * Shared by the chat sidebar picker and the Models page so both behave
+ * identically. `model` is the short model name awaiting confirmation, or null
+ * when the dialog is closed.
+ */
+export function ModelReloadConfirm(props: {
+  /** Short model name awaiting confirmation; the dialog is open iff non-null. */
+  model: string | null;
+  /** Replaces the built-in body text when supplied (e.g. the Models-page phrasing). */
+  description?: string;
+  /** Forwarded unchanged to ConfirmDialog's onCancel. */
+  onCancel: () => void;
+}) {
+  const { model, description, onCancel } = props;
+  const body =
+    description ??
+    `Switching to ${model ?? ""} starts a fresh chat. Your current chat stays in your Sessions list and the agent's memory is kept. Reload now to apply it?`;
+  const reloadPage = () => window.location.reload();
+  return (
+    <ConfirmDialog
+      open={model !== null}
+      title="Switch model?"
+      description={body}
+      confirmLabel="Reload"
+      onConfirm={reloadPage}
+      onCancel={onCancel}
+    />
+  );
+}
diff --git a/web/src/i18n/af.ts b/web/src/i18n/af.ts
index 2a8af6f0843..1c4997c191f 100644
--- a/web/src/i18n/af.ts
+++ b/web/src/i18n/af.ts
@@ -158,6 +158,7 @@ export const af: Translations = {
     selectedSessionsDeleted: "{count} sessies geskrap",
     failedToDeleteSelected: "Kon nie gekose sessies skrap nie",
     resumeInChat: "Hervat in Klets",
+    newChat: "Nuwe klets",
     previousPage: "Vorige bladsy",
     nextPage: "Volgende bladsy",
     roles: {
diff --git a/web/src/i18n/de.ts b/web/src/i18n/de.ts
index 11b4a095cb6..9f82bb3df7a 100644
--- a/web/src/i18n/de.ts
+++ b/web/src/i18n/de.ts
@@ -158,6 +158,7 @@ export const de: Translations = {
     selectedSessionsDeleted: "{count} Sitzungen gelöscht",
     failedToDeleteSelected: "Ausgewählte Sitzungen konnten nicht gelöscht werden",
     resumeInChat: "Im Chat fortsetzen",
+    newChat: "Neuer Chat",
     previousPage: "Vorherige Seite",
     nextPage: "Nächste Seite",
     roles: {
diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts
index 10fd8df4300..a6ab1a234ac 100644
--- a/web/src/i18n/en.ts
+++ b/web/src/i18n/en.ts
@@ -165,6 +165,7 @@ export const en: Translations = {
     selectedSessionsDeleted: "{count} sessions deleted",
     failedToDeleteSelected: "Failed to delete selected sessions",
     resumeInChat: "Resume in Chat",
+    newChat: "New chat",
     previousPage: "Previous page",
     nextPage: "Next page",
     roles: {
diff --git a/web/src/i18n/es.ts b/web/src/i18n/es.ts
index 598e0a3ad24..b17b5243864 100644
--- a/web/src/i18n/es.ts
+++ b/web/src/i18n/es.ts
@@ -158,6 +158,7 @@ export const es: Translations = {
     selectedSessionsDeleted: "{count} sesiones eliminadas",
     failedToDeleteSelected: "No se pudieron eliminar las sesiones seleccionadas",
     resumeInChat: "Reanudar en el chat",
+    newChat: "Nuevo chat",
     previousPage: "Página anterior",
     nextPage: "Página siguiente",
     roles: {
diff --git a/web/src/i18n/fr.ts b/web/src/i18n/fr.ts
index 659700a5864..62f378df719 100644
--- a/web/src/i18n/fr.ts
+++ b/web/src/i18n/fr.ts
@@ -158,6 +158,7 @@ export const fr: Translations = {
     selectedSessionsDeleted: "{count} sessions supprimées",
     failedToDeleteSelected: "Échec de la suppression des sessions sélectionnées",
     resumeInChat: "Reprendre dans le chat",
+    newChat: "Nouveau chat",
     previousPage: "Page précédente",
     nextPage: "Page suivante",
     roles: {
diff --git a/web/src/i18n/ga.ts b/web/src/i18n/ga.ts
index 214d69373a1..9172f6260bb 100644
--- a/web/src/i18n/ga.ts
+++ b/web/src/i18n/ga.ts
@@ -158,6 +158,7 @@ export const ga: Translations = {
     selectedSessionsDeleted: "Scriosadh {count} seisiún",
     failedToDeleteSelected: "Theip ar scriosadh na seisiún roghnaithe",
     resumeInChat: "Lean ar aghaidh sa chomhrá",
+    newChat: "Comhrá nua",
     previousPage: "Leathanach roimhe seo",
     nextPage: "An chéad leathanach eile",
     roles: {
diff --git a/web/src/i18n/hu.ts b/web/src/i18n/hu.ts
index cf9d121a06a..08e1b4e1fd1 100644
--- a/web/src/i18n/hu.ts
+++ b/web/src/i18n/hu.ts
@@ -158,6 +158,7 @@ export const hu: Translations = {
     selectedSessionsDeleted: "{count} munkamenet törölve",
     failedToDeleteSelected: "Nem sikerült törölni a kijelölt munkameneteket",
     resumeInChat: "Folytatás a csevegésben",
+    newChat: "Új csevegés",
     previousPage: "Előző oldal",
     nextPage: "Következő oldal",
     roles: {
diff --git a/web/src/i18n/it.ts b/web/src/i18n/it.ts
index 777f913075d..29b3b83ee53 100644
--- a/web/src/i18n/it.ts
+++ b/web/src/i18n/it.ts
@@ -158,6 +158,7 @@ export const it: Translations = {
     selectedSessionsDeleted: "{count} sessioni eliminate",
     failedToDeleteSelected: "Impossibile eliminare le sessioni selezionate",
     resumeInChat: "Riprendi nella chat",
+    newChat: "Nuova chat",
     previousPage: "Pagina precedente",
     nextPage: "Pagina successiva",
     roles: {
diff --git a/web/src/i18n/ja.ts b/web/src/i18n/ja.ts
index eb0f237a86c..4d6ef8e25a2 100644
--- a/web/src/i18n/ja.ts
+++ b/web/src/i18n/ja.ts
@@ -158,6 +158,7 @@ export const ja: Translations = {
     selectedSessionsDeleted: "{count}件のセッションを削除しました",
     failedToDeleteSelected: "選択したセッションの削除に失敗しました",
     resumeInChat: "チャットで再開",
+    newChat: "新しいチャット",
     previousPage: "前のページ",
     nextPage: "次のページ",
     roles: {
diff --git a/web/src/i18n/ko.ts b/web/src/i18n/ko.ts
index 44f689aa5f2..33a4e5362f5 100644
--- a/web/src/i18n/ko.ts
+++ b/web/src/i18n/ko.ts
@@ -158,6 +158,7 @@ export const ko: Translations = {
     selectedSessionsDeleted: "{count}개 세션이 삭제되었습니다",
     failedToDeleteSelected: "선택한 세션 삭제에 실패했습니다",
     resumeInChat: "채팅에서 다시 시작",
+    newChat: "새 채팅",
     previousPage: "이전 페이지",
     nextPage: "다음 페이지",
     roles: {
diff --git a/web/src/i18n/pt.ts b/web/src/i18n/pt.ts
index 7ad8f15b9ca..087bf16b7ea 100644
--- a/web/src/i18n/pt.ts
+++ b/web/src/i18n/pt.ts
@@ -158,6 +158,7 @@ export const pt: Translations = {
     selectedSessionsDeleted: "{count} sessões eliminadas",
     failedToDeleteSelected: "Falha ao eliminar as sessões selecionadas",
     resumeInChat: "Retomar no Chat",
+    newChat: "Novo chat",
     previousPage: "Página anterior",
     nextPage: "Página seguinte",
     roles: {
diff --git a/web/src/i18n/ru.ts b/web/src/i18n/ru.ts
index 8f7fcab6126..04f5bb720b6 100644
--- a/web/src/i18n/ru.ts
+++ b/web/src/i18n/ru.ts
@@ -158,6 +158,7 @@ export const ru: Translations = {
     selectedSessionsDeleted: "Удалено сессий: {count}",
     failedToDeleteSelected: "Не удалось удалить выбранные сессии",
     resumeInChat: "Продолжить в чате",
+    newChat: "Новый чат",
     previousPage: "Предыдущая страница",
     nextPage: "Следующая страница",
     roles: {
diff --git a/web/src/i18n/tr.ts b/web/src/i18n/tr.ts
index c597e3d6852..8e6f603186c 100644
--- a/web/src/i18n/tr.ts
+++ b/web/src/i18n/tr.ts
@@ -158,6 +158,7 @@ export const tr: Translations = {
     selectedSessionsDeleted: "{count} oturum silindi",
     failedToDeleteSelected: "Seçilen oturumlar silinemedi",
     resumeInChat: "Sohbette Devam Et",
+    newChat: "Yeni sohbet",
     previousPage: "Önceki sayfa",
     nextPage: "Sonraki sayfa",
     roles: {
diff --git a/web/src/i18n/types.ts b/web/src/i18n/types.ts
index 68a5c569377..1ce2813dd53 100644
--- a/web/src/i18n/types.ts
+++ b/web/src/i18n/types.ts
@@ -181,6 +181,7 @@ export interface Translations {
     selectedSessionsDeleted: string;
     failedToDeleteSelected: string;
     resumeInChat: string;
+    newChat: string;
     previousPage: string;
     nextPage: string;
     roles: {
diff --git a/web/src/i18n/uk.ts b/web/src/i18n/uk.ts
index 1382c1b2bf1..aab1c65d55e 100644
--- a/web/src/i18n/uk.ts
+++ b/web/src/i18n/uk.ts
@@ -158,6 +158,7 @@ export const uk: Translations = {
     selectedSessionsDeleted: "Видалено сесій: {count}",
     failedToDeleteSelected: "Не вдалося видалити вибрані сесії",
     resumeInChat: "Продовжити в чаті",
+    newChat: "Новий чат",
     previousPage: "Попередня сторінка",
     nextPage: "Наступна сторінка",
     roles: {
diff --git a/web/src/i18n/zh-hant.ts b/web/src/i18n/zh-hant.ts
index 09f611bb558..a80fa941db2 100644
--- a/web/src/i18n/zh-hant.ts
+++ b/web/src/i18n/zh-hant.ts
@@ -158,6 +158,7 @@ export const zhHant: Translations = {
     selectedSessionsDeleted: "已刪除 {count} 個工作階段",
     failedToDeleteSelected: "刪除所選工作階段失敗",
     resumeInChat: "在對話中繼續",
+    newChat: "新對話",
     previousPage: "上一頁",
     nextPage: "下一頁",
     roles: {
diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts
index 2bac16c3dec..0bdabbdb5d3 100644
--- a/web/src/i18n/zh.ts
+++ b/web/src/i18n/zh.ts
@@ -156,6 +156,7 @@ export const zh: Translations = {
     selectedSessionsDeleted: "已删除 {count} 个会话",
     failedToDeleteSelected: "删除所选会话失败",
     resumeInChat: "在对话中继续",
+    newChat: "新对话",
     previousPage: "上一页",
     nextPage: "下一页",
     roles: {
diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts
index ec03997b6c6..3955d3324c9 100644
--- a/web/src/lib/api.ts
+++ b/web/src/lib/api.ts
@@ -1346,6 +1346,7 @@ export interface MessagingPlatformEnvVar {
   redacted_value: string | null;
   description: string;
   prompt: string;
+  help: string;
   url: string | null;
   is_password: boolean;
   advanced: boolean;
diff --git a/web/src/lib/session-refresh.test.ts b/web/src/lib/session-refresh.test.ts
new file mode 100644
index 00000000000..0348835860a
--- /dev/null
+++ b/web/src/lib/session-refresh.test.ts
@@ -0,0 +1,21 @@
+import { describe, it, expect } from "vitest";
+import { shouldRefreshSessions } from "./session-refresh";
+
+describe("shouldRefreshSessions", () => {
+  it("returns false on the first poll (no baseline yet)", () => {
+    expect(shouldRefreshSessions(null, "s2")).toBe(false);
+  });
+  // An empty response must never count as a change, with or without a baseline.
+  it("returns false when the current response has no sessions", () => {
+    expect(shouldRefreshSessions("s1", null)).toBe(false);
+    expect(shouldRefreshSessions(null, null)).toBe(false);
+  });
+  // Same head id on consecutive polls: the list is still current.
+  it("returns false when the newest session id is unchanged", () => {
+    expect(shouldRefreshSessions("s1", "s1")).toBe(false);
+  });
+  // The only positive case: a baseline exists and the head id differs from it.
+  it("returns true when a new session appears at the head of the list", () => {
+    expect(shouldRefreshSessions("s1", "s2")).toBe(true);
+  });
+});
diff --git a/web/src/lib/session-refresh.ts b/web/src/lib/session-refresh.ts
new file mode 100644
index 00000000000..637c7f00eb1
--- /dev/null
+++ b/web/src/lib/session-refresh.ts
@@ -0,0 +1,26 @@
+/**
+ * Tells the Sessions page whether an overview poll should be followed by
+ * a silent re-fetch of the paginated sessions list.
+ *
+ * Background: the dashboard's FastAPI server and a terminal CLI are
+ * separate processes sharing one SQLite session DB, with no
+ * inter-process push channel. The page therefore polls the 50 newest
+ * sessions every few seconds and compares the id at the head of that
+ * list with the one recorded on the previous poll. A different head id
+ * means the paginated list is stale (typically a session created in
+ * another process), so it should be refreshed.
+ *
+ * The answer is false when there is no baseline yet (first poll) or
+ * when the current response has no head id (empty DB / transient empty
+ * response), so mounting the page or an empty DB never causes a reload.
+ */
+export function shouldRefreshSessions(
+  prevNewestId: string | null,
+  currentNewestId: string | null,
+): boolean {
+  // Without both ids there is nothing meaningful to compare.
+  if (prevNewestId === null || currentNewestId === null) {
+    return false;
+  }
+  return currentNewestId !== prevNewestId;
+}
diff --git a/web/src/pages/ChannelsPage.tsx b/web/src/pages/ChannelsPage.tsx
index d42ab7b9e74..7658c0cd61a 100644
--- a/web/src/pages/ChannelsPage.tsx
+++ b/web/src/pages/ChannelsPage.tsx
@@ -4,6 +4,7 @@ import {
   Check,
   CheckCircle2,
   ExternalLink,
+  Info,
   PlugZap,
   QrCode,
   Radio,
@@ -55,6 +56,37 @@ function stateBadge(state: string) {
 }
 
 const TELEGRAM_USER_ID_RE = /^\d+$/;
+const SLACK_MEMBER_ID_RE = /^[UW][A-Z0-9]{2,}$/;
+const SLACK_TOKEN_PREFIXES: Record<string, string> = {
+  SLACK_BOT_TOKEN: "xoxb-",
+  SLACK_APP_TOKEN: "xapp-",
+};
+
+// Client-side format check for a single messaging env field. Returns an
+// error message, or null when the value is empty or passes every check.
+function validateMessagingEnvField(field: MessagingPlatformEnvVar, value: string): string | null {
+  const candidate = value.trim();
+  // Empty input is never a format error.
+  if (candidate.length === 0) return null;
+
+  const prefix = SLACK_TOKEN_PREFIXES[field.key];
+  if (prefix && !candidate.startsWith(prefix)) {
+    return `${field.prompt || field.key} must start with ${prefix}`;
+  }
+  if (field.key !== "SLACK_ALLOWED_USERS") return null;
+
+  // Mirror the gateway's parse (gateway/platforms/slack.py): drop empty
+  // entries so a trailing/interior comma isn't rejected here. "*" is the
+  // allow-all wildcard the gateway honors.
+  for (const rawEntry of candidate.split(",")) {
+    const entry = rawEntry.trim();
+    if (entry === "" || entry === "*") continue;
+    if (!SLACK_MEMBER_ID_RE.test(entry)) {
+      return `${entry} does not look like a Slack member ID. Use IDs like U01ABC2DEF3.`;
+    }
+  }
+  return null;
+}
 
 function formatExpiry(expiresAt: string): string {
   const ms = Date.parse(expiresAt) - Date.now();
@@ -83,8 +115,12 @@ export default function ChannelsPage() {
   // Config modal state
   const [editing, setEditing] = useState<MessagingPlatform | null>(null);
   const [draftEnv, setDraftEnv] = useState<Record<string, string>>({});
+  const [fieldErrors, setFieldErrors] = useState<Record<string, string>>({});
   const [saving, setSaving] = useState(false);
-  const closeEdit = useCallback(() => setEditing(null), []);
+  const closeEdit = useCallback(() => {
+    setEditing(null);
+    setFieldErrors({});
+  }, []);
   const editModalRef = useModalBehavior({ open: editing !== null, onClose: closeEdit });
 
   // Per-card busy + restart-needed tracking
@@ -116,6 +152,7 @@ export default function ChannelsPage() {
       initial[v.key] = "";
     });
     setDraftEnv(initial);
+    setFieldErrors({});
     setEditing(platform);
   };
 
@@ -138,6 +175,16 @@ export default function ChannelsPage() {
       showToast(`${missing[0].prompt || missing[0].key} is required`, "error");
       return;
     }
+    const nextFieldErrors: Record<string, string> = {};
+    editing.env_vars.forEach((field) => {
+      const message = validateMessagingEnvField(field, draftEnv[field.key] || "");
+      if (message) nextFieldErrors[field.key] = message;
+    });
+    if (Object.keys(nextFieldErrors).length > 0) {
+      setFieldErrors(nextFieldErrors);
+      showToast("Fix the highlighted fields before saving.", "error");
+      return;
+    }
     setSaving(true);
     try {
       const body: MessagingPlatformUpdate = { env, enabled: true };
@@ -326,10 +373,22 @@ export default function ChannelsPage() {
               </p>
               {editing.env_vars.map((field: MessagingPlatformEnvVar) => (
                 <div className="grid gap-1.5" key={field.key}>
-                  <Label htmlFor={`field-${field.key}`}>
-                    {field.prompt || field.key}
-                    {field.required ? " *" : ""}
-                  </Label>
+                  <div className="flex items-center gap-1.5">
+                    <Label htmlFor={`field-${field.key}`}>
+                      {field.prompt || field.key}
+                      {field.required ? " *" : ""}
+                    </Label>
+                    {field.help && (
+                      <span
+                        aria-label={field.help}
+                        className="inline-flex text-muted-foreground hover:text-foreground"
+                        role="img"
+                        title={field.help}
+                      >
+                        <Info className="h-3.5 w-3.5" />
+                      </span>
+                    )}
+                  </div>
                   {field.description && (
                     <span className="text-xs text-muted-foreground">
                       {field.description}
@@ -344,10 +403,23 @@ export default function ChannelsPage() {
                         : field.key
                     }
                     value={draftEnv[field.key] ?? ""}
-                    onChange={(e) =>
-                      setDraftEnv((prev) => ({ ...prev, [field.key]: e.target.value }))
-                    }
+                    aria-invalid={Boolean(fieldErrors[field.key])}
+                    onChange={(e) => {
+                      const nextValue = e.target.value;
+                      setDraftEnv((prev) => ({ ...prev, [field.key]: nextValue }));
+                      setFieldErrors((prev) => {
+                        if (!prev[field.key]) return prev;
+                        const next = { ...prev };
+                        delete next[field.key];
+                        return next;
+                      });
+                    }}
                   />
+                  {fieldErrors[field.key] && (
+                    <span className="text-xs text-destructive">
+                      {fieldErrors[field.key]}
+                    </span>
+                  )}
                 </div>
               ))}
 
diff --git a/web/src/pages/ChatPage.tsx b/web/src/pages/ChatPage.tsx
index 4e3a6c23151..2a135ed1a57 100644
--- a/web/src/pages/ChatPage.tsx
+++ b/web/src/pages/ChatPage.tsx
@@ -32,6 +32,7 @@ import { createPortal } from "react-dom";
 import { useSearchParams } from "react-router-dom";
 
 import { ChatSidebar } from "@/components/ChatSidebar";
+import { ChatSessionList } from "@/components/ChatSessionList";
 import { usePageHeader } from "@/contexts/usePageHeader";
 import { useI18n } from "@/i18n";
 import { api } from "@/lib/api";
@@ -153,6 +154,15 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
     setBanner(null);
     setReconnectNonce((n) => n + 1);
   }, []);
+  const startFreshDashboardChat = useCallback(() => {
+    const next = new URLSearchParams(searchParams);
+    // Drop only `resume`; other query params are kept. `replace` avoids a new history entry.
+    next.delete("resume");
+    setSearchParams(next, { replace: true });
+    setSessionEnded(false);
+    setBanner(null);
+    setReconnectNonce((n) => n + 1);
+  }, [searchParams, setSearchParams]);
   // Raw state for the mobile side-sheet + a derived value that force-
   // closes whenever the chat tab isn't active.  The *derived* value is
   // what side-effects (body-scroll lock, keydown listener, portal render)
@@ -881,7 +891,20 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
               "border-t border-current/10",
             )}
           >
-            <ChatSidebar channel={channel} profile={scopedProfile} />
+            <div className="border-b border-current/10 px-1 py-2">
+              <ChatSidebar
+                channel={channel}
+                profile={scopedProfile}
+                onDashboardNewSessionRequest={startFreshDashboardChat}
+                showTools={false}
+              />
+            </div>
+            <ChatSessionList
+              activeSessionId={resumeParam}
+              profile={scopedProfile}
+              onPicked={closeMobilePanel}
+              onNewChat={startFreshDashboardChat}
+            />
           </div>
         </div>
       </>,
@@ -964,10 +987,25 @@ export default function ChatPage({ isActive = true }: { isActive?: boolean }) {
             id="chat-side-panel"
             role="complementary"
             aria-label={modelToolsLabel}
-            className="flex min-h-0 shrink-0 flex-col overflow-hidden lg:h-full lg:w-80"
+            className="flex min-h-0 shrink-0 flex-col gap-3 overflow-hidden lg:h-full lg:w-60"
           >
+            {/* Model picker (tools card hidden — keeps the rail thin). */}
+            <div className="shrink-0">
+              <ChatSidebar
+                channel={channel}
+                profile={scopedProfile}
+                onDashboardNewSessionRequest={startFreshDashboardChat}
+                showTools={false}
+              />
+            </div>
+
+            {/* Session switcher fills the remaining height below the model box. */}
             <div className="min-h-0 flex-1 overflow-hidden">
-              <ChatSidebar channel={channel} profile={scopedProfile} />
+              <ChatSessionList
+                activeSessionId={resumeParam}
+                profile={scopedProfile}
+                onNewChat={startFreshDashboardChat}
+              />
             </div>
           </div>
         )}
diff --git a/web/src/pages/ModelsPage.tsx b/web/src/pages/ModelsPage.tsx
index 77953412b6f..0580feca4e1 100644
--- a/web/src/pages/ModelsPage.tsx
+++ b/web/src/pages/ModelsPage.tsx
@@ -32,6 +32,7 @@ import { usePageHeader } from "@/contexts/usePageHeader";
 import { useI18n } from "@/i18n";
 import { PluginSlot } from "@/plugins";
 import { ModelPickerDialog } from "@/components/ModelPickerDialog";
+import { ModelReloadConfirm } from "@/components/ModelReloadConfirm";
 
 const PERIODS = [
   { label: "7d", days: 7 },
@@ -697,6 +698,9 @@ function ModelSettingsPanel({
 }) {
   const [auxModalOpen, setAuxModalOpen] = useState(false);
   const [picker, setPicker] = useState<PickerTarget | null>(null);
+  const [pendingReloadModel, setPendingReloadModel] = useState<string | null>(
+    null,
+  );
 
   const mainProv = aux?.main.provider ?? "";
   const mainModel = aux?.main.model ?? "";
@@ -798,15 +802,19 @@ function ModelSettingsPanel({
             loader={api.getModelOptions}
             alwaysGlobal
             title="Set Main Model"
-            onApply={({ provider, model, confirmExpensiveModel }) =>
-              applyAssignment({
+            onApply={async ({ provider, model, confirmExpensiveModel }) => {
+              const result = await applyAssignment({
                 confirmExpensiveModel,
                 scope: "main",
                 task: "",
                 provider,
                 model,
-              })
-            }
+              });
+              if (!result.confirm_required) {
+                setPendingReloadModel(model.split("/").slice(-1)[0]);
+              }
+              return result;
+            }}
             onClose={() => setPicker(null)}
           />
         )}
@@ -819,6 +827,11 @@ function ModelSettingsPanel({
             onClose={() => setAuxModalOpen(false)}
           />
         )}
+
+        <ModelReloadConfirm
+          model={pendingReloadModel}
+          onCancel={() => setPendingReloadModel(null)}
+        />
       </CardContent>
     </Card>
   );
diff --git a/web/src/pages/SessionsPage.tsx b/web/src/pages/SessionsPage.tsx
index 2d70c399af2..1746cc48184 100644
--- a/web/src/pages/SessionsPage.tsx
+++ b/web/src/pages/SessionsPage.tsx
@@ -30,6 +30,7 @@ import {
   Archive,
 } from "lucide-react";
 import { api } from "@/lib/api";
+import { shouldRefreshSessions } from "@/lib/session-refresh";
 import type {
   SessionInfo,
   SessionMessage,
@@ -805,8 +806,12 @@ export default function SessionsPage() {
     };
   }, [setEnd]);
 
-  const loadSessions = useCallback((p: number) => {
-    setLoading(true);
+  const loadSessions = useCallback((p: number, silent = false) => {
+    // ``silent`` skips the loading spinner so background refreshes
+    // (triggered when the overview poll detects a new session from
+    // another process) don't flicker the whole page or drop the user's
+    // scroll position.
+    if (!silent) setLoading(true);
     api
       .getSessions(PAGE_SIZE, p * PAGE_SIZE)
       .then((resp) => {
@@ -814,7 +819,9 @@ export default function SessionsPage() {
         setTotal(resp.total);
       })
       .catch(() => {})
-      .finally(() => setLoading(false));
+      .finally(() => {
+        if (!silent) setLoading(false);
+      });
   }, []);
 
   const loadStats = useCallback(() => {
@@ -828,6 +835,15 @@ export default function SessionsPage() {
     loadStats();
   }, [loadStats]);
 
+  // Refs for the overview poll's new-session detection. The poll effect
+  // below is mounted once with stable deps, so it reads the current page
+  // and the last-seen newest session id through refs instead of capturing
+  // stale values. ``newestSeenRef`` starts null so the first poll sets a
+  // baseline without triggering a redundant reload (mount already loads).
+  const newestSeenRef = useRef<string | null>(null);
+  const pageRef = useRef(page);
+  pageRef.current = page;
+
   useEffect(() => {
     loadSessions(page);
     refreshEmptyCount();
@@ -841,13 +857,27 @@ export default function SessionsPage() {
         .catch(() => {});
       api
         .getSessions(50)
-        .then((r) => setOverviewSessions(r.sessions))
+        .then((r) => {
+          setOverviewSessions(r.sessions);
+          // The dashboard server and a terminal CLI are separate processes
+          // sharing one session DB with no push channel, so sessions created
+          // elsewhere are detected here: the overview poll already fetches
+          // the 50 newest sessions, and a changed head id triggers a silent
+          // refresh of the paginated list (no visible loading flicker).
+          const newest = r.sessions[0]?.id ?? null;
+          if (shouldRefreshSessions(newestSeenRef.current, newest)) {
+            loadSessions(pageRef.current, true);
+          }
+          // Keep the previous baseline across an empty response; resetting it
+          // to null would make the next real head id look like a first poll.
+          if (newest !== null) newestSeenRef.current = newest;
+        })
         .catch(() => {});
     };
     loadOverview();
     const id = setInterval(loadOverview, 5000);
     return () => clearInterval(id);
-  }, []);
+  }, [loadSessions]);
 
   useEffect(() => {
     const el = logScrollRef.current;
diff --git a/web/vitest.config.ts b/web/vitest.config.ts
new file mode 100644
index 00000000000..34baae684e8
--- /dev/null
+++ b/web/vitest.config.ts
@@ -0,0 +1,16 @@
+import { defineConfig } from "vitest/config";
+import react from "@vitejs/plugin-react";
+import path from "path";
+
+// Vitest setup: tests matching src/**/*.test.{ts,tsx} run in a Node
+// environment, and "@" resolves to ./src next to this config file.
+const srcRoot = path.resolve(__dirname, "./src");
+
+export default defineConfig({
+  plugins: [react()],
+  resolve: { alias: { "@": srcRoot } },
+  test: {
+    environment: "node",
+    include: ["src/**/*.test.{ts,tsx}"],
+  },
+});
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 0cf004f1a0c..ae34084114c 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -743,7 +743,7 @@ Upload a debug report (system info + recent logs) to a paste service and get a s
 | `--expire <days>` | Paste expiry in days (default: 7). |
 | `--local` | Print the report locally instead of uploading. |
 
-The report includes system info (OS, Python version, Hermes version), recent agent and gateway logs (512 KB limit per file), and redacted API key status. Keys are always redacted — no secrets are uploaded.
+The report includes system info (OS, Python version, Hermes version), recent agent, gateway, GUI/dashboard, and desktop logs (512 KB limit per file), and redacted API key status. Keys are always redacted — no secrets are uploaded.
 
 Paste services tried in order: paste.rs, dpaste.com.
 
diff --git a/website/docs/user-guide/features/web-dashboard.md b/website/docs/user-guide/features/web-dashboard.md
index 2b6fbcfd653..d562879c243 100644
--- a/website/docs/user-guide/features/web-dashboard.md
+++ b/website/docs/user-guide/features/web-dashboard.md
@@ -119,6 +119,8 @@ The **Chat** tab embeds the full Hermes TUI (the same interface you get from `he
 
 **Resume an existing session:** from the **Sessions** tab, click the play icon (▶) next to any session. That jumps to `/chat?resume=<id>` and launches the TUI with `--resume`, loading the full history.
 
+**Session switcher (right rail):** the Chat tab carries its own ChatGPT-style conversation list in a thin right rail beside the terminal, so you can swap conversations without leaving the page. The rail stacks the model picker on top and the session list directly below it; the terminal takes up most of the screen. The list shows your most recent sessions for the active profile — title (falling back to a message preview), relative last-active time, message count, and the source channel for non-CLI sessions. Click any row to resume it in place (the terminal respawns with that conversation's history); the active session is highlighted. **New chat** starts a fresh session, and a refresh control re-pulls the list. The rail only switches between and starts conversations — delete, rename, export, and bulk cleanup still live on the **Sessions** tab. On narrow screens it folds into a slide-over panel.
+
 **Prerequisites:**
 
 - Node.js (same requirement as `hermes --tui`; the TUI bundle is built on first launch)
diff --git a/website/docs/user-guide/multi-profile-gateways.md b/website/docs/user-guide/multi-profile-gateways.md
index e11c389038f..533a3d3c704 100644
--- a/website/docs/user-guide/multi-profile-gateways.md
+++ b/website/docs/user-guide/multi-profile-gateways.md
@@ -56,6 +56,139 @@ research gateway start
 That's it — three independent agents, each on its own process, restarting
 automatically on crash and on user login.
 
+## Alternative: one gateway for all profiles (multiplexing)
+
+The model above runs **one process per profile**. That is the default and is
+the right choice for most setups. But on a host with many profiles — or a
+container deployment where one process per profile is operationally heavy — you
+can instead run a **single multiplexing gateway**: the default profile's gateway
+becomes the sole inbound process and serves messages for *every* profile on the
+box.
+
+This is **opt-in** and **off by default**. When it's off, nothing on this page
+changes — every behavior below is inert.
+
+### When to prefer multiplexing
+
+- A container/VPS deployment where N supervisor units, N ports, and N PID files
+  are a burden.
+- Many low-traffic profiles that don't each justify a full process.
+- You want a single thing to start, monitor, and restart.
+
+Stick with one-process-per-profile when you want hard process-level isolation
+between profiles (separate memory footprints, independent crash domains, the
+ability to restart one profile without touching the others).
+
+### How to opt in
+
+Set the flag on the **default profile** (it owns the multiplexer) and restart
+its gateway:
+
+```bash
+hermes config set gateway.multiplex_profiles true
+hermes gateway restart
+```
+
+Equivalently, in the default profile's `~/.hermes/config.yaml`:
+
+```yaml
+gateway:
+  multiplex_profiles: true
+```
+
+(The flag is also accepted as a top-level `multiplex_profiles: true` for
+convenience.) On the next start the default gateway enumerates every profile,
+brings up each profile's enabled platforms under that profile's own
+credentials, and routes each inbound message to the profile it belongs to. Each
+turn resolves the routed profile's config, skills, memory, SOUL, **and provider
+keys** — credentials are never shared across profiles.
+
+You do **not** run `hermes gateway start` for the secondary profiles — the
+default gateway serves them. See the contract changes below.
+
+### What changes when multiplexing is on
+
+Enabling the flag changes how a few things behave. All of these revert the
+moment the flag is off.
+
+#### 1. Secondary profiles must not start their own gateway
+
+With a multiplexer running, `hermes gateway start` / `run` for a named profile
+fails with a **hard error** that points you back at the multiplexer:
+
+```
+The default gateway is running as a profile multiplexer and already serves
+profile 'coder'. ...
+```
+
+The multiplexer is the single inbound process; a second profile gateway would
+double-bind that profile's platforms. Pass `--force` only if you deliberately
+want a separate process for that profile (not recommended while the multiplexer
+is running). The cross-profile lifecycle wrapper script earlier on this page is
+therefore **not** used in multiplex mode — you only manage the default gateway.
+
+#### 2. HTTP-inbound platforms are reached via a `/p/<profile>/` URL prefix
+
+Webhook (and other HTTP-inbound) traffic for a secondary profile arrives on the
+default listener under a profile prefix, **not** a second port:
+
+```
+# default profile
+POST http://host:8644/webhooks/<route>
+# the "coder" profile, same listener
+POST http://host:8644/p/coder/webhooks/<route>
+```
+
+An unknown or unconfigured profile in the prefix returns `404`. Because the one
+shared listener already serves every profile this way, a **secondary profile
+must not enable a port-binding platform itself** — doing so is a config error
+and the gateway refuses to start, naming the profile and platform:
+
+```
+Profile 'coder' enables the port-binding platform 'webhook', but
+gateway.multiplex_profiles is on. ... Remove platforms.webhook from profile
+'coder's config.yaml (configure it only on the default profile).
+```
+
+Port-binding platforms covered by this rule: `webhook`, `api_server`,
+`msgraph_webhook`, `feishu`, `wecom_callback`, `bluebubbles`, `sms`. Configure
+any of these **only on the default profile**; each secondary profile is then
+reachable through its `/p/<profile>/` prefix.
+
+#### 3. Per-credential platforms still need their own token per profile
+
+Polling/connection platforms (Telegram, Discord, Slack, Matrix, Signal, …) work
+fine multiplexed, but each profile that enables one must supply its **own** bot
+token — the same token cannot be polled by two profiles at once. If two profiles
+configure the same `(platform, token)`, startup fails fast naming both profiles
+(see [Token-conflict safety](#token-conflict-safety) — the rule is unchanged,
+it's just enforced inside the one process now).
+
+#### 4. Session keys are namespaced by profile
+
+Each profile's sessions live under an `agent:<profile>:…` namespace so two
+profiles on the same platform/chat never collide in the shared session store.
+The **default** profile keeps the historical `agent:main:…` namespace
+byte-for-byte, so existing default-profile sessions are unaffected — no
+migration, no orphaned history.
+
+#### 5. One PID/lock and one status surface
+
+There is a single process-level PID and lock (the multiplexer, under the default
+home). `hermes status` reports the multiplexer and the profiles it serves;
+`hermes status -p <name>` slices to one profile. Each profile still writes its
+own `runtime_status.json` under its own home, so existing per-profile readers
+keep working.
+
+#### What does **not** change
+
+Per-profile `.env` credential isolation is preserved and, if anything,
+stricter: a profile's keys are resolved from its own scope and are never unioned
+into a shared environment (this also means subprocesses like MCP servers and
+Kanban workers only ever see their own profile's secrets). Kanban,
+profile-scoped skills/memory/SOUL, and model routing all behave per-profile
+exactly as they do with separate gateways.
+
 ## Start, stop, or restart all gateways at once
 
 The CLI ships with single-profile lifecycle commands. To act across every