Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-16 08:23:20 -05:00
commit f81dba0da2
128 changed files with 8357 additions and 842 deletions

View file

@ -24,6 +24,15 @@
# Optional base URL override (default: Google's OpenAI-compatible endpoint)
# GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai
# =============================================================================
# LLM PROVIDER (Ollama Cloud)
# =============================================================================
# Cloud-hosted open models via Ollama's OpenAI-compatible endpoint.
# Get your key at: https://ollama.com/settings
# OLLAMA_API_KEY=your_ollama_key_here
# Optional base URL override (default: https://ollama.com/v1)
# OLLAMA_BASE_URL=https://ollama.com/v1
# =============================================================================
# LLM PROVIDER (z.ai / GLM)
# =============================================================================

View file

@ -58,6 +58,9 @@ _PROVIDER_ALIASES = {
"google": "gemini",
"google-gemini": "gemini",
"google-ai-studio": "gemini",
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
"glm": "zai",
"z-ai": "zai",
"z.ai": "zai",
@ -104,6 +107,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"opencode-zen": "gemini-3-flash",
"opencode-go": "glm-5",
"kilocode": "google/gemini-3-flash-preview",
"ollama-cloud": "nemotron-3-nano:30b",
}
# Vision-specific model overrides for direct providers.

View file

@ -600,6 +600,45 @@ class KawaiiSpinner:
"analyzing", "computing", "synthesizing", "formulating", "brainstorming",
]
@classmethod
def get_waiting_faces(cls) -> list:
    """Waiting-face frames for the spinner.

    Prefers the ``waiting_faces`` list from the active skin; any lookup
    failure or an empty list falls back to the built-in KAWAII_WAITING.
    """
    try:
        active = _get_skin()
        skin_faces = active.spinner.get("waiting_faces", []) if active else []
    except Exception:
        skin_faces = []
    return skin_faces or cls.KAWAII_WAITING
@classmethod
def get_thinking_faces(cls) -> list:
    """Thinking-face frames for the spinner.

    Prefers the ``thinking_faces`` list from the active skin; any lookup
    failure or an empty list falls back to the built-in KAWAII_THINKING.
    """
    try:
        active = _get_skin()
        skin_faces = active.spinner.get("thinking_faces", []) if active else []
    except Exception:
        skin_faces = []
    return skin_faces or cls.KAWAII_THINKING
@classmethod
def get_thinking_verbs(cls) -> list:
    """Verbs shown while the agent is thinking.

    Prefers the ``thinking_verbs`` list from the active skin; any lookup
    failure or an empty list falls back to the built-in THINKING_VERBS.
    """
    try:
        active = _get_skin()
        skin_verbs = active.spinner.get("thinking_verbs", []) if active else []
    except Exception:
        skin_verbs = []
    return skin_verbs or cls.THINKING_VERBS
def __init__(self, message: str = "", spinner_type: str = 'dots', print_fn=None):
self.message = message
self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])

View file

@ -28,6 +28,7 @@ Usage in run_agent.py:
from __future__ import annotations
import json
import logging
import re
from typing import Any, Dict, List, Optional
@ -43,11 +44,22 @@ logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
_FENCE_TAG_RE = re.compile(r'</?\s*memory-context\s*>', re.IGNORECASE)
_INTERNAL_CONTEXT_RE = re.compile(
r'<\s*memory-context\s*>[\s\S]*?</\s*memory-context\s*>',
re.IGNORECASE,
)
_INTERNAL_NOTE_RE = re.compile(
r'\[System note:\s*The following is recalled memory context,\s*NOT new user input\.\s*Treat as informational background data\.\]\s*',
re.IGNORECASE,
)
def sanitize_context(text: str) -> str:
"""Strip fence-escape sequences from provider output."""
return _FENCE_TAG_RE.sub('', text)
"""Strip fence tags, injected context blocks, and system notes from provider output."""
text = _INTERNAL_CONTEXT_RE.sub('', text)
text = _INTERNAL_NOTE_RE.sub('', text)
text = _FENCE_TAG_RE.sub('', text)
return text
def build_memory_context_block(raw_context: str) -> str:

View file

@ -23,7 +23,7 @@ logger = logging.getLogger(__name__)
# are preserved so the full model name reaches cache lookups and server queries.
_PROVIDER_PREFIXES: frozenset[str] = frozenset({
"openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
"gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
"opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
"qwen-oauth",
"xiaomi",
@ -33,6 +33,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"google", "google-gemini", "google-ai-studio",
"glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
"github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
"ollama",
"opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
@ -239,6 +240,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.x.ai": "xai",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"ollama.com": "ollama-cloud",
}

View file

@ -169,6 +169,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
"togetherai": "togetherai",
"perplexity": "perplexity",
"cohere": "cohere",
"ollama-cloud": "ollama-cloud",
}
# Reverse mapping: models.dev → Hermes (built lazily)

View file

@ -295,7 +295,9 @@ PLATFORM_HINTS = {
),
"telegram": (
"You are on a text messaging communication platform, Telegram. "
"Please do not use markdown as it does not render. "
"Standard markdown is automatically converted to Telegram format. "
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
"`inline code`, ```code blocks```, [links](url), and ## headers. "
"You can send media files natively: to deliver a file to the user, "
"include MEDIA:/absolute/path/to/file in your response. Images "
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "

View file

@ -72,7 +72,14 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
skill_name = str(loaded_skill.get("name") or normalized)
skill_path = str(loaded_skill.get("path") or "")
skill_dir = None
if skill_path:
# Prefer the absolute skill_dir returned by skill_view() — this is
# correct for both local and external skills. Fall back to the old
# SKILLS_DIR-relative reconstruction only when skill_dir is absent
# (e.g. legacy skill_view responses).
abs_skill_dir = loaded_skill.get("skill_dir")
if abs_skill_dir:
skill_dir = Path(abs_skill_dir)
elif skill_path:
try:
skill_dir = SKILLS_DIR / Path(skill_path).parent
except Exception:

View file

@ -16,7 +16,7 @@ model:
# "nous" - Nous Portal OAuth (requires: hermes login)
# "nous-api" - Nous Portal API key (requires: NOUS_API_KEY)
# "anthropic" - Direct Anthropic API (requires: ANTHROPIC_API_KEY)
# "openai-codex" - OpenAI Codex (requires: hermes login --provider openai-codex)
# "openai-codex" - OpenAI Codex (requires: hermes auth)
# "copilot" - GitHub Copilot / GitHub Models (requires: GITHUB_TOKEN)
# "gemini" - Use Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "zai" - Use z.ai / ZhipuAI GLM models (requires: GLM_API_KEY)
@ -26,6 +26,7 @@ model:
# "huggingface" - Hugging Face Inference (requires: HF_TOKEN)
# "xiaomi" - Xiaomi MiMo (requires: XIAOMI_API_KEY)
# "arcee" - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
#
@ -37,12 +38,6 @@ model:
# base_url: "http://localhost:1234/v1"
# No API key needed — local servers typically ignore auth.
#
# For Ollama Cloud (https://ollama.com/pricing):
# provider: "custom"
# base_url: "https://ollama.com/v1"
# Set OLLAMA_API_KEY in .env — automatically picked up when base_url
# points to ollama.com.
#
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
provider: "auto"
@ -337,6 +332,7 @@ compression:
# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY)
# "nous" - Force Nous Portal (requires: hermes login)
# "gemini" - Force Google AI Studio direct (requires: GOOGLE_API_KEY or GEMINI_API_KEY)
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY)
# "codex" - Force Codex OAuth (requires: hermes model → Codex).
# Uses gpt-5.3-codex which supports vision.
# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY).
@ -564,6 +560,18 @@ platform_toolsets:
homeassistant: [hermes-homeassistant]
qqbot: [hermes-qqbot]
# =============================================================================
# Gateway Platform Settings
# =============================================================================
# Optional per-platform messaging settings.
# Platform-specific knobs live under `extra`.
#
# platforms:
# telegram:
# reply_to_mode: "first" # off | first | all
# extra:
# disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages
# ─────────────────────────────────────────────────────────────────────────────
# Available toolsets (use these names in platform_toolsets or the toolsets list)
#

69
cli.py
View file

@ -2057,7 +2057,17 @@ class HermesCLI:
"""Return the visible height for the spinner/status text line above the status bar."""
if not getattr(self, "_spinner_text", ""):
return 0
return 0 if self._use_minimal_tui_chrome(width=width) else 1
if self._use_minimal_tui_chrome(width=width):
return 0
# Compute how many lines the spinner text needs when wrapped.
# The rendered text is " {emoji} {label} ({elapsed})" — about
# len(_spinner_text) + 16 chars for indent + timer suffix.
width = width or self._get_tui_terminal_width()
if width and width > 10:
import math
text_len = len(self._spinner_text) + 16 # indent + timer
return max(1, math.ceil(text_len / width))
return 1
def _get_voice_status_fragments(self, width: Optional[int] = None):
"""Return the voice status bar fragments for the interactive TUI."""
@ -4001,23 +4011,14 @@ class HermesCLI:
def _handle_profile_command(self):
"""Display active profile name and home directory."""
from hermes_constants import get_hermes_home, display_hermes_home
from hermes_constants import display_hermes_home
from hermes_cli.profiles import get_active_profile_name
home = get_hermes_home()
display = display_hermes_home()
profiles_parent = Path.home() / ".hermes" / "profiles"
try:
rel = home.relative_to(profiles_parent)
profile_name = str(rel).split("/")[0]
except ValueError:
profile_name = None
profile_name = get_active_profile_name()
print()
if profile_name:
print(f" Profile: {profile_name}")
else:
print(" Profile: default")
print(f" Profile: {profile_name}")
print(f" Home: {display}")
print()
@ -5599,7 +5600,8 @@ class HermesCLI:
version = f" v{p['version']}" if p["version"] else ""
tools = f"{p['tools']} tools" if p["tools"] else ""
hooks = f"{p['hooks']} hooks" if p["hooks"] else ""
parts = [x for x in [tools, hooks] if x]
commands = f"{p['commands']} commands" if p.get("commands") else ""
parts = [x for x in [tools, hooks, commands] if x]
detail = f" ({', '.join(parts)})" if parts else ""
error = f"{p['error']}" if p["error"] else ""
print(f" {status} {p['name']}{version}{detail}{error}")
@ -7866,7 +7868,33 @@ class HermesCLI:
# Fallback for non-interactive mode (e.g., single-query)
agent_thread.join(0.1)
agent_thread.join() # Ensure agent thread completes
# Wait for the agent thread to finish. After an interrupt the
# agent may take a few seconds to clean up (kill subprocess, persist
# session). Poll instead of a blocking join so the process_loop
# stays responsive — if the user sent another interrupt or the
# agent gets stuck, we can break out instead of freezing forever.
if interrupt_msg is not None:
# Interrupt path: poll briefly, then move on. The agent
# thread is daemon — it dies on process exit regardless.
for _wait_tick in range(50): # 50 * 0.2s = 10s max
agent_thread.join(timeout=0.2)
if not agent_thread.is_alive():
break
# Check if user fired ANOTHER interrupt (Ctrl+C sets
# _should_exit which process_loop checks on next pass).
if getattr(self, '_should_exit', False):
break
if agent_thread.is_alive():
logger.warning(
"Agent thread still alive after interrupt "
"(thread %s). Daemon thread will be cleaned up "
"on exit.",
agent_thread.ident,
)
else:
# Normal completion: agent thread should be done already,
# but guard against edge cases.
agent_thread.join(timeout=30)
# Proactively clean up async clients whose event loop is dead.
# The agent thread may have created AsyncOpenAI clients bound
@ -9159,6 +9187,7 @@ class HermesCLI:
spinner_widget = Window(
content=FormattedTextControl(get_spinner_text),
height=get_spinner_height,
wrap_lines=True,
)
spacer = Window(
@ -10118,6 +10147,11 @@ def main(
):
cli.agent.quiet_mode = True
cli.agent.suppress_status_output = True
# Suppress streaming display callbacks so stdout stays
# machine-readable (no styled "Hermes" box, no tool-gen
# status lines). The response is printed once below.
cli.agent.stream_delta_callback = None
cli.agent.tool_gen_callback = None
result = cli.agent.run_conversation(
user_message=effective_query,
conversation_history=cli.conversation_history,
@ -10125,7 +10159,8 @@ def main(
response = result.get("final_response", "") if isinstance(result, dict) else str(result)
if response:
print(response)
print(f"\nsession_id: {cli.session_id}")
# Session ID goes to stderr so piped stdout is clean.
print(f"\nsession_id: {cli.session_id}", file=sys.stderr)
# Ensure proper exit code for automation wrappers
sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0)

View file

@ -837,6 +837,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
)
final_response = result.get("final_response", "") or ""
# Strip leaked placeholder text that upstream may inject on empty completions.
if final_response.strip() == "(No response generated)":
final_response = ""
# Use a separate variable for log display; keep final_response clean
# for delivery logic (empty response = no delivery).
logged_response = final_response if final_response else "(No response generated)"

View file

@ -638,6 +638,18 @@ def load_gateway_config() -> GatewayConfig:
os.environ["TELEGRAM_IGNORED_THREADS"] = str(ignored_threads)
if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"):
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
if "disable_link_previews" in telegram_cfg:
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[Platform.TELEGRAM.value] = plat_data
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
extra["disable_link_previews"] = telegram_cfg["disable_link_previews"]
whatsapp_cfg = yaml_cfg.get("whatsapp", {})
if isinstance(whatsapp_cfg, dict):

View file

@ -902,7 +902,7 @@ class APIServerAdapter(BasePlatformAdapter):
return time.monotonic()
# Stream content chunks as they arrive from the agent
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
while True:
try:
delta = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
@ -1241,7 +1241,7 @@ class APIServerAdapter(BasePlatformAdapter):
await _emit_text_delta(it)
# Other types (non-string, non-tuple) are silently dropped.
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
while True:
try:
item = await loop.run_in_executor(None, lambda: stream_q.get(timeout=0.5))
@ -2004,7 +2004,7 @@ class APIServerAdapter(BasePlatformAdapter):
callers (e.g. the SSE writer) to call ``agent.interrupt()`` from
another thread to stop in-progress LLM calls.
"""
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
def _run():
agent = self._create_agent(

View file

@ -734,25 +734,56 @@ def merge_pending_message_event(
pending_messages: Dict[str, MessageEvent],
session_key: str,
event: MessageEvent,
*,
merge_text: bool = False,
) -> None:
"""Store or merge a pending event for a session.
Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
events. Merge those into the existing queued event so the next turn sees
the whole burst, while non-photo follow-ups still replace the pending
event normally.
the whole burst.
When ``merge_text`` is enabled, rapid follow-up TEXT events are appended
instead of replacing the pending turn. This is used for Telegram bursty
follow-ups so a multi-part user thought is not silently truncated to only
the last queued fragment.
"""
existing = pending_messages.get(session_key)
if (
existing
and getattr(existing, "message_type", None) == MessageType.PHOTO
and event.message_type == MessageType.PHOTO
):
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
if existing:
existing_is_photo = getattr(existing, "message_type", None) == MessageType.PHOTO
incoming_is_photo = event.message_type == MessageType.PHOTO
existing_has_media = bool(existing.media_urls)
incoming_has_media = bool(event.media_urls)
if existing_is_photo and incoming_is_photo:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
return
if existing_has_media or incoming_has_media:
if incoming_has_media:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
if existing.text:
existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
else:
existing.text = event.text
if existing_is_photo or incoming_is_photo:
existing.message_type = MessageType.PHOTO
return
if (
merge_text
and getattr(existing, "message_type", None) == MessageType.TEXT
and event.message_type == MessageType.TEXT
):
if event.text:
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
return
pending_messages[session_key] = event
@ -839,6 +870,11 @@ class BasePlatformAdapter(ABC):
# Gateway shutdown cancels these so an old gateway instance doesn't keep
# working on a task after --replace or manual restarts.
self._background_tasks: set[asyncio.Task] = set()
# One-shot callbacks to fire after the main response is delivered.
# Keyed by session_key. GatewayRunner uses this to defer
# background-review notifications ("💾 Skill created") until the
# primary reply has been sent.
self._post_delivery_callbacks: Dict[str, Callable] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Chats where auto-TTS on voice input is disabled (set by /voice off)
@ -1905,6 +1941,14 @@ class BasePlatformAdapter(ABC):
except Exception:
pass # Last resort — don't let error reporting crash the handler
finally:
# Fire any one-shot post-delivery callback registered for this
# session (e.g. deferred background-review notifications).
_post_cb = getattr(self, "_post_delivery_callbacks", {}).pop(session_key, None)
if callable(_post_cb):
try:
_post_cb()
except Exception:
pass
# Stop typing indicator
typing_task.cancel()
try:

View file

@ -366,6 +366,20 @@ class SlackAdapter(BasePlatformAdapter):
# in an assistant-enabled context. Falls back to reactions.
logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)
def _dm_top_level_threads_as_sessions(self) -> bool:
    """Whether top-level Slack DMs get per-message session threads.

    Defaults to ``True``: each visible DM reply thread is isolated as its
    own Hermes session, mirroring the per-thread behavior channels have.
    Setting ``platforms.slack.extra.dm_top_level_threads_as_sessions`` to
    ``false`` in config.yaml restores the legacy behavior where all
    top-level DMs share one continuous session.
    """
    setting = self.config.extra.get("dm_top_level_threads_as_sessions")
    if setting is None:
        # Unset -> per-thread sessions by default.
        return True
    normalized = str(setting).strip().lower()
    return normalized in {"1", "true", "yes", "on"}
def _resolve_thread_ts(
self,
reply_to: Optional[str] = None,
@ -996,10 +1010,14 @@ class SlackAdapter(BasePlatformAdapter):
# Build thread_ts for session keying.
# In channels: fall back to ts so each top-level @mention starts a
# new thread/session (the bot always replies in a thread).
# In DMs: only use the real thread_ts — top-level DMs should share
# one continuous session, threaded DMs get their own session.
# In DMs: fall back to ts so each top-level DM reply thread gets
# its own session key (matching channel behavior). Set
# dm_top_level_threads_as_sessions: false in config to revert to
# legacy single-session-per-DM-channel behavior.
if is_dm:
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts") # None for top-level DMs
thread_ts = event.get("thread_ts") or assistant_meta.get("thread_ts")
if not thread_ts and self._dm_top_level_threads_as_sessions():
thread_ts = ts
else:
thread_ts = event.get("thread_ts") or ts # ts fallback for channels

View file

@ -18,6 +18,10 @@ logger = logging.getLogger(__name__)
try:
from telegram import Update, Bot, Message, InlineKeyboardButton, InlineKeyboardMarkup
try:
from telegram import LinkPreviewOptions
except ImportError:
LinkPreviewOptions = None
from telegram.ext import (
Application,
CommandHandler,
@ -36,6 +40,7 @@ except ImportError:
Message = Any
InlineKeyboardButton = Any
InlineKeyboardMarkup = Any
LinkPreviewOptions = None
Application = Any
CommandHandler = Any
CallbackQueryHandler = Any
@ -129,6 +134,7 @@ class TelegramAdapter(BasePlatformAdapter):
# When a chunk is near this limit, a continuation is almost certain.
_SPLIT_THRESHOLD = 4000
MEDIA_GROUP_WAIT_SECONDS = 0.8
_GENERAL_TOPIC_THREAD_ID = "1"
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.TELEGRAM)
@ -137,6 +143,7 @@ class TelegramAdapter(BasePlatformAdapter):
self._webhook_mode: bool = False
self._mention_patterns = self._compile_mention_patterns()
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
self._disable_link_previews: bool = self._coerce_bool_extra("disable_link_previews", False)
# Buffer rapid/album photo updates so Telegram image bursts are handled
# as a single MessageEvent instead of self-interrupting multiple turns.
self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
@ -172,6 +179,29 @@ class TelegramAdapter(BasePlatformAdapter):
allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
return "*" in allowed_ids or user_id in allowed_ids
@classmethod
def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
    """Pull a Telegram topic/thread id out of event metadata.

    Looks at ``thread_id`` first, then ``message_thread_id``. Returns the
    id normalized to ``str``, or ``None`` when metadata is falsy or no id
    is present.
    """
    if metadata:
        raw = metadata.get("thread_id") or metadata.get("message_thread_id")
        if raw is not None:
            return str(raw)
    return None
@classmethod
def _message_thread_id_for_send(cls, thread_id: Optional[str]) -> Optional[int]:
    """Map a stored thread id to the ``message_thread_id`` send kwarg.

    The "General" topic (``_GENERAL_TOPIC_THREAD_ID``) must be addressed
    WITHOUT a message_thread_id, so it maps to ``None`` along with empty
    or missing ids.
    """
    if thread_id and str(thread_id) != cls._GENERAL_TOPIC_THREAD_ID:
        return int(thread_id)
    return None
@classmethod
def _message_thread_id_for_typing(cls, thread_id: Optional[str]) -> Optional[int]:
    """Map a stored thread id to the thread kwarg for typing indicators.

    Unlike ``_message_thread_id_for_send``, the General topic id is kept
    as-is here; only empty/missing ids become ``None``.
    """
    return int(thread_id) if thread_id else None
@staticmethod
def _is_thread_not_found_error(error: Exception) -> bool:
    """True when a Telegram error means the target topic/thread is gone."""
    message = str(error).lower()
    return "thread not found" in message
def _fallback_ips(self) -> list[str]:
"""Return validated fallback IPs from config (populated by _apply_env_overrides)."""
configured = self.config.extra.get("fallback_ips", []) if getattr(self.config, "extra", None) else []
@ -202,6 +232,26 @@ class TelegramAdapter(BasePlatformAdapter):
pass
return isinstance(error, OSError)
def _coerce_bool_extra(self, key: str, default: bool = False) -> bool:
    """Read a boolean-ish value from ``config.extra``.

    Accepts real booleans/numbers (truthiness applies) and the usual
    string spellings ("true"/"1"/"yes"/"on" and their negatives, any
    case). A missing ``extra`` mapping, a missing key, or an
    unrecognized string yields *default*.
    """
    extra = getattr(self.config, "extra", None)
    value = extra.get(key) if extra else None
    if value is None:
        return default
    if not isinstance(value, str):
        return bool(value)
    token = value.strip().lower()
    if token in {"true", "1", "yes", "on"}:
        return True
    if token in {"false", "0", "no", "off"}:
        return False
    return default
def _link_preview_kwargs(self) -> Dict[str, Any]:
    """Extra ``send_message`` kwargs that suppress Telegram link previews.

    Returns an empty dict when previews are allowed. Otherwise prefers
    the modern ``LinkPreviewOptions`` API when the installed
    python-telegram-bot exposes it, falling back to the legacy
    ``disable_web_page_preview`` flag.
    """
    if not getattr(self, "_disable_link_previews", False):
        return {}
    if LinkPreviewOptions is None:
        # Older python-telegram-bot without LinkPreviewOptions.
        return {"disable_web_page_preview": True}
    return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
async def _handle_polling_network_error(self, error: Exception) -> None:
"""Reconnect polling after a transient network interruption.
@ -549,7 +599,7 @@ class TelegramAdapter(BasePlatformAdapter):
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
}
proxy_url = resolve_proxy_url()
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
fallback_ips = self._fallback_ips()
if not fallback_ips:
@ -615,14 +665,14 @@ class TelegramAdapter(BasePlatformAdapter):
from telegram.error import NetworkError, TimedOut
except ImportError:
NetworkError = TimedOut = OSError # type: ignore[misc,assignment]
_max_connect = 3
_max_connect = 8
for _attempt in range(_max_connect):
try:
await self._app.initialize()
break
except (NetworkError, TimedOut, OSError) as init_err:
if _attempt < _max_connect - 1:
wait = 2 ** _attempt
wait = min(2 ** _attempt, 15)
logger.warning(
"[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
self.name, _attempt + 1, _max_connect, init_err, wait,
@ -823,7 +873,7 @@ class TelegramAdapter(BasePlatformAdapter):
]
message_ids = []
thread_id = metadata.get("thread_id") if metadata else None
thread_id = self._metadata_thread_id(metadata)
try:
from telegram.error import NetworkError as _NetErr
@ -843,7 +893,7 @@ class TelegramAdapter(BasePlatformAdapter):
for i, chunk in enumerate(chunks):
should_thread = self._should_thread_reply(reply_to, i)
reply_to_id = int(reply_to) if should_thread else None
effective_thread_id = int(thread_id) if thread_id else None
effective_thread_id = self._message_thread_id_for_send(thread_id)
msg = None
for _send_attempt in range(3):
@ -856,6 +906,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
except Exception as md_error:
# Markdown parsing failed, try plain text
@ -868,6 +919,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=None,
reply_to_message_id=reply_to_id,
message_thread_id=effective_thread_id,
**self._link_preview_kwargs(),
)
else:
raise
@ -878,8 +930,7 @@ class TelegramAdapter(BasePlatformAdapter):
# (not transient network issues). Detect and handle
# specific cases instead of blindly retrying.
if _BadReq and isinstance(send_err, _BadReq):
err_lower = str(send_err).lower()
if "thread not found" in err_lower and effective_thread_id is not None:
if self._is_thread_not_found_error(send_err) and effective_thread_id is not None:
# Thread doesn't exist — retry without
# message_thread_id so the message still
# reaches the chat.
@ -889,6 +940,7 @@ class TelegramAdapter(BasePlatformAdapter):
)
effective_thread_id = None
continue
err_lower = str(send_err).lower()
if "message to be replied not found" in err_lower and reply_to_id is not None:
# Original message was deleted before we
# could reply — clear reply target and retry
@ -1055,6 +1107,7 @@ class TelegramAdapter(BasePlatformAdapter):
text=text,
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
**self._link_preview_kwargs(),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1076,16 +1129,17 @@ class TelegramAdapter(BasePlatformAdapter):
try:
cmd_preview = command[:3800] + "..." if len(command) > 3800 else command
# Escape backticks that would break Markdown v1 inline code parsing
safe_cmd = cmd_preview.replace("`", "'")
safe_desc = description.replace("`", "'").replace("*", "")
text = (
f"⚠️ *Command Approval Required*\n\n"
f"`{cmd_preview}`\n\n"
f"Reason: {description}"
f"`{safe_cmd}`\n\n"
f"Reason: {safe_desc}"
)
# Resolve thread context for thread replies
thread_id = None
if metadata:
thread_id = metadata.get("thread_id") or metadata.get("message_thread_id")
thread_id = self._metadata_thread_id(metadata)
# We'll use the message_id as part of callback_data to look up session_key
# Send a placeholder first, then update — or use a counter.
@ -1111,9 +1165,11 @@ class TelegramAdapter(BasePlatformAdapter):
"text": text,
"parse_mode": ParseMode.MARKDOWN,
"reply_markup": keyboard,
**self._link_preview_kwargs(),
}
if thread_id:
kwargs["message_thread_id"] = int(thread_id)
message_thread_id = self._message_thread_id_for_send(thread_id)
if message_thread_id is not None:
kwargs["message_thread_id"] = message_thread_id
msg = await self._bot.send_message(**kwargs)
@ -1181,6 +1237,7 @@ class TelegramAdapter(BasePlatformAdapter):
parse_mode=ParseMode.MARKDOWN,
reply_markup=keyboard,
message_thread_id=int(thread_id) if thread_id else None,
**self._link_preview_kwargs(),
)
# Store picker state keyed by chat_id
@ -1545,23 +1602,23 @@ class TelegramAdapter(BasePlatformAdapter):
with open(audio_path, "rb") as audio_file:
# .ogg files -> send as voice (round playable bubble)
if audio_path.endswith((".ogg", ".opus")):
_voice_thread = metadata.get("thread_id") if metadata else None
_voice_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_voice(
chat_id=int(chat_id),
voice=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_voice_thread) if _voice_thread else None,
message_thread_id=self._message_thread_id_for_send(_voice_thread),
)
else:
# .mp3 and others -> send as audio file
_audio_thread = metadata.get("thread_id") if metadata else None
_audio_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_audio(
chat_id=int(chat_id),
audio=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_audio_thread) if _audio_thread else None,
message_thread_id=self._message_thread_id_for_send(_audio_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1591,14 +1648,14 @@ class TelegramAdapter(BasePlatformAdapter):
if not os.path.exists(image_path):
return SendResult(success=False, error=f"Image file not found: {image_path}")
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(image_path, "rb") as image_file:
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1629,7 +1686,7 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error=f"File not found: {file_path}")
display_name = file_name or os.path.basename(file_path)
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(file_path, "rb") as f:
msg = await self._bot.send_document(
@ -1638,7 +1695,7 @@ class TelegramAdapter(BasePlatformAdapter):
filename=display_name,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1662,14 +1719,14 @@ class TelegramAdapter(BasePlatformAdapter):
if not os.path.exists(video_path):
return SendResult(success=False, error=f"Video file not found: {video_path}")
_thread = metadata.get("thread_id") if metadata else None
_thread = self._metadata_thread_id(metadata)
with open(video_path, "rb") as f:
msg = await self._bot.send_video(
chat_id=int(chat_id),
video=f,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_thread) if _thread else None,
message_thread_id=self._message_thread_id_for_send(_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1699,13 +1756,13 @@ class TelegramAdapter(BasePlatformAdapter):
try:
# Telegram can send photos directly from URLs (up to ~5MB)
_photo_thread = metadata.get("thread_id") if metadata else None
_photo_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_url,
caption=caption[:1024] if caption else None, # Telegram caption limit
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_photo_thread) if _photo_thread else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1728,6 +1785,7 @@ class TelegramAdapter(BasePlatformAdapter):
photo=image_data,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=self._message_thread_id_for_send(_photo_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e2:
@ -1753,13 +1811,13 @@ class TelegramAdapter(BasePlatformAdapter):
return SendResult(success=False, error="Not connected")
try:
_anim_thread = metadata.get("thread_id") if metadata else None
_anim_thread = self._metadata_thread_id(metadata)
msg = await self._bot.send_animation(
chat_id=int(chat_id),
animation=animation_url,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_anim_thread) if _anim_thread else None,
message_thread_id=self._message_thread_id_for_send(_anim_thread),
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
@ -1776,12 +1834,23 @@ class TelegramAdapter(BasePlatformAdapter):
"""Send typing indicator."""
if self._bot:
try:
_typing_thread = metadata.get("thread_id") if metadata else None
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=int(_typing_thread) if _typing_thread else None,
)
_typing_thread = self._metadata_thread_id(metadata)
message_thread_id = self._message_thread_id_for_typing(_typing_thread)
try:
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=message_thread_id,
)
except Exception as e:
if message_thread_id is not None and self._is_thread_not_found_error(e):
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=None,
)
else:
raise
except Exception as e:
# Typing failures are non-fatal; log at debug level only.
logger.debug(
@ -2726,7 +2795,9 @@ class TelegramAdapter(BasePlatformAdapter):
# Resolve DM topic name and skill binding
thread_id_raw = message.message_thread_id
thread_id_str = str(thread_id_raw) if thread_id_raw else None
thread_id_str = str(thread_id_raw) if thread_id_raw is not None else None
if chat_type == "group" and thread_id_str is None and getattr(chat, "is_forum", False):
thread_id_str = self._GENERAL_TOPIC_THREAD_ID
chat_topic = None
topic_skill = None

View file

@ -46,7 +46,7 @@ _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
def _resolve_proxy_url() -> str | None:
    """Resolve the proxy URL to use for Telegram connections.

    Delegates to the shared implementation (environment variables + macOS
    system proxy detection), preferring the TELEGRAM_PROXY variable so
    Telegram traffic can be proxied independently of HTTPS_PROXY.

    Returns:
        The proxy URL string, or None when no proxy is configured.
    """
    # Imported lazily to avoid a module-level import cycle with gateway.platforms.
    from gateway.platforms.base import resolve_proxy_url
    return resolve_proxy_url("TELEGRAM_PROXY")
class TelegramFallbackTransport(httpx.AsyncBaseTransport):

View file

@ -258,6 +258,20 @@ class WecomCallbackAdapter(BasePlatformAdapter):
)
event = self._build_event(app, decrypted)
if event is not None:
# Deduplicate: WeCom retries callbacks on timeout,
# producing duplicate inbound messages (#10305).
if event.message_id:
now = time.time()
if event.message_id in self._seen_messages:
if now - self._seen_messages[event.message_id] < MESSAGE_DEDUP_TTL_SECONDS:
logger.debug("[WecomCallback] Duplicate MsgId %s, skipping", event.message_id)
return web.Response(text="success", content_type="text/plain")
del self._seen_messages[event.message_id]
self._seen_messages[event.message_id] = now
# Prune expired entries when cache grows large
if len(self._seen_messages) > 2000:
cutoff = now - MESSAGE_DEDUP_TTL_SECONDS
self._seen_messages = {k: v for k, v in self._seen_messages.items() if v > cutoff}
# Record which app this user belongs to.
if event.source and event.source.user_id:
map_key = self._user_app_key(

View file

@ -24,6 +24,7 @@ import signal
import tempfile
import threading
import time
from contextvars import copy_context
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List
@ -834,7 +835,7 @@ class GatewayRunner:
session_key: Optional[str] = None,
):
"""Run the sync memory flush in a thread pool so it won't block the event loop."""
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
await loop.run_in_executor(
None,
self._flush_memories_for_session,
@ -2925,6 +2926,32 @@ class GatewayRunner:
merge_pending_message_event(adapter._pending_messages, _quick_key, event)
return None
_telegram_followup_grace = float(
os.getenv("HERMES_TELEGRAM_FOLLOWUP_GRACE_SECONDS", "3.0")
)
_started_at = self._running_agents_ts.get(_quick_key, 0)
if (
source.platform == Platform.TELEGRAM
and event.message_type == MessageType.TEXT
and _telegram_followup_grace > 0
and _started_at
and (time.time() - _started_at) <= _telegram_followup_grace
):
logger.debug(
"Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
time.time() - _started_at,
_quick_key[:20],
)
adapter = self.adapters.get(source.platform)
if adapter:
merge_pending_message_event(
adapter._pending_messages,
_quick_key,
event,
merge_text=True,
)
return None
running_agent = self._running_agents.get(_quick_key)
if running_agent is _AGENT_PENDING_SENTINEL:
# Agent is being set up but not ready yet.
@ -2938,7 +2965,12 @@ class GatewayRunner:
# agent starts.
adapter = self.adapters.get(source.platform)
if adapter:
adapter._pending_messages[_quick_key] = event
merge_pending_message_event(
adapter._pending_messages,
_quick_key,
event,
merge_text=True,
)
return None
if self._draining:
if self._queue_during_drain_enabled():
@ -3746,12 +3778,13 @@ class GatewayRunner:
model=_hyg_model,
max_iterations=4,
quiet_mode=True,
skip_memory=True,
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
_hyg_agent._print_fn = lambda *a, **kw: None
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
_compressed, _ = await loop.run_in_executor(
None,
lambda: _hyg_agent._compress_context(
@ -4400,31 +4433,16 @@ class GatewayRunner:
async def _handle_profile_command(self, event: MessageEvent) -> str:
"""Handle /profile — show active profile name and home directory."""
from hermes_constants import get_hermes_home, display_hermes_home
from pathlib import Path
from hermes_constants import display_hermes_home
from hermes_cli.profiles import get_active_profile_name
home = get_hermes_home()
display = display_hermes_home()
profile_name = get_active_profile_name()
# Detect profile name from HERMES_HOME path
# Profile paths look like: ~/.hermes/profiles/<name>
profiles_parent = Path.home() / ".hermes" / "profiles"
try:
rel = home.relative_to(profiles_parent)
profile_name = str(rel).split("/")[0]
except ValueError:
profile_name = None
if profile_name:
lines = [
f"👤 **Profile:** `{profile_name}`",
f"📂 **Home:** `{display}`",
]
else:
lines = [
"👤 **Profile:** default",
f"📂 **Home:** `{display}`",
]
lines = [
f"👤 **Profile:** `{profile_name}`",
f"📂 **Home:** `{display}`",
]
return "\n".join(lines)
@ -5087,6 +5105,7 @@ class GatewayRunner:
async def _handle_personality_command(self, event: MessageEvent) -> str:
"""Handle /personality command - list or set a personality."""
import yaml
from hermes_constants import display_hermes_home
args = event.get_command_args().strip().lower()
config_path = _hermes_home / 'config.yaml'
@ -5104,7 +5123,7 @@ class GatewayRunner:
personalities = {}
if not personalities:
return "No personalities configured in `~/.hermes/config.yaml`"
return f"No personalities configured in `{display_hermes_home()}/config.yaml`"
if not args:
lines = ["🎭 **Available Personalities**\n"]
@ -5825,8 +5844,7 @@ class GatewayRunner:
task_id=task_id,
)
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(None, run_sync)
result = await self._run_in_executor_with_context(run_sync)
response = result.get("final_response", "") if result else ""
if not response and result and result.get("error"):
@ -6008,8 +6026,7 @@ class GatewayRunner:
task_id=task_id,
)
loop = asyncio.get_event_loop()
result = await loop.run_in_executor(None, run_sync)
result = await self._run_in_executor_with_context(run_sync)
response = (result.get("final_response") or "") if result else ""
if not response and result and result.get("error"):
@ -6332,6 +6349,7 @@ class GatewayRunner:
model=model,
max_iterations=4,
quiet_mode=True,
skip_memory=True,
enabled_toolsets=["memory"],
session_id=session_entry.session_id,
)
@ -6344,7 +6362,7 @@ class GatewayRunner:
if compress_start >= compress_end:
return "Nothing to compress yet (the transcript is still all protected context)."
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
compressed, _ = await loop.run_in_executor(
None,
lambda: tmp_agent._compress_context(msgs, "", approx_tokens=approx_tokens, focus_topic=focus_topic)
@ -6697,6 +6715,11 @@ class GatewayRunner:
import asyncio as _asyncio
args = event.get_command_args().strip()
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
import re as _re
args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)
days = 30
source = None
@ -6724,7 +6747,7 @@ class GatewayRunner:
from hermes_state import SessionDB
from agent.insights import InsightsEngine
loop = _asyncio.get_event_loop()
loop = _asyncio.get_running_loop()
def _run_insights():
db = SessionDB()
@ -6741,7 +6764,7 @@ class GatewayRunner:
async def _handle_reload_mcp_command(self, event: MessageEvent) -> str:
"""Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
try:
from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock
@ -7422,7 +7445,13 @@ class GatewayRunner:
"""Restore session context variables to their pre-handler values."""
from gateway.session_context import clear_session_vars
clear_session_vars(tokens)
async def _run_in_executor_with_context(self, func, *args):
"""Run blocking work in the thread pool while preserving session contextvars."""
loop = asyncio.get_running_loop()
ctx = copy_context()
return await loop.run_in_executor(None, ctx.run, func, *args)
async def _enrich_message_with_vision(
self,
user_text: str,
@ -8456,7 +8485,7 @@ class GatewayRunner:
stream_consumer_holder = [None] # Mutable container for stream consumer
# Bridge sync step_callback → async hooks.emit for agent:step events
_loop_for_step = asyncio.get_event_loop()
_loop_for_step = asyncio.get_running_loop()
_hooks_ref = self.hooks
def _step_callback_sync(iteration: int, prev_tools: list) -> None:
@ -8694,6 +8723,7 @@ class GatewayRunner:
session_id=session_id,
platform=platform_key,
user_id=source.user_id,
gateway_session_key=session_key,
session_db=self._session_db,
fallback_model=self._fallback_model,
)
@ -8713,8 +8743,11 @@ class GatewayRunner:
agent.service_tier = self._service_tier
agent.request_overrides = turn_route.get("request_overrides")
# Background review delivery — send "💾 Memory updated" etc. to user
def _bg_review_send(message: str) -> None:
_bg_review_release = threading.Event()
_bg_review_pending: list[str] = []
_bg_review_pending_lock = threading.Lock()
def _deliver_bg_review_message(message: str) -> None:
if not _status_adapter:
return
try:
@ -8729,7 +8762,32 @@ class GatewayRunner:
except Exception as _e:
logger.debug("background_review_callback error: %s", _e)
def _release_bg_review_messages() -> None:
_bg_review_release.set()
with _bg_review_pending_lock:
pending = list(_bg_review_pending)
_bg_review_pending.clear()
for queued in pending:
_deliver_bg_review_message(queued)
# Background review delivery — send "💾 Memory updated" etc. to user
def _bg_review_send(message: str) -> None:
if not _status_adapter:
return
if not _bg_review_release.is_set():
with _bg_review_pending_lock:
if not _bg_review_release.is_set():
_bg_review_pending.append(message)
return
_deliver_bg_review_message(message)
agent.background_review_callback = _bg_review_send
# Register the release hook on the adapter so base.py's finally
# block can fire it after delivering the main response.
if _status_adapter and session_key:
_pdc = getattr(_status_adapter, "_post_delivery_callbacks", None)
if _pdc is not None:
_pdc[session_key] = _release_bg_review_messages
# Store agent reference for interrupt support
agent_holder[0] = agent
@ -8925,7 +8983,7 @@ class GatewayRunner:
_resolved_model = getattr(_agent, "model", None) if _agent else None
if not final_response:
error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
error_msg = f"⚠️ {result['error']}" if result.get("error") else ""
return {
"final_response": error_msg,
"messages": result.get("messages", []),
@ -9169,9 +9227,8 @@ class GatewayRunner:
_agent_warning_raw = float(os.getenv("HERMES_AGENT_TIMEOUT_WARNING", 900))
_agent_warning = _agent_warning_raw if _agent_warning_raw > 0 else None
_warning_fired = False
loop = asyncio.get_event_loop()
_executor_task = asyncio.ensure_future(
loop.run_in_executor(None, run_sync)
self._run_in_executor_with_context(run_sync)
)
_inactivity_timeout = False
@ -9436,16 +9493,18 @@ class GatewayRunner:
pass
except Exception as e:
logger.debug("Stream consumer wait before queued message failed: %s", e)
_previewed = bool(result.get("response_previewed"))
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
(_sc and getattr(_sc, "final_response_sent", False))
or _previewed
)
first_response = result.get("final_response", "")
if first_response and not _already_streamed:
try:
logger.info(
"Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
session_key[:20] if session_key else "?",
)
await adapter.send(
source.chat_id,
first_response,
@ -9453,6 +9512,22 @@ class GatewayRunner:
)
except Exception as e:
logger.warning("Failed to send first response before queued message: %s", e)
elif first_response:
logger.info(
"Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
session_key[:20] if session_key else "?",
)
# Release deferred bg-review notifications now that the
# first response has been delivered. Pop from the
# adapter's callback dict (prevents double-fire in
# base.py's finally block) and call it.
if adapter and hasattr(adapter, "_post_delivery_callbacks"):
_bg_cb = adapter._post_delivery_callbacks.pop(session_key, None)
if callable(_bg_cb):
try:
_bg_cb()
except Exception:
pass
# else: interrupted — discard the interrupted response ("Operation
# interrupted." is just noise; the user already knows they sent a
# new message).
@ -9472,6 +9547,19 @@ class GatewayRunner:
return result
next_message_id = getattr(pending_event, "message_id", None)
# Restart typing indicator so the user sees activity while
# the follow-up turn runs. The outer _process_message_background
# typing task is still alive but may be stale.
_followup_adapter = self.adapters.get(source.platform)
if _followup_adapter:
try:
await _followup_adapter.send_typing(
source.chat_id,
metadata=_status_thread_metadata,
)
except Exception:
pass
return await self._run_agent(
message=next_message,
context_prompt=context_prompt,
@ -9532,13 +9620,22 @@ class GatewayRunner:
# final answer. Suppressing delivery here leaves the user staring
# at silence. (#10xxx — "agent stops after web search")
_sc = stream_consumer_holder[0]
if _sc and isinstance(response, dict) and not response.get("failed"):
if isinstance(response, dict) and not response.get("failed"):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
if not _is_empty_sentinel and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
):
_streamed = bool(
_sc and getattr(_sc, "final_response_sent", False)
)
# response_previewed means the interim_assistant_callback already
# sent the final text via the adapter (non-streaming path).
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
logger.info(
"Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).",
session_key[:20] if session_key else "?",
_streamed,
_previewed,
)
response["already_sent"] = True
return response
@ -9752,7 +9849,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
def restart_signal_handler():
runner.request_restart(detached=False, via_service=True)
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
if threading.current_thread() is threading.main_thread():
for sig in (signal.SIGINT, signal.SIGTERM):
try:

View file

@ -301,6 +301,8 @@ def build_session_context_prompt(
lines.append("")
lines.append("**Delivery options for scheduled tasks:**")
from hermes_constants import display_hermes_home
# Origin delivery
if context.source.platform == Platform.LOCAL:
lines.append("- `\"origin\"` → Local output (saved to files)")
@ -309,9 +311,11 @@ def build_session_context_prompt(
_hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
)
lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
# Local always available
lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
lines.append(
f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
)
# Platform home channels
for platform, home in context.home_channels.items():

View file

@ -403,18 +403,20 @@ class GatewayStreamConsumer:
except asyncio.CancelledError:
# Best-effort final edit on cancellation
_best_effort_ok = False
if self._accumulated and self._message_id:
try:
await self._send_or_edit(self._accumulated)
_best_effort_ok = bool(await self._send_or_edit(self._accumulated))
except Exception:
pass
# If we delivered any content before being cancelled, mark the
# final response as sent so the gateway's already_sent check
# doesn't trigger a duplicate message. The 5-second
# stream_task timeout (gateway/run.py) can cancel us while
# waiting on a slow Telegram API call — without this flag the
# gateway falls through to the normal send path.
if self._already_sent:
# Only confirm final delivery if the best-effort send above
# actually succeeded OR if the final response was already
# confirmed before we were cancelled. Previously this
# promoted any partial send (already_sent=True) to
# final_response_sent — which suppressed the gateway's
# fallback send even when only intermediate text (e.g.
# "Let me search…") had been delivered, not the real answer.
if _best_effort_ok and not self._final_response_sent:
self._final_response_sent = True
except Exception as e:
logger.error("Stream consumer error: %s", e)

View file

@ -70,6 +70,7 @@ DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@ -274,6 +275,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XIAOMI_API_KEY",),
base_url_env_var="XIAOMI_BASE_URL",
),
"ollama-cloud": ProviderConfig(
id="ollama-cloud",
name="Ollama Cloud",
auth_type="api_key",
inference_base_url=DEFAULT_OLLAMA_CLOUD_BASE_URL,
api_key_env_vars=("OLLAMA_API_KEY",),
base_url_env_var="OLLAMA_BASE_URL",
),
"bedrock": ProviderConfig(
id="bedrock",
name="AWS Bedrock",
@ -919,6 +928,7 @@ def resolve_provider(
_PROVIDER_ALIASES = {
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
"google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
"x-ai": "xai", "x.ai": "xai", "grok": "xai",
"kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"arcee-ai": "arcee", "arceeai": "arcee",
@ -937,7 +947,8 @@ def resolve_provider(
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
# Local server aliases — route through the generic custom provider
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
"ollama": "custom", "vllm": "custom", "llamacpp": "custom",
"ollama": "custom", "ollama_cloud": "ollama-cloud",
"vllm": "custom", "llamacpp": "custom",
"llama.cpp": "custom", "llama-cpp": "custom",
}
normalized = _PROVIDER_ALIASES.get(normalized, normalized)

View file

@ -4,6 +4,7 @@ from __future__ import annotations
from getpass import getpass
import math
import sys
import time
from types import SimpleNamespace
import uuid
@ -160,7 +161,10 @@ def auth_add_command(args) -> None:
default_label = _api_key_default_label(len(pool.entries()) + 1)
label = (getattr(args, "label", None) or "").strip()
if not label:
label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
if sys.stdin.isatty():
label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
else:
label = default_label
entry = PooledCredential(
provider=provider,
id=uuid.uuid4().hex[:6],

View file

@ -454,7 +454,7 @@ def _collect_gateway_skill_entries(
name = sanitize_name(cmd_name) if sanitize_name else cmd_name
if not name:
continue
desc = "Plugin command"
desc = plugin_cmds[cmd_name].get("description", "Plugin command")
if len(desc) > desc_limit:
desc = desc[:desc_limit - 3] + "..."
plugin_pairs.append((name, desc))
@ -1195,6 +1195,22 @@ class SlashCommandCompleter(Completer):
display_meta=f"{short_desc}",
)
# Plugin-registered slash commands
try:
from hermes_cli.plugins import get_plugin_commands
for cmd_name, cmd_info in get_plugin_commands().items():
if cmd_name.startswith(word):
desc = str(cmd_info.get("description", "Plugin command"))
short_desc = desc[:50] + ("..." if len(desc) > 50 else "")
yield Completion(
self._completion_text(cmd_name, word),
start_position=-len(word),
display=f"/{cmd_name}",
display_meta=f"🔌 {short_desc}",
)
except Exception:
pass
# ---------------------------------------------------------------------------
# Inline auto-suggest (ghost text) for slash commands

View file

@ -241,13 +241,41 @@ def _secure_dir(path):
pass
def _is_container() -> bool:
"""Detect if we're running inside a Docker/Podman/LXC container.
When Hermes runs in a container with volume-mounted config files, forcing
0o600 permissions breaks multi-process setups where the gateway and
dashboard run as different UIDs or the volume mount requires broader
permissions.
"""
# Explicit opt-out
if os.environ.get("HERMES_CONTAINER") or os.environ.get("HERMES_SKIP_CHMOD"):
return True
# Docker / Podman marker file
if os.path.exists("/.dockerenv"):
return True
# LXC / cgroup-based detection
try:
with open("/proc/1/cgroup", "r") as f:
cgroup_content = f.read()
if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content:
return True
except (OSError, IOError):
pass
return False
def _secure_file(path):
"""Set file to owner-only read/write (0600). No-op on Windows.
Skipped in managed mode — the NixOS activation script sets
group-readable permissions (0640) on config files.
Skipped in containers — Docker/Podman volume mounts often need broader
permissions. Set HERMES_SKIP_CHMOD=1 to force-skip on other systems.
"""
if is_managed():
if is_managed() or _is_container():
return
try:
if os.path.exists(str(path)):
@ -392,8 +420,7 @@ DEFAULT_CONFIG = {
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
"camofox": {
# When true, Hermes sends a stable profile-scoped userId to Camofox
# so the server can map it to a persistent browser profile directory.
# Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
# so the server maps it to a persistent Firefox profile automatically.
# When false (default), each session gets a random userId (ephemeral).
"managed_persistence": False,
},
@ -531,6 +558,11 @@ DEFAULT_CONFIG = {
"platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
},
# Web dashboard settings
"dashboard": {
"theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
},
# Privacy settings
"privacy": {
"redact_pii": False, # When True, hash user IDs and strip phone numbers from LLM context
@ -538,7 +570,7 @@ DEFAULT_CONFIG = {
# Text-to-speech configuration
"tts": {
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
"provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
"edge": {
"voice": "en-US-AriaNeural",
# Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@ -552,6 +584,12 @@ DEFAULT_CONFIG = {
"voice": "alloy",
# Voices: alloy, echo, fable, onyx, nova, shimmer
},
"xai": {
"voice_id": "eve",
"language": "en",
"sample_rate": 24000,
"bit_rate": 128000,
},
"mistral": {
"model": "voxtral-mini-tts-2603",
"voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8", # Paul - Neutral
@ -808,6 +846,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"XAI_API_KEY": {
"description": "xAI API key",
"prompt": "xAI API key",
"url": "https://console.x.ai/",
"password": True,
"category": "provider",
"advanced": True,
},
"XAI_BASE_URL": {
"description": "xAI base URL override",
"prompt": "xAI base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"GLM_API_KEY": {
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
"prompt": "Z.AI / GLM API key",
@ -996,6 +1050,22 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"OLLAMA_API_KEY": {
"description": "Ollama Cloud API key (ollama.com — cloud-hosted open models)",
"prompt": "Ollama Cloud API key",
"url": "https://ollama.com/settings",
"password": True,
"category": "provider",
"advanced": True,
},
"OLLAMA_BASE_URL": {
"description": "Ollama Cloud base URL override (default: https://ollama.com/v1)",
"prompt": "Ollama base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"XIAOMI_API_KEY": {
"description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
"prompt": "Xiaomi MiMo API Key",
@ -1224,6 +1294,12 @@ OPTIONAL_ENV_VARS = {
"password": False,
"category": "messaging",
},
"TELEGRAM_PROXY": {
"description": "Proxy URL for Telegram connections (overrides HTTPS_PROXY). Supports http://, https://, socks5://",
"prompt": "Telegram proxy URL (optional)",
"password": False,
"category": "messaging",
},
"DISCORD_BOT_TOKEN": {
"description": "Discord bot token from Developer Portal",
"prompt": "Discord bot token",
@ -2900,12 +2976,25 @@ def save_env_value(key: str, value: str):
lines.append(f"{key}={value}\n")
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
if env_path.exists():
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
# Restore original permissions before _secure_file may tighten them.
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)
@ -2916,13 +3005,6 @@ def save_env_value(key: str, value: str):
os.environ[key] = value
# Restrict .env permissions to owner-only (contains API keys)
if not _IS_WINDOWS:
try:
os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
def remove_env_value(key: str) -> bool:
"""Remove a key from ~/.hermes/.env and os.environ.
@ -2951,12 +3033,23 @@ def remove_env_value(key: str) -> bool:
if found:
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(new_lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)

View file

@ -1372,7 +1372,7 @@ def select_provider_and_model(args=None):
_model_flow_kimi(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee", "ollama-cloud"):
_model_flow_api_key_provider(config, selected_provider, current_model)
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@ -1799,6 +1799,27 @@ def _model_flow_custom(config):
effective_key = api_key or current_key
# Hint: most local model servers (Ollama, vLLM, llama.cpp) require /v1
# in the base URL for OpenAI-compatible chat completions. Prompt the
# user if the URL looks like a local server without /v1.
_url_lower = effective_url.rstrip("/").lower()
_looks_local = any(h in _url_lower for h in ("localhost", "127.0.0.1", "0.0.0.0", ":11434", ":8080", ":5000"))
if _looks_local and not _url_lower.endswith("/v1"):
print()
print(f" Hint: Did you mean to add /v1 at the end?")
print(f" Most local model servers (Ollama, vLLM, llama.cpp) require it.")
print(f" e.g. {effective_url.rstrip('/')}/v1")
try:
_add_v1 = input(" Add /v1? [Y/n]: ").strip().lower()
except (KeyboardInterrupt, EOFError):
_add_v1 = "n"
if _add_v1 in ("", "y", "yes"):
effective_url = effective_url.rstrip("/") + "/v1"
if base_url:
base_url = effective_url
print(f" Updated URL: {effective_url}")
print()
from hermes_cli.models import probe_api_models
probe = probe_api_models(effective_key, effective_url)
@ -2965,34 +2986,43 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
# 1. models.dev registry (cached, filtered for agentic/tool-capable models)
# 2. Curated static fallback list (offline insurance)
# 3. Live /models endpoint probe (small providers without models.dev data)
curated = _PROVIDER_MODELS.get(provider_id, [])
# Try models.dev first — returns tool-capable models, filtered for noise
mdev_models: list = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models(provider_id)
except Exception:
pass
if mdev_models:
model_list = mdev_models
print(f" Found {len(model_list)} model(s) from models.dev registry")
elif curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
model_list = curated
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
else:
#
# Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
if provider_id == "ollama-cloud":
from hermes_cli.models import fetch_ollama_cloud_models
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models and len(live_models) >= len(curated):
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = fetch_ollama_cloud_models(api_key=api_key_for_probe, base_url=effective_base)
if model_list:
print(f" Found {len(model_list)} model(s) from Ollama Cloud")
else:
curated = _PROVIDER_MODELS.get(provider_id, [])
# Try models.dev first — returns tool-capable models, filtered for noise
mdev_models: list = []
try:
from agent.models_dev import list_agentic_models
mdev_models = list_agentic_models(provider_id)
except Exception:
pass
if mdev_models:
model_list = mdev_models
print(f" Found {len(model_list)} model(s) from models.dev registry")
elif curated and len(curated) >= 8:
# Curated list is substantial — use it directly, skip live probe
model_list = curated
if model_list:
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
else:
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
live_models = fetch_api_models(api_key_for_probe, effective_base)
if live_models and len(live_models) >= len(curated):
model_list = live_models
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
else:
model_list = curated
if model_list:
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
# else: no defaults either, will fall through to raw input
if provider_id in {"opencode-zen", "opencode-go"}:
model_list = [normalize_opencode_model_id(provider_id, mid) for mid in model_list]
@ -5130,7 +5160,7 @@ For more help on a command:
)
chat_parser.add_argument(
"--provider",
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "xai", "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
default=None,
help="Inference provider (default: auto)"
)
@ -6608,8 +6638,13 @@ Examples:
sys.stderr = _io.StringIO()
args = parser.parse_args(_processed_argv)
sys.stderr = _saved_stderr
except SystemExit:
except SystemExit as exc:
sys.stderr = _saved_stderr
# Help/version flags (exit code 0) already printed output —
# re-raise immediately to avoid a second parse_args printing
# the same help text again (#10230).
if exc.code == 0:
raise
# Subcommand name was consumed as a flag value (e.g. -c model).
# Fall back to optional subparsers so argparse handles it normally.
subparsers.required = False

View file

@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
"qwen-oauth",
"xiaomi",
"arcee",
"ollama-cloud",
"custom",
})

View file

@ -274,6 +274,11 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
is_global = False
explicit_provider = ""
# Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
# A single Unicode dash before a flag keyword becomes "--"
import re as _re
raw_args = _re.sub(r'[\u2012\u2013\u2014\u2015](provider|global)', r'--\1', raw_args)
# Extract --global
if "--global" in raw_args:
is_global = True
@ -452,6 +457,7 @@ def switch_model(
ModelSwitchResult with all information the caller needs.
"""
from hermes_cli.models import (
copilot_model_api_mode,
detect_provider_for_model,
validate_requested_model,
opencode_model_api_mode,
@ -709,8 +715,12 @@ def switch_model(
if validation.get("corrected_model"):
new_model = validation["corrected_model"]
# --- Copilot api_mode override ---
if target_provider in {"copilot", "github-copilot"}:
api_mode = copilot_model_api_mode(new_model, api_key=api_key)
# --- OpenCode api_mode override ---
if target_provider in {"opencode-zen", "opencode-go", "opencode", "opencode-go"}:
if target_provider in {"opencode-zen", "opencode-go", "opencode"}:
api_mode = opencode_model_api_mode(target_provider, new_model)
# --- Determine api_mode if not already set ---
@ -786,7 +796,8 @@ def list_authenticated_providers(
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
results: List[dict] = []
seen_slugs: set = set()
seen_slugs: set = set() # lowercase-normalized to catch case variants (#9545)
seen_mdev_ids: set = set() # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
data = fetch_models_dev()
@ -799,6 +810,11 @@ def list_authenticated_providers(
# --- 1. Check Hermes-mapped providers ---
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
# Skip aliases that map to the same models.dev provider (e.g.
# kimi-coding and kimi-coding-cn both → kimi-for-coding).
# The first one with valid credentials wins (#10526).
if mdev_id in seen_mdev_ids:
continue
pdata = data.get(mdev_id)
if not isinstance(pdata, dict):
continue
@ -837,7 +853,8 @@ def list_authenticated_providers(
"total_models": total,
"source": "built-in",
})
seen_slugs.add(slug)
seen_slugs.add(slug.lower())
seen_mdev_ids.add(mdev_id)
# --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
from hermes_cli.providers import HERMES_OVERLAYS
@ -849,12 +866,12 @@ def list_authenticated_providers(
_mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
for pid, overlay in HERMES_OVERLAYS.items():
if pid in seen_slugs:
if pid.lower() in seen_slugs:
continue
# Resolve Hermes slug — e.g. "github-copilot" → "copilot"
hermes_slug = _mdev_to_hermes.get(pid, pid)
if hermes_slug in seen_slugs:
if hermes_slug.lower() in seen_slugs:
continue
# Check if credentials exist
@ -935,8 +952,8 @@ def list_authenticated_providers(
"total_models": total,
"source": "hermes",
})
seen_slugs.add(pid)
seen_slugs.add(hermes_slug)
seen_slugs.add(pid.lower())
seen_slugs.add(hermes_slug.lower())
# --- 2b. Cross-check canonical provider list ---
# Catches providers that are in CANONICAL_PROVIDERS but weren't found
@ -948,7 +965,7 @@ def list_authenticated_providers(
_canon_provs = []
for _cp in _canon_provs:
if _cp.slug in seen_slugs:
if _cp.slug.lower() in seen_slugs:
continue
# Check credentials via PROVIDER_REGISTRY (auth.py)
@ -995,7 +1012,7 @@ def list_authenticated_providers(
"total_models": _cp_total,
"source": "canonical",
})
seen_slugs.add(_cp.slug)
seen_slugs.add(_cp.slug.lower())
# --- 3. User-defined endpoints from config ---
if user_providers and isinstance(user_providers, dict):
@ -1068,7 +1085,7 @@ def list_authenticated_providers(
groups[slug]["models"].append(default_model)
for slug, grp in groups.items():
if slug in seen_slugs:
if slug.lower() in seen_slugs:
continue
results.append({
"slug": slug,
@ -1080,11 +1097,9 @@ def list_authenticated_providers(
"source": "user-config",
"api_url": grp["api_url"],
})
seen_slugs.add(slug)
seen_slugs.add(slug.lower())
# Sort: current provider first, then by model count descending
results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
return results

View file

@ -11,7 +11,9 @@ import json
import os
import urllib.request
import urllib.error
import time
from difflib import get_close_matches
from pathlib import Path
from typing import Any, NamedTuple, Optional
COPILOT_BASE_URL = "https://api.githubcopilot.com"
@ -143,17 +145,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"glm-4.5-flash",
],
"xai": [
"grok-4.20-0309-reasoning",
"grok-4.20-0309-non-reasoning",
"grok-4.20-multi-agent-0309",
"grok-4.20-reasoning",
"grok-4-1-fast-reasoning",
"grok-4-1-fast-non-reasoning",
"grok-4-fast-reasoning",
"grok-4-fast-non-reasoning",
"grok-4-0709",
"grok-code-fast-1",
"grok-3",
"grok-3-mini",
],
"kimi-coding": [
"kimi-for-coding",
@ -547,6 +540,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("minimax", "MiniMax", "MiniMax (global direct API)"),
ProviderEntry("minimax-cn", "MiniMax (China)", "MiniMax China (domestic direct API)"),
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
@ -559,6 +553,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
_PROVIDER_ALIASES = {
"glm": "zai",
"z-ai": "zai",
@ -611,6 +606,8 @@ _PROVIDER_ALIASES = {
"grok": "xai",
"x-ai": "xai",
"x.ai": "xai",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"ollama_cloud": "ollama-cloud",
}
@ -1064,7 +1061,8 @@ def detect_provider_for_model(
break
if direct_match:
# Check if we have credentials for this provider
# Check if we have credentials for this provider — env vars,
# credential pool, or auth store entries.
has_creds = False
try:
from hermes_cli.auth import PROVIDER_REGISTRY
@ -1077,16 +1075,28 @@ def detect_provider_for_model(
break
except Exception:
pass
# Also check credential pool and auth store — covers OAuth,
# Claude Code tokens, and other non-env-var credentials (#10300).
if not has_creds:
try:
from agent.credential_pool import load_pool
pool = load_pool(direct_match)
if pool.has_credentials():
has_creds = True
except Exception:
pass
if not has_creds:
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
has_creds = True
except Exception:
pass
if has_creds:
return (direct_match, name)
# No direct creds — try to find this model on OpenRouter instead
or_slug = _find_openrouter_slug(name)
if or_slug:
return ("openrouter", or_slug)
# Still return the direct provider — credential resolution will
# give a clear error rather than silently using the wrong provider
# Always return the direct provider match. If credentials are
# missing, the client init will give a clear error rather than
# silently routing through the wrong provider (#10300).
return (direct_match, name)
# --- Step 2: check OpenRouter catalog ---
@ -1560,6 +1570,11 @@ def copilot_model_api_mode(
primary signal. Falls back to the catalog's ``supported_endpoints``
only for models not covered by the pattern check.
"""
# Fetch the catalog once so normalize + endpoint check share it
# (avoids two redundant network calls for non-GPT-5 models).
if catalog is None and api_key:
catalog = fetch_github_model_catalog(api_key=api_key)
normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
if not normalized:
return "chat_completions"
@ -1569,9 +1584,6 @@ def copilot_model_api_mode(
return "codex_responses"
# Secondary: check catalog for non-GPT-5 models (Claude via /v1/messages, etc.)
if catalog is None and api_key:
catalog = fetch_github_model_catalog(api_key=api_key)
if catalog:
catalog_entry = next((item for item in catalog if item.get("id") == normalized), None)
if isinstance(catalog_entry, dict):
@ -1786,6 +1798,125 @@ def fetch_api_models(
return probe_api_models(api_key, base_url, timeout=timeout).get("models")
# ---------------------------------------------------------------------------
# Ollama Cloud — merged model discovery with disk cache
# ---------------------------------------------------------------------------
_OLLAMA_CLOUD_CACHE_TTL = 3600 # 1 hour
def _ollama_cloud_cache_path() -> Path:
    """Location of the on-disk Ollama Cloud model-list cache.

    Returns:
        Path to ``ollama_cloud_models_cache.json`` inside the Hermes home dir.
    """
    from hermes_constants import get_hermes_home

    home = get_hermes_home()
    return home / "ollama_cloud_models_cache.json"
def _load_ollama_cloud_cache(*, ignore_ttl: bool = False) -> Optional[dict]:
"""Load cached Ollama Cloud models from disk.
Args:
ignore_ttl: If True, return data even if the TTL has expired (stale fallback).
"""
try:
cache_path = _ollama_cloud_cache_path()
if not cache_path.exists():
return None
with open(cache_path, encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
return None
models = data.get("models")
if not (isinstance(models, list) and models):
return None
if not ignore_ttl:
cached_at = data.get("cached_at", 0)
if (time.time() - cached_at) > _OLLAMA_CLOUD_CACHE_TTL:
return None # stale
return data
except Exception:
pass
return None
def _save_ollama_cloud_cache(models: list[str]) -> None:
"""Persist the merged Ollama Cloud model list to disk."""
try:
from utils import atomic_json_write
cache_path = _ollama_cloud_cache_path()
cache_path.parent.mkdir(parents=True, exist_ok=True)
atomic_json_write(cache_path, {"models": models, "cached_at": time.time()}, indent=None)
except Exception:
pass
def fetch_ollama_cloud_models(
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
    *,
    force_refresh: bool = False,
) -> list[str]:
    """Fetch Ollama Cloud models by merging live API + models.dev, with disk cache.

    Resolution order:
      1. Disk cache (if fresh, < 1 hour, and not force_refresh)
      2. Live ``/v1/models`` endpoint (primary — freshest source)
      3. models.dev registry (secondary — fills gaps for unlisted models)
      4. Merge: live models first, then models.dev additions (deduped)

    On total failure the stale disk cache (TTL ignored) is returned, and an
    empty list only when no data source is available at all — never ``None``.

    Args:
        api_key: Ollama API key; falls back to the ``OLLAMA_API_KEY`` env var.
        base_url: OpenAI-compatible endpoint; falls back to ``OLLAMA_BASE_URL``,
            then ``https://ollama.com/v1``.
        force_refresh: Skip the fresh disk cache and re-probe the sources.

    Returns:
        Ordered, deduplicated list of model IDs (live models first).
    """
    # 1. Fresh disk cache wins unless the caller explicitly refreshes.
    if not force_refresh:
        cached = _load_ollama_cloud_cache()
        if cached is not None:
            return cached["models"]

    # 2. Live API probe — freshest source, but needs a key.
    if not api_key:
        api_key = os.getenv("OLLAMA_API_KEY", "")
    if not base_url:
        base_url = os.getenv("OLLAMA_BASE_URL", "") or "https://ollama.com/v1"
    live_models: list[str] = []
    if api_key:
        result = fetch_api_models(api_key, base_url, timeout=8.0)
        if result:
            live_models = result

    # 3. models.dev registry — fills gaps for models the live endpoint omits.
    mdev_models: list[str] = []
    try:
        from agent.models_dev import list_agentic_models
        mdev_models = list_agentic_models("ollama-cloud")
    except Exception:
        pass

    # 4. Merge, order-preserving: live first, then models.dev additions.
    seen: set[str] = set()
    merged: list[str] = []
    for m in live_models + mdev_models:
        if m and m not in seen:
            seen.add(m)
            merged.append(m)
    if merged:
        _save_ollama_cloud_cache(merged)
        return merged

    # Total failure (both sources empty or only falsy entries) — previously an
    # all-falsy merge returned [] here without trying the stale cache, breaking
    # the documented fallback. Serve stale cache if available.
    stale = _load_ollama_cloud_cache(ignore_ttl=True)
    if stale is not None:
        return stale["models"]
    return []
def validate_requested_model(
model_name: str,
provider: Optional[str],

View file

@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str:
"openai": "OpenAI TTS",
"elevenlabs": "ElevenLabs",
"edge": "Edge TTS",
"xai": "xAI TTS",
"mistral": "Mistral Voxtral TTS",
"neutts": "NeuTTS",
}

View file

@ -112,6 +112,7 @@ class LoadedPlugin:
module: Optional[types.ModuleType] = None
tools_registered: List[str] = field(default_factory=list)
hooks_registered: List[str] = field(default_factory=list)
commands_registered: List[str] = field(default_factory=list)
enabled: bool = False
error: Optional[str] = None
@ -211,6 +212,84 @@ class PluginContext:
}
logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name)
# -- slash command registration -------------------------------------------
def register_command(
    self,
    name: str,
    handler: Callable,
    description: str = "",
) -> None:
    """Register an in-session slash command (e.g. ``/lcm``).

    The handler signature is ``fn(raw_args: str) -> str | None``; it may
    also be an async callable — the gateway dispatch handles both.

    Unlike ``register_cli_command()`` (which creates ``hermes <subcommand>``
    terminal commands), this registers slash commands that users invoke
    during a conversation, in both CLI and gateway sessions.

    Names conflicting with built-in commands are rejected with a warning.
    """
    normalized = name.lower().strip().lstrip("/").replace(" ", "-")
    if not normalized:
        logger.warning(
            "Plugin '%s' tried to register a command with an empty name.",
            self.manifest.name,
        )
        return
    # Built-in commands always win — refuse to shadow them.
    try:
        from hermes_cli.commands import resolve_command
        if resolve_command(normalized) is not None:
            logger.warning(
                "Plugin '%s' tried to register command '/%s' which conflicts "
                "with a built-in command. Skipping.",
                self.manifest.name, normalized,
            )
            return
    except Exception:
        pass  # Commands module unavailable — register optimistically.
    self._manager._plugin_commands[normalized] = {
        "handler": handler,
        "description": description or "Plugin command",
        "plugin": self.manifest.name,
    }
    logger.debug("Plugin %s registered command: /%s", self.manifest.name, normalized)
# -- tool dispatch -------------------------------------------------------
def dispatch_tool(self, tool_name: str, args: dict, **kwargs) -> str:
    """Dispatch a tool call through the registry, with parent agent context.

    Public entry point for plugin slash commands that need to invoke tools
    such as ``delegate_task`` without reaching into framework internals:
    the parent agent (when one exists) is resolved automatically, so
    plugins never access the agent directly.

    Args:
        tool_name: Registry name of the tool (e.g. ``"delegate_task"``).
        args: Tool arguments dict (same as what the model would pass).
        **kwargs: Extra keyword args forwarded to the registry dispatch.

    Returns:
        JSON string from the tool handler (same format as model tool calls).
    """
    from tools.registry import registry

    # Wire up parent agent context when available (CLI mode). In gateway
    # mode _cli_ref is None — tools degrade gracefully (workspace hints
    # fall back to TERMINAL_CWD, no spinner).
    if "parent_agent" not in kwargs:
        cli_ref = self._manager._cli_ref
        parent = getattr(cli_ref, "agent", None) if cli_ref else None
        if parent is not None:
            kwargs["parent_agent"] = parent
    return registry.dispatch(tool_name, args, **kwargs)
# -- context engine registration -----------------------------------------
def register_context_engine(self, engine) -> None:
@ -323,6 +402,7 @@ class PluginManager:
self._plugin_tool_names: Set[str] = set()
self._cli_commands: Dict[str, dict] = {}
self._context_engine = None # Set by a plugin via register_context_engine()
self._plugin_commands: Dict[str, dict] = {} # Slash commands registered by plugins
self._discovered: bool = False
self._cli_ref = None # Set by CLI after plugin discovery
# Plugin skill registry: qualified name → metadata dict.
@ -485,6 +565,10 @@ class PluginManager:
for h in p.hooks_registered
}
)
loaded.commands_registered = [
c for c in self._plugin_commands
if self._plugin_commands[c].get("plugin") == manifest.name
]
loaded.enabled = True
except Exception as exc:
@ -598,6 +682,7 @@ class PluginManager:
"enabled": loaded.enabled,
"tools": len(loaded.tools_registered),
"hooks": len(loaded.hooks_registered),
"commands": len(loaded.commands_registered),
"error": loaded.error,
}
)
@ -699,6 +784,20 @@ def get_plugin_context_engine():
return get_plugin_manager()._context_engine
def get_plugin_command_handler(name: str) -> Optional[Callable]:
    """Return the handler for a plugin-registered slash command, or ``None``."""
    commands = get_plugin_manager()._plugin_commands
    if name in commands:
        return commands[name]["handler"]
    return None
def get_plugin_commands() -> Dict[str, dict]:
    """Return the full plugin commands dict (name → {handler, description, plugin}).

    Safe to call before discovery — returns an empty dict if no plugins loaded.
    """
    manager = get_plugin_manager()
    return manager._plugin_commands
def get_plugin_toolsets() -> List[tuple]:
"""Return plugin toolsets as ``(key, label, description)`` tuples.

View file

@ -128,7 +128,7 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_env_var="HF_BASE_URL",
),
"xai": HermesOverlay(
transport="openai_chat",
transport="codex_responses",
base_url_override="https://api.x.ai/v1",
base_url_env_var="XAI_BASE_URL",
),
@ -141,6 +141,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.arcee.ai/api/v1",
base_url_env_var="ARCEE_BASE_URL",
),
"ollama-cloud": HermesOverlay(
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
),
}
@ -180,6 +184,7 @@ ALIASES: Dict[str, str] = {
# xai
"x-ai": "xai",
"x.ai": "xai",
"grok": "xai",
# kimi-for-coding (models.dev ID)
"kimi": "kimi-for-coding",
@ -250,7 +255,7 @@ ALIASES: Dict[str, str] = {
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
"lm_studio": "lmstudio",
"ollama": "ollama-cloud",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"vllm": "local",
"llamacpp": "local",
"llama.cpp": "local",
@ -269,6 +274,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"xiaomi": "Xiaomi MiMo",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",
}

View file

@ -41,6 +41,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
tool calls with reasoning (chat/completions returns 400).
"""
normalized = (base_url or "").strip().lower().rstrip("/")
if "api.x.ai" in normalized:
return "codex_responses"
if "api.openai.com" in normalized and "openrouter" not in normalized:
return "codex_responses"
return None
@ -163,10 +165,13 @@ def _resolve_runtime_from_pool_entry(
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
elif provider == "openrouter":
base_url = base_url or OPENROUTER_BASE_URL
elif provider == "xai":
api_mode = "codex_responses"
elif provider == "nous":
api_mode = "chat_completions"
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Honour model.base_url from config.yaml when the configured provider
@ -627,6 +632,8 @@ def _resolve_explicit_runtime(
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
elif provider == "xai":
api_mode = "codex_responses"
else:
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode:
@ -923,6 +930,8 @@ def resolve_runtime_provider(
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
elif provider == "xai":
api_mode = "codex_responses"
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Only honor persisted api_mode when it belongs to the same provider family.

View file

@ -920,6 +920,7 @@ def _setup_tts_provider(config: dict):
"edge": "Edge TTS",
"elevenlabs": "ElevenLabs",
"openai": "OpenAI TTS",
"xai": "xAI TTS",
"minimax": "MiniMax TTS",
"mistral": "Mistral Voxtral TTS",
"neutts": "NeuTTS",
@ -941,12 +942,13 @@ def _setup_tts_provider(config: dict):
"Edge TTS (free, cloud-based, no setup needed)",
"ElevenLabs (premium quality, needs API key)",
"OpenAI TTS (good quality, needs API key)",
"xAI TTS (Grok voices, needs API key)",
"MiniMax TTS (high quality with voice cloning, needs API key)",
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"])
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "neutts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@ -1012,6 +1014,23 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "xai":
existing = get_env_value("XAI_API_KEY")
if not existing:
print()
api_key = prompt("xAI API key for TTS", password=True)
if api_key:
save_env_value("XAI_API_KEY", api_key)
print_success("xAI TTS API key saved")
else:
from hermes_constants import display_hermes_home as _dhh
print_warning(
"No xAI API key provided for TTS. Configure XAI_API_KEY via "
f"hermes setup model or {_dhh()}/.env to use xAI TTS. "
"Falling back to Edge TTS."
)
selected = "edge"
elif selected == "minimax":
existing = get_env_value("MINIMAX_API_KEY")
if not existing:
@ -1611,9 +1630,19 @@ def _setup_telegram():
return
print_info("Create a bot via @BotFather on Telegram")
token = prompt("Telegram bot token", password=True)
if not token:
return
import re
while True:
token = prompt("Telegram bot token", password=True)
if not token:
return
if not re.match(r"^\d+:[A-Za-z0-9_-]{30,}$", token):
print_error(
"Invalid token format. Expected: <numeric_id>:<alphanumeric_hash> "
"(e.g., 123456789:ABCdefGHI-jklMNOpqrSTUvwxYZ)"
)
continue
break
save_env_value("TELEGRAM_BOT_TOKEN", token)
print_success("Telegram token saved")

View file

@ -146,6 +146,14 @@ TOOL_CATEGORIES = {
],
"tts_provider": "openai",
},
{
"name": "xAI TTS",
"tag": "Grok voices - requires xAI API key",
"env_vars": [
{"key": "XAI_API_KEY", "prompt": "xAI API key", "url": "https://console.x.ai/"},
],
"tts_provider": "xai",
},
{
"name": "ElevenLabs",
"badge": "paid",

View file

@ -11,6 +11,7 @@ Usage:
import asyncio
import hmac
import importlib.util
import json
import logging
import os
@ -96,6 +97,9 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
"/api/config/defaults",
"/api/config/schema",
"/api/model/info",
"/api/dashboard/themes",
"/api/dashboard/plugins",
"/api/dashboard/plugins/rescan",
})
@ -114,7 +118,7 @@ def _require_token(request: Request) -> None:
async def auth_middleware(request: Request, call_next):
"""Require the session token on all /api/ routes except the public list."""
path = request.url.path
if path.startswith("/api/") and path not in _PUBLIC_API_PATHS:
if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
auth = request.headers.get("authorization", "")
expected = f"Bearer {_SESSION_TOKEN}"
if not hmac.compare_digest(auth.encode(), expected.encode()):
@ -166,6 +170,11 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
"description": "CLI visual theme",
"options": ["default", "ares", "mono", "slate"],
},
"dashboard.theme": {
"type": "select",
"description": "Web dashboard visual theme",
"options": ["default", "midnight", "ember", "mono", "cyberpunk", "rose"],
},
"display.resume_display": {
"type": "select",
"description": "How resumed sessions display history",
@ -224,6 +233,7 @@ _CATEGORY_MERGE: Dict[str, str] = {
"approvals": "security",
"human_delay": "display",
"smart_model_routing": "agent",
"dashboard": "display",
}
# Display order for tabs — unlisted categories sort alphabetically after these.
@ -2068,6 +2078,237 @@ def mount_spa(application: FastAPI):
return _serve_index()
# ---------------------------------------------------------------------------
# Dashboard theme endpoints
# ---------------------------------------------------------------------------
# Built-in dashboard themes — label + description only. The actual color
# definitions live in the frontend (web/src/themes/presets.ts).
_BUILTIN_DASHBOARD_THEMES = [
{"name": "default", "label": "Hermes Teal", "description": "Classic dark teal — the canonical Hermes look"},
{"name": "midnight", "label": "Midnight", "description": "Deep blue-violet with cool accents"},
{"name": "ember", "label": "Ember", "description": "Warm crimson and bronze — forge vibes"},
{"name": "mono", "label": "Mono", "description": "Clean grayscale — minimal and focused"},
{"name": "cyberpunk", "label": "Cyberpunk", "description": "Neon green on black — matrix terminal"},
{"name": "rose", "label": "Rosé", "description": "Soft pink and warm ivory — easy on the eyes"},
]
def _discover_user_themes() -> list:
    """Scan ``~/.hermes/dashboard-themes/*.yaml`` for user-created themes.

    Each YAML file must be a mapping with at least a ``name`` key; ``label``
    and ``description`` are optional. Files that fail to parse or lack a
    ``name`` are skipped with a warning (previously they were dropped
    silently, making broken themes hard to diagnose).

    Returns:
        List of ``{"name", "label", "description"}`` dicts, ordered by filename.
    """
    themes_dir = get_hermes_home() / "dashboard-themes"
    if not themes_dir.is_dir():
        return []
    result = []
    for f in sorted(themes_dir.glob("*.yaml")):
        try:
            data = yaml.safe_load(f.read_text(encoding="utf-8"))
            if isinstance(data, dict) and data.get("name"):
                result.append({
                    "name": data["name"],
                    "label": data.get("label", data["name"]),
                    "description": data.get("description", ""),
                })
            else:
                _log.warning("Ignoring dashboard theme %s: not a mapping with a 'name' key", f)
        except Exception as exc:
            # A broken theme file must not take down the endpoint — log and move on.
            _log.warning("Bad dashboard theme file %s: %s", f, exc)
            continue
    return result
@app.get("/api/dashboard/themes")
async def get_dashboard_themes():
    """Return available dashboard themes and the currently active one.

    Merges built-in themes with user themes from ``~/.hermes/dashboard-themes``.
    A user theme with the same ``name`` as a built-in one overrides it, keeping
    the built-in's position in the list.
    """
    config = load_config()
    active = config.get("dashboard", {}).get("theme", "default")
    # Merge built-in + user; user themes override built-in by name.
    # (The previous implementation added builtins first and skipped any user
    # theme whose name was already seen, so the documented override never
    # actually happened.) dict preserves insertion order, and re-assigning an
    # existing key keeps its original position.
    by_name = {t["name"]: t for t in _BUILTIN_DASHBOARD_THEMES}
    for t in _discover_user_themes():
        by_name[t["name"]] = t
    return {"themes": list(by_name.values()), "active": active}
class ThemeSetBody(BaseModel):
    """Request body for ``PUT /api/dashboard/theme``."""
    # Theme name to activate (built-in or user-defined).
    name: str
@app.put("/api/dashboard/theme")
async def set_dashboard_theme(body: ThemeSetBody):
    """Set the active dashboard theme (persists to config.yaml)."""
    config = load_config()
    # Create the "dashboard" section on first use, then record the theme.
    config.setdefault("dashboard", {})["theme"] = body.name
    save_config(config)
    return {"ok": True, "theme": body.name}
# ---------------------------------------------------------------------------
# Dashboard plugin system
# ---------------------------------------------------------------------------
def _discover_dashboard_plugins() -> list:
    """Scan plugins/*/dashboard/manifest.json for dashboard extensions.

    Checks three plugin sources (same as hermes_cli.plugins):
      1. User plugins: ~/.hermes/plugins/<name>/dashboard/manifest.json
      2. Bundled plugins: <repo>/plugins/<name>/dashboard/manifest.json (memory/, etc.)
      3. Project plugins: ./.hermes/plugins/ (only if HERMES_ENABLE_PROJECT_PLUGINS)

    Returns:
        List of manifest dicts. Keys prefixed with ``_`` (``_dir``,
        ``_api_file``) are server-internal and stripped before the frontend
        sees them.
    """
    plugins = []
    seen_names: set = set()
    # Search order doubles as precedence: the first manifest seen for a given
    # plugin name wins (user dir is scanned before the bundled dirs).
    search_dirs = [
        (get_hermes_home() / "plugins", "user"),
        # NOTE(review): this entry scans *children of* plugins/memory
        # (plugins/memory/<child>/dashboard/manifest.json). If the intent was
        # the bundled memory plugin itself, the next entry already covers it
        # via plugins/memory/dashboard/manifest.json — confirm.
        (PROJECT_ROOT / "plugins" / "memory", "bundled"),
        (PROJECT_ROOT / "plugins", "bundled"),
    ]
    if os.environ.get("HERMES_ENABLE_PROJECT_PLUGINS"):
        search_dirs.append((Path.cwd() / ".hermes" / "plugins", "project"))
    for plugins_root, source in search_dirs:
        if not plugins_root.is_dir():
            continue
        for child in sorted(plugins_root.iterdir()):
            if not child.is_dir():
                continue
            manifest_file = child / "dashboard" / "manifest.json"
            if not manifest_file.exists():
                continue
            try:
                data = json.loads(manifest_file.read_text(encoding="utf-8"))
                name = data.get("name", child.name)
                # First manifest for a name wins — later sources are skipped.
                if name in seen_names:
                    continue
                seen_names.add(name)
                plugins.append({
                    "name": name,
                    "label": data.get("label", name),
                    "description": data.get("description", ""),
                    "icon": data.get("icon", "Puzzle"),
                    "version": data.get("version", "0.0.0"),
                    # Default tab: routed at "/<name>", appended after built-ins.
                    "tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
                    "entry": data.get("entry", "dist/index.js"),
                    "css": data.get("css"),
                    "has_api": bool(data.get("api")),
                    "source": source,
                    # Server-internal: absolute dashboard dir + backend API file.
                    "_dir": str(child / "dashboard"),
                    "_api_file": data.get("api"),
                })
            except Exception as exc:
                # A malformed manifest must not break discovery of the rest.
                _log.warning("Bad dashboard plugin manifest %s: %s", manifest_file, exc)
                continue
    return plugins
# Cache discovered plugins per-process (refresh on explicit re-scan).
_dashboard_plugins_cache: Optional[list] = None


def _get_dashboard_plugins(force_rescan: bool = False) -> list:
    """Return the (cached) dashboard plugin list, re-scanning on demand."""
    global _dashboard_plugins_cache
    needs_scan = force_rescan or _dashboard_plugins_cache is None
    if needs_scan:
        _dashboard_plugins_cache = _discover_dashboard_plugins()
    return _dashboard_plugins_cache
@app.get("/api/dashboard/plugins")
async def get_dashboard_plugins():
"""Return discovered dashboard plugins."""
plugins = _get_dashboard_plugins()
# Strip internal fields before sending to frontend.
return [
{k: v for k, v in p.items() if not k.startswith("_")}
for p in plugins
]
@app.get("/api/dashboard/plugins/rescan")
async def rescan_dashboard_plugins():
"""Force re-scan of dashboard plugins."""
plugins = _get_dashboard_plugins(force_rescan=True)
return {"ok": True, "count": len(plugins)}
@app.get("/dashboard-plugins/{plugin_name}/{file_path:path}")
async def serve_plugin_asset(plugin_name: str, file_path: str):
"""Serve static assets from a dashboard plugin directory.
Only serves files from the plugin's ``dashboard/`` subdirectory.
Path traversal is blocked by checking ``resolve().is_relative_to()``.
"""
plugins = _get_dashboard_plugins()
plugin = next((p for p in plugins if p["name"] == plugin_name), None)
if not plugin:
raise HTTPException(status_code=404, detail="Plugin not found")
base = Path(plugin["_dir"])
target = (base / file_path).resolve()
if not target.is_relative_to(base.resolve()):
raise HTTPException(status_code=403, detail="Path traversal blocked")
if not target.exists() or not target.is_file():
raise HTTPException(status_code=404, detail="File not found")
# Guess content type
suffix = target.suffix.lower()
content_types = {
".js": "application/javascript",
".mjs": "application/javascript",
".css": "text/css",
".json": "application/json",
".html": "text/html",
".svg": "image/svg+xml",
".png": "image/png",
".jpg": "image/jpeg",
".woff2": "font/woff2",
".woff": "font/woff",
}
media_type = content_types.get(suffix, "application/octet-stream")
return FileResponse(target, media_type=media_type)
def _mount_plugin_api_routes():
    """Import and mount backend API routes from plugins that declare them.

    Each plugin's ``api`` field points to a Python file that must expose
    a ``router`` (FastAPI APIRouter). Routes are mounted under
    ``/api/plugins/<name>/``.

    Failures are logged and skipped so one broken plugin cannot prevent
    the others (or the dashboard itself) from loading.
    """
    for plugin in _get_dashboard_plugins():
        api_file = plugin.get("_api_file")
        if not api_file:
            continue
        module_path = Path(plugin["_dir"]) / api_file
        if not module_path.exists():
            _log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file)
            continue
        try:
            # Load the plugin module under a synthetic, collision-free name.
            spec = importlib.util.spec_from_file_location(
                f"hermes_dashboard_plugin_{plugin['name']}", module_path,
            )
            if spec is None or spec.loader is None:
                continue
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            router = getattr(module, "router", None)
            if router is None:
                _log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
                continue
            app.include_router(router, prefix=f"/api/plugins/{plugin['name']}")
            _log.info("Mounted plugin API routes: /api/plugins/%s/", plugin["name"])
        except Exception as exc:
            _log.warning("Failed to load plugin %s API routes: %s", plugin["name"], exc)
# Mount plugin API routes before the SPA catch-all.
_mount_plugin_api_routes()
mount_spa(app)  # SPA catch-all; registered last so API routes take precedence

View file

@ -1,12 +1,12 @@
---
name: honcho
description: Configure and use Honcho memory with Hermes -- cross-session user modeling, multi-profile peer isolation, observation config, and dialectic reasoning. Use when setting up Honcho, troubleshooting memory, managing profiles with Honcho peers, or tuning observation and recall settings.
version: 1.0.0
description: Configure and use Honcho memory with Hermes -- cross-session user modeling, multi-profile peer isolation, observation config, dialectic reasoning, session summaries, and context budget enforcement. Use when setting up Honcho, troubleshooting memory, managing profiles with Honcho peers, or tuning observation, recall, and dialectic settings.
version: 2.0.0
author: Hermes Agent
license: MIT
metadata:
hermes:
tags: [Honcho, Memory, Profiles, Observation, Dialectic, User-Modeling]
tags: [Honcho, Memory, Profiles, Observation, Dialectic, User-Modeling, Session-Summary]
homepage: https://docs.honcho.dev
related_skills: [hermes-agent]
prerequisites:
@ -22,8 +22,9 @@ Honcho provides AI-native cross-session user modeling. It learns who the user is
- Setting up Honcho (cloud or self-hosted)
- Troubleshooting memory not working / peers not syncing
- Creating multi-profile setups where each agent has its own Honcho peer
- Tuning observation, recall, or write frequency settings
- Understanding what the 4 Honcho tools do and when to use them
- Tuning observation, recall, dialectic depth, or write frequency settings
- Understanding what the 5 Honcho tools do and when to use them
- Configuring context budgets and session summary injection
## Setup
@ -51,6 +52,27 @@ hermes honcho status # shows resolved config, connection test, peer info
## Architecture
### Base Context Injection
When Honcho injects context into the system prompt (in `hybrid` or `context` recall modes), it assembles the base context block in this order:
1. **Session summary** -- a short digest of the current session so far (placed first so the model has immediate conversational continuity)
2. **User representation** -- Honcho's accumulated model of the user (preferences, facts, patterns)
3. **AI peer card** -- the identity card for this Hermes profile's AI peer
The session summary is generated automatically by Honcho at the start of each turn (when a prior session exists). It gives the model a warm start without replaying full history.
### Cold / Warm Prompt Selection
Honcho automatically selects between two prompt strategies:
| Condition | Strategy | What happens |
|-----------|----------|--------------|
| No prior session or empty representation | **Cold start** | Lightweight intro prompt; skips summary injection; encourages the model to learn about the user |
| Existing representation and/or session history | **Warm start** | Full base context injection (summary → representation → card); richer system prompt |
You do not need to configure this -- it is automatic based on session state.
### Peers
Honcho models conversations as interactions between **peers**. Hermes creates two peers per session:
@ -112,6 +134,63 @@ How the agent accesses Honcho memory:
| `context` | Yes | No (hidden) | Minimal token cost, no tool calls |
| `tools` | No | Yes | Agent controls all memory access explicitly |
## Three Orthogonal Knobs
Honcho's dialectic behavior is controlled by three independent dimensions. Each can be tuned without affecting the others:
### Cadence (when)
Controls **how often** dialectic and context calls happen.
| Key | Default | Description |
|-----|---------|-------------|
| `contextCadence` | `1` | Min turns between context API calls |
| `dialecticCadence` | `3` | Min turns between dialectic API calls |
| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` for base context injection |
Higher cadence values reduce API calls and cost. `dialecticCadence: 3` (default) means the dialectic engine fires at most every 3rd turn.
### Depth (how many)
Controls **how many rounds** of dialectic reasoning Honcho performs per query.
| Key | Default | Range | Description |
|-----|---------|-------|-------------|
| `dialecticDepth` | `1` | 1-3 | Number of dialectic reasoning rounds per query |
| `dialecticDepthLevels` | -- | array | Optional per-depth-round level overrides (see below) |
`dialecticDepth: 2` means Honcho runs two rounds of dialectic synthesis. The first round produces an initial answer; the second refines it.
`dialecticDepthLevels` lets you set the reasoning level for each round independently:
```json
{
"dialecticDepth": 3,
"dialecticDepthLevels": ["low", "medium", "high"]
}
```
If `dialecticDepthLevels` is omitted, rounds use **proportional levels** derived from `dialecticReasoningLevel` (the base):
| Depth | Pass levels |
|-------|-------------|
| 1 | [base] |
| 2 | [minimal, base] |
| 3 | [minimal, base, low] |
This keeps earlier passes cheap while using full depth on the final synthesis.
### Level (how hard)
Controls the **intensity** of each dialectic reasoning round.
| Key | Default | Description |
|-----|---------|-------------|
| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` |
| `dialecticDynamic` | `true` | When `true`, the model can pass `reasoning_level` to `honcho_reasoning` to override the default per-call. `false` = always use `dialecticReasoningLevel`, model overrides ignored |
Higher levels produce richer synthesis but cost more tokens on Honcho's backend.
## Multi-Profile Setup
Each Hermes profile gets its own Honcho AI peer while sharing the same workspace (user context). This means:
@ -149,6 +228,7 @@ Override any setting in the host block:
"hermes.coder": {
"aiPeer": "coder",
"recallMode": "tools",
"dialecticDepth": 2,
"observation": {
"user": { "observeMe": true, "observeOthers": false },
"ai": { "observeMe": true, "observeOthers": true }
@ -160,19 +240,97 @@ Override any setting in the host block:
## Tools
The agent has 4 Honcho tools (hidden in `context` recall mode):
The agent has 5 bidirectional Honcho tools (hidden in `context` recall mode):
| Tool | LLM call? | Cost | Use when |
|------|-----------|------|----------|
| `honcho_profile` | No | minimal | Quick factual snapshot at conversation start or for fast name/role/pref lookups |
| `honcho_search` | No | low | Fetch specific past facts to reason over yourself — raw excerpts, no synthesis |
| `honcho_context` | No | low | Full session context snapshot: summary, representation, card, recent messages |
| `honcho_reasoning` | Yes | medium–high | Natural language question synthesized by Honcho's dialectic engine |
| `honcho_conclude` | No | minimal | Write or delete a persistent fact; pass `peer: "ai"` for AI self-knowledge |
### `honcho_profile`
Quick factual snapshot of the user -- name, role, preferences, patterns. No LLM call, minimal cost. Use at conversation start or for fast lookups.
Read or update a peer card — curated key facts (name, role, preferences, communication style). Pass `card: [...]` to update; omit to read. No LLM call.
### `honcho_search`
Semantic search over stored context. Returns raw excerpts ranked by relevance, no LLM synthesis. Default 800 tokens, max 2000. Use when you want specific past facts to reason over yourself.
Semantic search over stored context for a specific peer. Returns raw excerpts ranked by relevance, no synthesis. Default 800 tokens, max 2000. Good when you need specific past facts to reason over yourself rather than a synthesized answer.
### `honcho_context`
Natural language question answered by Honcho's dialectic reasoning (LLM call on Honcho's backend). Higher cost, higher quality. Can query about user (default) or the AI peer.
Full session context snapshot from Honcho — session summary, peer representation, peer card, and recent messages. No LLM call. Use when you want to see everything Honcho knows about the current session and peer in one shot.
### `honcho_reasoning`
Natural language question answered by Honcho's dialectic reasoning engine (LLM call on Honcho's backend). Higher cost, higher quality. Pass `reasoning_level` to control depth: `minimal` (fast/cheap) → `low` → `medium` → `high` → `max` (thorough). Omit to use the configured default (`low`). Use for synthesized understanding of the user's patterns, goals, or current state.
### `honcho_conclude`
Write a persistent fact about the user. Conclusions build the user's profile over time. Use when the user states a preference, corrects you, or shares something to remember.
Write or delete a persistent conclusion about a peer. Pass `conclusion: "..."` to create. Pass `delete_id: "..."` to remove a conclusion (for PII removal — Honcho self-heals incorrect conclusions over time, so deletion is only needed for PII). You MUST pass exactly one of the two.
### Bidirectional peer targeting
All 5 tools accept an optional `peer` parameter:
- `peer: "user"` (default) — operates on the user peer
- `peer: "ai"` — operates on this profile's AI peer
- `peer: "<explicit-id>"` — any peer ID in the workspace
Examples:
```
honcho_profile # read user's card
honcho_profile peer="ai" # read AI peer's card
honcho_reasoning query="What does this user care about most?"
honcho_reasoning query="What are my interaction patterns?" peer="ai" reasoning_level="medium"
honcho_conclude conclusion="Prefers terse answers"
honcho_conclude conclusion="I tend to over-explain code" peer="ai"
honcho_conclude delete_id="abc123" # PII removal
```
## Agent Usage Patterns
Guidelines for Hermes when Honcho memory is active.
### On conversation start
```
1. honcho_profile → fast warmup, no LLM cost
2. If context looks thin → honcho_context (full snapshot, still no LLM)
3. If deep synthesis needed → honcho_reasoning (LLM call, use sparingly)
```
Do NOT call `honcho_reasoning` on every turn. Auto-injection already handles ongoing context refresh. Use the reasoning tool only when you genuinely need synthesized insight the base context doesn't provide.
### When the user shares something to remember
```
honcho_conclude conclusion="<specific, actionable fact>"
```
Good conclusions: "Prefers code examples over prose explanations", "Working on a Rust async project through April 2026"
Bad conclusions: "User said something about Rust" (too vague), "User seems technical" (already in representation)
### When the user asks about past context / you need to recall specifics
```
honcho_search query="<topic>" → fast, no LLM, good for specific facts
honcho_context → full snapshot with summary + messages
honcho_reasoning query="<question>" → synthesized answer, use when search isn't enough
```
### When to use `peer: "ai"`
Use AI peer targeting to build and query the agent's own self-knowledge:
- `honcho_conclude conclusion="I tend to be verbose when explaining architecture" peer="ai"` — self-correction
- `honcho_reasoning query="How do I typically handle ambiguous requests?" peer="ai"` — self-audit
- `honcho_profile peer="ai"` — review own identity card
### When NOT to call tools
In `hybrid` and `context` modes, base context (user representation + card + session summary) is auto-injected before every turn. Do not re-fetch what was already injected. Call tools only when:
- You need something the injected context doesn't have
- The user explicitly asks you to recall or check memory
- You're writing a conclusion about something new
### Cadence awareness
`honcho_reasoning` on the tool side shares the same cost as auto-injection dialectic. After an explicit tool call, the auto-injection cadence resets — avoiding double-charging the same turn.
## Config Reference
@ -191,18 +349,39 @@ Config file: `$HERMES_HOME/honcho.json` (profile-local) or `~/.honcho/config.jso
| `observation` | all on | Per-peer `observeMe`/`observeOthers` booleans |
| `writeFrequency` | `async` | `async`, `turn`, `session`, or integer N |
| `sessionStrategy` | `per-directory` | `per-directory`, `per-repo`, `per-session`, `global` |
| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` |
| `dialecticDynamic` | `true` | Auto-bump reasoning by query length. `false` = fixed level |
| `messageMaxChars` | `25000` | Max chars per message (chunked if exceeded) |
| `dialecticMaxInputChars` | `10000` | Max chars for dialectic query input |
### Cost-awareness (advanced, root config only)
### Dialectic settings
| Key | Default | Description |
|-----|---------|-------------|
| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` |
| `dialecticDynamic` | `true` | Auto-bump reasoning by query complexity. `false` = fixed level |
| `dialecticDepth` | `1` | Number of dialectic rounds per query (1-3) |
| `dialecticDepthLevels` | -- | Optional array of per-round levels, e.g. `["low", "high"]` |
| `dialecticMaxInputChars` | `10000` | Max chars for dialectic query input |
### Context budget and injection
| Key | Default | Description |
|-----|---------|-------------|
| `contextTokens` | uncapped | Max tokens for the combined base context injection (summary + representation + card). Opt-in cap — omit to leave uncapped, set to an integer to bound injection size. |
| `injectionFrequency` | `every-turn` | `every-turn` or `first-turn` |
| `contextCadence` | `1` | Min turns between context API calls |
| `dialecticCadence` | `1` | Min turns between dialectic API calls |
| `dialecticCadence` | `3` | Min turns between dialectic LLM calls |
The `contextTokens` budget is enforced at injection time. If the session summary + representation + card exceed the budget, Honcho trims the summary first, then the representation, preserving the card. This prevents context blowup in long sessions.
### Memory-context sanitization
Honcho sanitizes the `memory-context` block before injection to prevent prompt injection and malformed content:
- Strips XML/HTML tags from user-authored conclusions
- Normalizes whitespace and control characters
- Truncates individual conclusions that exceed `messageMaxChars`
- Escapes delimiter sequences that could break the system prompt structure
This fix addresses edge cases where raw user conclusions containing markup or special characters could corrupt the injected context block.
## Troubleshooting
@ -221,6 +400,12 @@ Observation config is synced from the server on each session init. Start a new s
### Messages truncated
Messages over `messageMaxChars` (default 25k) are automatically chunked with `[continued]` markers. If you're hitting this often, check if tool results or skill content is inflating message size.
### Context injection too large
If you see warnings about context budget exceeded, lower `contextTokens` or reduce `dialecticDepth`. The session summary is trimmed first when the budget is tight.
### Session summary missing
Session summary requires at least one prior turn in the current Honcho session. On cold start (new session, no history), the summary is omitted and Honcho uses the cold-start prompt strategy instead.
## CLI Commands
| Command | Description |

View file

@ -0,0 +1,94 @@
/**
 * Example Dashboard Plugin
 *
 * Demonstrates how to build a dashboard plugin using the Hermes Plugin SDK.
 * No build step needed — this is a plain IIFE that uses globals from the SDK.
 */
(function () {
  "use strict";

  // The SDK global is injected by the dashboard before plugin scripts run;
  // plugins reuse its React instance instead of bundling their own.
  const SDK = window.__HERMES_PLUGIN_SDK__;
  const { React } = SDK;
  const { Card, CardHeader, CardTitle, CardContent, Badge, Button } = SDK.components;
  const { useState, useEffect } = SDK.hooks;
  const { cn } = SDK.utils;

  // Root component rendered in this plugin's dashboard tab.
  function ExamplePage() {
    const [greeting, setGreeting] = useState(null);  // backend message, or null before first fetch
    const [loading, setLoading] = useState(false);   // true while the request is in flight

    // Calls this plugin's own backend route (mounted by the plugin system).
    // Falls back to a placeholder string if the backend is unreachable.
    function fetchGreeting() {
      setLoading(true);
      SDK.fetchJSON("/api/plugins/example/hello")
        .then(function (data) { setGreeting(data.message); })
        .catch(function () { setGreeting("(backend not available)"); })
        .finally(function () { setLoading(false); });
    }

    return React.createElement("div", { className: "flex flex-col gap-6" },
      // Header card
      React.createElement(Card, null,
        React.createElement(CardHeader, null,
          React.createElement("div", { className: "flex items-center gap-3" },
            React.createElement(CardTitle, { className: "text-lg" }, "Example Plugin"),
            React.createElement(Badge, { variant: "outline" }, "v1.0.0"),
          ),
        ),
        React.createElement(CardContent, { className: "flex flex-col gap-4" },
          React.createElement("p", { className: "text-sm text-muted-foreground" },
            "This is an example dashboard plugin. It demonstrates using the Plugin SDK to build ",
            "custom tabs with React components, connect to backend API routes, and integrate with ",
            "the existing Hermes UI system.",
          ),
          React.createElement("div", { className: "flex items-center gap-3" },
            React.createElement(Button, {
              onClick: fetchGreeting,
              disabled: loading,
              className: cn(
                "inline-flex items-center gap-2 border border-border bg-background/40 px-4 py-2",
                "text-sm font-courier transition-colors hover:bg-foreground/10 cursor-pointer",
              ),
            }, loading ? "Loading..." : "Call Backend API"),
            // Only rendered once a greeting (or fallback) has been set.
            greeting && React.createElement("span", {
              className: "text-sm font-courier text-muted-foreground",
            }, greeting),
          ),
        ),
      ),

      // Info card about the SDK
      React.createElement(Card, null,
        React.createElement(CardHeader, null,
          React.createElement(CardTitle, { className: "text-base" }, "Plugin SDK Reference"),
        ),
        React.createElement(CardContent, null,
          React.createElement("div", { className: "grid gap-3 text-sm" },
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.React"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "React instance — use instead of importing react"),
            ),
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.hooks"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "useState, useEffect, useCallback, useMemo, useRef, useContext, createContext"),
            ),
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.components"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "Card, Badge, Button, Input, Label, Select, Separator, Tabs, etc."),
            ),
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.api"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "Hermes API client — getStatus(), getSessions(), etc."),
            ),
            React.createElement("div", { className: "flex flex-col gap-1 border border-border p-3" },
              React.createElement("span", { className: "font-medium" }, "window.__HERMES_PLUGIN_SDK__.utils"),
              React.createElement("span", { className: "text-muted-foreground text-xs" }, "cn(), timeAgo(), isoTimeAgo()"),
            ),
          ),
        ),
      ),
    );
  }

  // Register this plugin — the dashboard picks it up automatically.
  window.__HERMES_PLUGINS__.register("example", ExamplePage);
})();

View file

@ -0,0 +1,13 @@
{
"name": "example",
"label": "Example",
"description": "Example dashboard plugin — demonstrates the plugin SDK",
"icon": "Sparkles",
"version": "1.0.0",
"tab": {
"path": "/example",
"position": "after:skills"
},
"entry": "dist/index.js",
"api": "plugin_api.py"
}

View file

@ -0,0 +1,14 @@
"""Example dashboard plugin — backend API routes.
Mounted at /api/plugins/example/ by the dashboard plugin system.
"""
from fastapi import APIRouter
router = APIRouter()
@router.get("/hello")
async def hello():
"""Simple greeting endpoint to demonstrate plugin API routes."""
return {"message": "Hello from the example plugin!", "plugin": "example", "version": "1.0.0"}

View file

@ -1,6 +1,6 @@
# Honcho Memory Provider
AI-native cross-session user modeling with dialectic Q&A, semantic search, peer cards, and persistent conclusions.
AI-native cross-session user modeling with multi-pass dialectic reasoning, session summaries, bidirectional peer tools, and persistent conclusions.
> **Honcho docs:** <https://docs.honcho.dev/v3/guides/integrations/hermes>
@ -19,9 +19,86 @@ hermes memory setup # generic picker, also works
Or manually:
```bash
hermes config set memory.provider honcho
echo "HONCHO_API_KEY=your-key" >> ~/.hermes/.env
echo "HONCHO_API_KEY=***" >> ~/.hermes/.env
```
## Architecture Overview
### Two-Layer Context Injection
Context is injected into the **user message** at API-call time (not the system prompt) to preserve prompt caching. Only a static mode header goes in the system prompt. The injected block is wrapped in `<memory-context>` fences with a system note clarifying it's background data, not new user input.
Two independent layers, each on its own cadence:
**Layer 1 — Base context** (refreshed every `contextCadence` turns):
1. **SESSION SUMMARY** — from `session.context(summary=True)`, placed first
2. **User Representation** — Honcho's evolving model of the user
3. **User Peer Card** — key facts snapshot
4. **AI Self-Representation** — Honcho's model of the AI peer
5. **AI Identity Card** — AI peer facts
**Layer 2 — Dialectic supplement** (fired every `dialecticCadence` turns):
Multi-pass `.chat()` reasoning about the user, appended after base context.
Both layers are joined, then truncated to fit `contextTokens` budget via `_truncate_to_budget` (tokens × 4 chars, word-boundary safe).
### Cold Start vs Warm Session Prompts
Dialectic pass 0 automatically selects its prompt based on session state:
- **Cold** (no base context cached): "Who is this person? What are their preferences, goals, and working style? Focus on facts that would help an AI assistant be immediately useful."
- **Warm** (base context exists): "Given what's been discussed in this session so far, what context about this user is most relevant to the current conversation? Prioritize active context over biographical facts."
Not configurable — determined automatically.
### Dialectic Depth (Multi-Pass Reasoning)
`dialecticDepth` (1–3, clamped) controls how many `.chat()` calls fire per dialectic cycle:
| Depth | Passes | Behavior |
|-------|--------|----------|
| 1 | single `.chat()` | Base query only (cold or warm prompt) |
| 2 | audit + synthesis | Pass 0 result is self-audited; pass 1 does targeted synthesis. Conditional bail-out if pass 0 returns strong signal (>300 chars or structured with bullets/sections >100 chars) |
| 3 | audit + synthesis + reconciliation | Pass 2 reconciles contradictions across prior passes into a final synthesis |
### Proportional Reasoning Levels
When `dialecticDepthLevels` is not set, each pass uses a proportional level relative to `dialecticReasoningLevel` (the "base"):
| Depth | Pass levels |
|-------|-------------|
| 1 | [base] |
| 2 | [minimal, base] |
| 3 | [minimal, base, low] |
Override with `dialecticDepthLevels`: an explicit array of reasoning level strings per pass.
### Three Orthogonal Dialectic Knobs
| Knob | Controls | Type |
|------|----------|------|
| `dialecticCadence` | How often — minimum turns between dialectic firings | int |
| `dialecticDepth` | How many — passes per firing (1–3) | int |
| `dialecticReasoningLevel` | How hard — reasoning ceiling per `.chat()` call | string |
### Input Sanitization
`run_conversation` strips leaked `<memory-context>` blocks from user input before processing. When `saveMessages` persists a turn that included injected context, the block can reappear in subsequent turns via message history. The sanitizer removes `<memory-context>` blocks plus associated system notes.
## Tools
Five bidirectional tools. All accept an optional `peer` parameter (`"user"` or `"ai"`, default `"user"`).
| Tool | LLM call? | Description |
|------|-----------|-------------|
| `honcho_profile` | No | Peer card — key facts snapshot |
| `honcho_search` | No | Semantic search over stored context (800 tok default, 2000 max) |
| `honcho_context` | No | Full session context: summary, representation, card, messages |
| `honcho_reasoning` | Yes | LLM-synthesized answer via dialectic `.chat()` |
| `honcho_conclude` | No | Write a persistent fact/conclusion about the user |
Tool visibility depends on `recallMode`: hidden in `context` mode, always present in `tools` and `hybrid`.
## Config Resolution
Config is read from the first file that exists:
@ -34,42 +111,128 @@ Config is read from the first file that exists:
Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>`.
## Tools
| Tool | LLM call? | Description |
|------|-----------|-------------|
| `honcho_profile` | No | User's peer card -- key facts snapshot |
| `honcho_search` | No | Semantic search over stored context (800 tok default, 2000 max) |
| `honcho_context` | Yes | LLM-synthesized answer via dialectic reasoning |
| `honcho_conclude` | No | Write a persistent fact about the user |
Tool availability depends on `recallMode`: hidden in `context` mode, always present in `tools` and `hybrid`.
For every key, resolution order is: **host block > root > env var > default**.
## Full Configuration Reference
### Identity & Connection
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `apiKey` | string | -- | root / host | API key. Falls back to `HONCHO_API_KEY` env var |
| `baseUrl` | string | -- | root | Base URL for self-hosted Honcho. Local URLs (`localhost`, `127.0.0.1`, `::1`) auto-skip API key auth |
| `environment` | string | `"production"` | root / host | SDK environment mapping |
| `enabled` | bool | auto | root / host | Master toggle. Auto-enables when `apiKey` or `baseUrl` present |
| `workspace` | string | host key | root / host | Honcho workspace ID |
| `peerName` | string | -- | root / host | User peer identity |
| `aiPeer` | string | host key | root / host | AI peer identity |
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `apiKey` | string | — | API key. Falls back to `HONCHO_API_KEY` env var |
| `baseUrl` | string | — | Base URL for self-hosted Honcho. Local URLs auto-skip API key auth |
| `environment` | string | `"production"` | SDK environment mapping |
| `enabled` | bool | auto | Master toggle. Auto-enables when `apiKey` or `baseUrl` present |
| `workspace` | string | host key | Honcho workspace ID. Shared environment — all profiles in the same workspace can see the same user identity and related memories |
| `peerName` | string | — | User peer identity |
| `aiPeer` | string | host key | AI peer identity |
### Memory & Recall
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `recallMode` | string | `"hybrid"` | root / host | `"hybrid"` (auto-inject + tools), `"context"` (auto-inject only, tools hidden), `"tools"` (tools only, no injection). Legacy `"auto"` normalizes to `"hybrid"` |
| `observationMode` | string | `"directional"` | root / host | Shorthand preset: `"directional"` (all on) or `"unified"` (shared pool). Use `observation` object for granular control |
| `observation` | object | -- | root / host | Per-peer observation config (see below) |
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `recallMode` | string | `"hybrid"` | `"hybrid"` (auto-inject + tools), `"context"` (auto-inject only, tools hidden), `"tools"` (tools only, no injection). Legacy `"auto"` → `"hybrid"` |
| `observationMode` | string | `"directional"` | Preset: `"directional"` (all on) or `"unified"` (shared pool). Use `observation` object for granular control |
| `observation` | object | — | Per-peer observation config (see Observation section) |
#### Observation (granular)
### Write Behavior
Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. Set at root or per host block -- each profile can have different observation settings. When present, overrides `observationMode` preset.
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `writeFrequency` | string/int | `"async"` | `"async"` (background), `"turn"` (sync per turn), `"session"` (batch on end), or integer N (every N turns) |
| `saveMessages` | bool | `true` | Persist messages to Honcho API |
### Session Resolution
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `sessionStrategy` | string | `"per-directory"` | `"per-directory"`, `"per-session"`, `"per-repo"` (git root), `"global"` |
| `sessionPeerPrefix` | bool | `false` | Prepend peer name to session keys |
| `sessions` | object | `{}` | Manual directory-to-session-name mappings |
#### Session Name Resolution
The Honcho session name determines which conversation bucket memory lands in. Resolution follows a priority chain — first match wins:
| Priority | Source | Example session name |
|----------|--------|---------------------|
| 1 | Manual map (`sessions` config) | `"myproject-main"` |
| 2 | `/title` command (mid-session rename) | `"refactor-auth"` |
| 3 | Gateway session key (Telegram, Discord, etc.) | `"agent-main-telegram-dm-8439114563"` |
| 4 | `per-session` strategy | Hermes session ID (`20260415_a3f2b1`) |
| 5 | `per-repo` strategy | Git root directory name (`hermes-agent`) |
| 6 | `per-directory` strategy | Current directory basename (`src`) |
| 7 | `global` strategy | Workspace name (`hermes`) |
Gateway platforms always resolve via priority 3 (per-chat isolation) regardless of `sessionStrategy`. The strategy setting only affects CLI sessions.
If `sessionPeerPrefix` is `true`, the peer name is prepended: `eri-hermes-agent`.
#### What each strategy produces
- **`per-directory`** — basename of `$PWD`. Opening hermes in `~/code/myapp` and `~/code/other` gives two separate sessions. Same directory = same session across runs.
- **`per-repo`** — git root directory name. All subdirectories within a repo share one session. Falls back to `per-directory` if not inside a git repo.
- **`per-session`** — Hermes session ID (timestamp + hex). Every `hermes` invocation starts a fresh Honcho session. Falls back to `per-directory` if no session ID is available.
- **`global`** — workspace name. One session for everything. Memory accumulates across all directories and runs.
### Multi-Profile Pattern
Multiple Hermes profiles can share one workspace while maintaining separate AI identities. Config resolution is **host block > root > env var > default** — host blocks inherit from root, so shared settings only need to be declared once:
```json
{
"apiKey": "***",
"workspace": "hermes",
"peerName": "yourname",
"hosts": {
"hermes": {
"aiPeer": "hermes",
"recallMode": "hybrid",
"sessionStrategy": "per-directory"
},
"hermes.coder": {
"aiPeer": "coder",
"recallMode": "tools",
"sessionStrategy": "per-repo"
}
}
}
```
Both profiles see the same user (`yourname`) in the same shared environment (`hermes`), but each AI peer builds its own observations, conclusions, and behavior patterns. The coder's memory stays code-oriented; the main agent's stays broad.
Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>` (e.g. `hermes -p coder` → host key `hermes.coder`).
### Dialectic & Reasoning
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `dialecticDepth` | int | `1` | Passes per dialectic cycle (1–3, clamped). 1=single query, 2=audit+synthesis, 3=audit+synthesis+reconciliation |
| `dialecticDepthLevels` | array | — | Optional array of reasoning level strings per pass. Overrides proportional defaults. Example: `["minimal", "low", "medium"]` |
| `dialecticReasoningLevel` | string | `"low"` | Base reasoning level for `.chat()`: `"minimal"`, `"low"`, `"medium"`, `"high"`, `"max"` |
| `dialecticDynamic` | bool | `true` | When `true`, model can override reasoning level per-call via `honcho_reasoning` tool. When `false`, always uses `dialecticReasoningLevel` |
| `dialecticMaxChars` | int | `600` | Max chars of dialectic result injected into system prompt |
| `dialecticMaxInputChars` | int | `10000` | Max chars for dialectic query input to `.chat()`. Honcho cloud limit: 10k |
### Token Budgets
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `contextTokens` | int | SDK default | Token budget for `context()` API calls. Also gates prefetch truncation (tokens × 4 chars) |
| `messageMaxChars` | int | `25000` | Max chars per message sent via `add_messages()`. Exceeding this triggers chunking with `[continued]` markers. Honcho cloud limit: 25k |
### Cadence (Cost Control)
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `contextCadence` | int | `1` | Minimum turns between base context refreshes (session summary + representation + card) |
| `dialecticCadence` | int | `1` | Minimum turns between dialectic `.chat()` firings |
| `injectionFrequency` | string | `"every-turn"` | `"every-turn"` or `"first-turn"` (inject context on the first user message only, skip from turn 2 onward) |
| `reasoningLevelCap` | string | — | Hard cap on reasoning level: `"minimal"`, `"low"`, `"medium"`, `"high"` |
### Observation (Granular)
Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. When present, overrides `observationMode` preset.
```json
"observation": {
@ -85,74 +248,16 @@ Maps 1:1 to Honcho's per-peer `SessionPeerConfig`. Set at root or per host block
| `ai.observeMe` | `true` | AI peer self-observation (Honcho builds AI representation) |
| `ai.observeOthers` | `true` | AI peer observes user messages (enables cross-peer dialectic) |
Presets for `observationMode`:
- `"directional"` (default): all four booleans `true`
Presets:
- `"directional"` (default): all four `true`
- `"unified"`: user `observeMe=true`, AI `observeOthers=true`, rest `false`
Per-profile example -- coder profile observes the user but user doesn't observe coder:
### Hardcoded Limits
```json
"hosts": {
"hermes.coder": {
"observation": {
"user": { "observeMe": true, "observeOthers": false },
"ai": { "observeMe": true, "observeOthers": true }
}
}
}
```
Settings changed in the [Honcho dashboard](https://app.honcho.dev) are synced back on session init.
### Write Behavior
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `writeFrequency` | string or int | `"async"` | root / host | `"async"` (background thread), `"turn"` (sync per turn), `"session"` (batch on end), or integer N (every N turns) |
| `saveMessages` | bool | `true` | root / host | Whether to persist messages to Honcho API |
### Session Resolution
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `sessionStrategy` | string | `"per-directory"` | root / host | `"per-directory"`, `"per-session"` (new each run), `"per-repo"` (git root name), `"global"` (single session) |
| `sessionPeerPrefix` | bool | `false` | root / host | Prepend peer name to session keys |
| `sessions` | object | `{}` | root | Manual directory-to-session-name mappings: `{"/path/to/project": "my-session"}` |
### Token Budgets & Dialectic
| Key | Type | Default | Scope | Description |
|-----|------|---------|-------|-------------|
| `contextTokens` | int | SDK default | root / host | Token budget for `context()` API calls. Also gates prefetch truncation (tokens x 4 chars) |
| `dialecticReasoningLevel` | string | `"low"` | root / host | Base reasoning level for `peer.chat()`: `"minimal"`, `"low"`, `"medium"`, `"high"`, `"max"` |
| `dialecticDynamic` | bool | `true` | root / host | Auto-bump reasoning based on query length: `<120` chars = base level, `120-400` = +1, `>400` = +2 (capped at `"high"`). Set `false` to always use `dialecticReasoningLevel` as-is |
| `dialecticMaxChars` | int | `600` | root / host | Max chars of dialectic result injected into system prompt |
| `dialecticMaxInputChars` | int | `10000` | root / host | Max chars for dialectic query input to `peer.chat()`. Honcho cloud limit: 10k |
| `messageMaxChars` | int | `25000` | root / host | Max chars per message sent via `add_messages()`. Messages exceeding this are chunked with `[continued]` markers. Honcho cloud limit: 25k |
### Cost Awareness (Advanced)
These are read from the root config object, not the host block. Must be set manually in `honcho.json`.
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| `injectionFrequency` | string | `"every-turn"` | `"every-turn"` or `"first-turn"` (inject context only on turn 0) |
| `contextCadence` | int | `1` | Minimum turns between `context()` API calls |
| `dialecticCadence` | int | `1` | Minimum turns between `peer.chat()` API calls |
| `reasoningLevelCap` | string | -- | Hard cap on auto-bumped reasoning: `"minimal"`, `"low"`, `"mid"`, `"high"` |
### Hardcoded Limits (Not Configurable)
| Limit | Value | Location |
|-------|-------|----------|
| Search tool max tokens | 2000 (hard cap), 800 (default) | `__init__.py` handle_tool_call |
| Peer card fetch tokens | 200 | `session.py` get_peer_card |
## Config Precedence
For every key, resolution order is: **host block > root > env var > default**.
Host key derivation: `HERMES_HONCHO_HOST` env > active profile (`hermes.<profile>`) > `"hermes"`.
| Limit | Value |
|-------|-------|
| Search tool max tokens | 2000 (hard cap), 800 (default) |
| Peer card fetch tokens | 200 |
## Environment Variables
@ -182,15 +287,16 @@ Host key derivation: `HERMES_HONCHO_HOST` env > active profile (`hermes.<profile
```json
{
"apiKey": "your-key",
"apiKey": "***",
"workspace": "hermes",
"peerName": "eri",
"peerName": "username",
"contextCadence": 2,
"dialecticCadence": 3,
"dialecticDepth": 2,
"hosts": {
"hermes": {
"enabled": true,
"aiPeer": "hermes",
"workspace": "hermes",
"peerName": "eri",
"recallMode": "hybrid",
"observation": {
"user": { "observeMe": true, "observeOthers": true },
@ -199,14 +305,16 @@ Host key derivation: `HERMES_HONCHO_HOST` env > active profile (`hermes.<profile
"writeFrequency": "async",
"sessionStrategy": "per-directory",
"dialecticReasoningLevel": "low",
"dialecticDepth": 2,
"dialecticMaxChars": 600,
"saveMessages": true
},
"hermes.coder": {
"enabled": true,
"aiPeer": "coder",
"workspace": "hermes",
"peerName": "eri",
"sessionStrategy": "per-repo",
"dialecticDepth": 1,
"dialecticDepthLevels": ["low"],
"observation": {
"user": { "observeMe": true, "observeOthers": false },
"ai": { "observeMe": true, "observeOthers": true }

View file

@ -17,6 +17,7 @@ from __future__ import annotations
import json
import logging
import re
import threading
from typing import Any, Dict, List, Optional
@ -33,20 +34,33 @@ logger = logging.getLogger(__name__)
PROFILE_SCHEMA = {
"name": "honcho_profile",
"description": (
"Retrieve the user's peer card from Honcho — a curated list of key facts "
"about them (name, role, preferences, communication style, patterns). "
"Fast, no LLM reasoning, minimal cost. "
"Use this at conversation start or when you need a quick factual snapshot."
"Retrieve or update a peer card from Honcho — a curated list of key facts "
"about that peer (name, role, preferences, communication style, patterns). "
"Pass `card` to update; omit `card` to read."
),
"parameters": {"type": "object", "properties": {}, "required": []},
"parameters": {
"type": "object",
"properties": {
"peer": {
"type": "string",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
"card": {
"type": "array",
"items": {"type": "string"},
"description": "New peer card as a list of fact strings. Omit to read the current card.",
},
},
"required": [],
},
}
SEARCH_SCHEMA = {
"name": "honcho_search",
"description": (
"Semantic search over Honcho's stored context about the user. "
"Semantic search over Honcho's stored context about a peer. "
"Returns raw excerpts ranked by relevance — no LLM synthesis. "
"Cheaper and faster than honcho_context. "
"Cheaper and faster than honcho_reasoning. "
"Good when you want to find specific past facts and reason over them yourself."
),
"parameters": {
@ -60,17 +74,23 @@ SEARCH_SCHEMA = {
"type": "integer",
"description": "Token budget for returned context (default 800, max 2000).",
},
"peer": {
"type": "string",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
},
"required": ["query"],
},
}
CONTEXT_SCHEMA = {
"name": "honcho_context",
REASONING_SCHEMA = {
"name": "honcho_reasoning",
"description": (
"Ask Honcho a natural language question and get a synthesized answer. "
"Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. "
"Can query about any peer: the user (default) or the AI assistant."
"Can query about any peer via alias or explicit peer ID. "
"Pass reasoning_level to control depth: minimal (fast/cheap), low (default), "
"medium, high, max (deep/expensive). Omit for configured default."
),
"parameters": {
"type": "object",
@ -79,37 +99,84 @@ CONTEXT_SCHEMA = {
"type": "string",
"description": "A natural language question.",
},
"reasoning_level": {
"type": "string",
"description": (
"Override the default reasoning depth. "
"Omit to use the configured default (typically low). "
"Guide:\n"
"- minimal: quick factual lookups (name, role, simple preference)\n"
"- low: straightforward questions with clear answers\n"
"- medium: multi-aspect questions requiring synthesis across observations\n"
"- high: complex behavioral patterns, contradictions, deep analysis\n"
"- max: thorough audit-level analysis, leave no stone unturned"
),
"enum": ["minimal", "low", "medium", "high", "max"],
},
"peer": {
"type": "string",
"description": "Which peer to query about: 'user' (default) or 'ai'.",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
},
"required": ["query"],
},
}
CONTEXT_SCHEMA = {
"name": "honcho_context",
"description": (
"Retrieve full session context from Honcho — summary, peer representation, "
"peer card, and recent messages. No LLM synthesis. "
"Cheaper than honcho_reasoning. Use this to see what Honcho knows about "
"the current conversation and the specified peer."
),
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Optional focus query to filter context. Omit for full session context snapshot.",
},
"peer": {
"type": "string",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
},
"required": [],
},
}
CONCLUDE_SCHEMA = {
"name": "honcho_conclude",
"description": (
"Write a conclusion about the user back to Honcho's memory. "
"Conclusions are persistent facts that build the user's profile. "
"Use when the user states a preference, corrects you, or shares "
"something to remember across sessions."
"Write or delete a conclusion about a peer in Honcho's memory. "
"Conclusions are persistent facts that build a peer's profile. "
"You MUST pass exactly one of: `conclusion` (to create) or `delete_id` (to delete). "
"Passing neither is an error. "
"Deletion is only for PII removal — Honcho self-heals incorrect conclusions over time."
),
"parameters": {
"type": "object",
"properties": {
"conclusion": {
"type": "string",
"description": "A factual statement about the user to persist.",
}
"description": "A factual statement to persist. Required when not using delete_id.",
},
"delete_id": {
"type": "string",
"description": "Conclusion ID to delete (for PII removal). Required when not using conclusion.",
},
"peer": {
"type": "string",
"description": "Peer to query. Built-in aliases: 'user' (default), 'ai'. Or pass any peer ID from this workspace.",
},
},
"required": ["conclusion"],
"required": [],
},
}
ALL_TOOL_SCHEMAS = [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA]
ALL_TOOL_SCHEMAS = [PROFILE_SCHEMA, SEARCH_SCHEMA, REASONING_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA]
# ---------------------------------------------------------------------------
@ -131,16 +198,18 @@ class HonchoMemoryProvider(MemoryProvider):
# B1: recall_mode — set during initialize from config
self._recall_mode = "hybrid" # "context", "tools", or "hybrid"
# B4: First-turn context baking
self._first_turn_context: Optional[str] = None
self._first_turn_lock = threading.Lock()
# Base context cache — refreshed on context_cadence, not frozen
self._base_context_cache: Optional[str] = None
self._base_context_lock = threading.Lock()
# B5: Cost-awareness turn counting and cadence
self._turn_count = 0
self._injection_frequency = "every-turn" # or "first-turn"
self._context_cadence = 1 # minimum turns between context API calls
self._dialectic_cadence = 1 # minimum turns between dialectic API calls
self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "mid", "high"
self._dialectic_cadence = 3 # minimum turns between dialectic API calls
self._dialectic_depth = 1 # how many .chat() calls per dialectic cycle (1-3)
self._dialectic_depth_levels: list[str] | None = None # per-pass reasoning levels
self._reasoning_level_cap: Optional[str] = None # "minimal", "low", "medium", "high"
self._last_context_turn = -999
self._last_dialectic_turn = -999
@ -236,9 +305,11 @@ class HonchoMemoryProvider(MemoryProvider):
raw = cfg.raw or {}
self._injection_frequency = raw.get("injectionFrequency", "every-turn")
self._context_cadence = int(raw.get("contextCadence", 1))
self._dialectic_cadence = int(raw.get("dialecticCadence", 1))
self._dialectic_cadence = int(raw.get("dialecticCadence", 3))
self._dialectic_depth = max(1, min(cfg.dialectic_depth, 3))
self._dialectic_depth_levels = cfg.dialectic_depth_levels
cap = raw.get("reasoningLevelCap")
if cap and cap in ("minimal", "low", "mid", "high"):
if cap and cap in ("minimal", "low", "medium", "high"):
self._reasoning_level_cap = cap
except Exception as e:
logger.debug("Honcho cost-awareness config parse error: %s", e)
@ -251,9 +322,7 @@ class HonchoMemoryProvider(MemoryProvider):
# ----- Port #1957: lazy session init for tools-only mode -----
if self._recall_mode == "tools":
if cfg.init_on_session_start:
# Eager init: create session now so sync_turn() works from turn 1.
# Does NOT enable auto-injection — prefetch() still returns empty.
logger.debug("Honcho tools-only mode — eager session init (initOnSessionStart=true)")
# Eager init even in tools mode (opt-in)
self._do_session_init(cfg, session_id, **kwargs)
return
# Defer actual session creation until first tool call
@ -287,8 +356,13 @@ class HonchoMemoryProvider(MemoryProvider):
# ----- B3: resolve_session_name -----
session_title = kwargs.get("session_title")
gateway_session_key = kwargs.get("gateway_session_key")
self._session_key = (
cfg.resolve_session_name(session_title=session_title, session_id=session_id)
cfg.resolve_session_name(
session_title=session_title,
session_id=session_id,
gateway_session_key=gateway_session_key,
)
or session_id
or "hermes-default"
)
@ -299,12 +373,21 @@ class HonchoMemoryProvider(MemoryProvider):
self._session_initialized = True
# ----- B6: Memory file migration (one-time, for new sessions) -----
# Skip under per-session strategy: every Hermes run creates a fresh
# Honcho session by design, so uploading MEMORY.md/USER.md/SOUL.md to
# each one would flood the backend with short-lived duplicates instead
# of performing a one-time migration.
try:
if not session.messages:
if not session.messages and cfg.session_strategy != "per-session":
from hermes_constants import get_hermes_home
mem_dir = str(get_hermes_home() / "memories")
self._manager.migrate_memory_files(self._session_key, mem_dir)
logger.debug("Honcho memory file migration attempted for new session: %s", self._session_key)
elif cfg.session_strategy == "per-session":
logger.debug(
"Honcho memory file migration skipped: per-session strategy creates a fresh session per run (%s)",
self._session_key,
)
except Exception as e:
logger.debug("Honcho memory file migration skipped: %s", e)
@ -347,6 +430,11 @@ class HonchoMemoryProvider(MemoryProvider):
"""Format the prefetch context dict into a readable system prompt block."""
parts = []
# Session summary — session-scoped context, placed first for relevance
summary = ctx.get("summary", "")
if summary:
parts.append(f"## Session Summary\n{summary}")
rep = ctx.get("representation", "")
if rep:
parts.append(f"## User Representation\n{rep}")
@ -370,9 +458,9 @@ class HonchoMemoryProvider(MemoryProvider):
def system_prompt_block(self) -> str:
"""Return system prompt text, adapted by recall_mode.
B4: On the FIRST call, fetch and bake the full Honcho context
(user representation, peer card, AI representation, continuity synthesis).
Subsequent calls return the cached block for prompt caching stability.
Returns only the mode header and tool instructions static text
that doesn't change between turns (prompt-cache friendly).
Live context (representation, card) is injected via prefetch().
"""
if self._cron_skipped:
return ""
@ -382,24 +470,10 @@ class HonchoMemoryProvider(MemoryProvider):
return (
"# Honcho Memory\n"
"Active (tools-only mode). Use honcho_profile, honcho_search, "
"honcho_context, and honcho_conclude tools to access user memory."
"honcho_reasoning, honcho_context, and honcho_conclude tools to access user memory."
)
return ""
# ----- B4: First-turn context baking -----
first_turn_block = ""
if self._recall_mode in ("context", "hybrid"):
with self._first_turn_lock:
if self._first_turn_context is None:
# First call — fetch and cache
try:
ctx = self._manager.get_prefetch_context(self._session_key)
self._first_turn_context = self._format_first_turn_context(ctx) if ctx else ""
except Exception as e:
logger.debug("Honcho first-turn context fetch failed: %s", e)
self._first_turn_context = ""
first_turn_block = self._first_turn_context
# ----- B1: adapt text based on recall_mode -----
if self._recall_mode == "context":
header = (
@ -412,7 +486,8 @@ class HonchoMemoryProvider(MemoryProvider):
header = (
"# Honcho Memory\n"
"Active (tools-only mode). Use honcho_profile for a quick factual snapshot, "
"honcho_search for raw excerpts, honcho_context for synthesized answers, "
"honcho_search for raw excerpts, honcho_context for raw peer context, "
"honcho_reasoning for synthesized answers, "
"honcho_conclude to save facts about the user. "
"No automatic context injection — you must use tools to access memory."
)
@ -421,16 +496,19 @@ class HonchoMemoryProvider(MemoryProvider):
"# Honcho Memory\n"
"Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. "
"Use honcho_profile for a quick factual snapshot, "
"honcho_search for raw excerpts, honcho_context for synthesized answers, "
"honcho_search for raw excerpts, honcho_context for raw peer context, "
"honcho_reasoning for synthesized answers, "
"honcho_conclude to save facts about the user."
)
if first_turn_block:
return f"{header}\n\n{first_turn_block}"
return header
def prefetch(self, query: str, *, session_id: str = "") -> str:
"""Return prefetched dialectic context from background thread.
"""Return base context (representation + card) plus dialectic supplement.
Assembles two layers:
1. Base context from peer.context() cached, refreshed on context_cadence
2. Dialectic supplement cached, refreshed on dialectic_cadence
B1: Returns empty when recall_mode is "tools" (no injection).
B5: Respects injection_frequency "first-turn" returns cached/empty after turn 0.
@ -443,22 +521,95 @@ class HonchoMemoryProvider(MemoryProvider):
if self._recall_mode == "tools":
return ""
# B5: injection_frequency — if "first-turn" and past first turn, return empty
if self._injection_frequency == "first-turn" and self._turn_count > 0:
# B5: injection_frequency — if "first-turn" and past first turn, return empty.
# _turn_count is 1-indexed (first user message = 1), so > 1 means "past first".
if self._injection_frequency == "first-turn" and self._turn_count > 1:
return ""
parts = []
# ----- Layer 1: Base context (representation + card) -----
# On first call, fetch synchronously so turn 1 isn't empty.
# After that, serve from cache and refresh in background on cadence.
with self._base_context_lock:
if self._base_context_cache is None:
# First call — synchronous fetch
try:
ctx = self._manager.get_prefetch_context(self._session_key)
self._base_context_cache = self._format_first_turn_context(ctx) if ctx else ""
self._last_context_turn = self._turn_count
except Exception as e:
logger.debug("Honcho base context fetch failed: %s", e)
self._base_context_cache = ""
base_context = self._base_context_cache
# Check if background context prefetch has a fresher result
if self._manager:
fresh_ctx = self._manager.pop_context_result(self._session_key)
if fresh_ctx:
formatted = self._format_first_turn_context(fresh_ctx)
if formatted:
with self._base_context_lock:
self._base_context_cache = formatted
base_context = formatted
if base_context:
parts.append(base_context)
# ----- Layer 2: Dialectic supplement -----
# On the very first turn, no queue_prefetch() has run yet so the
# dialectic result is empty. Run with a bounded timeout so a slow
# Honcho connection doesn't block the first response indefinitely.
# On timeout the result is skipped and queue_prefetch() will pick it
# up at the next cadence-allowed turn.
if self._last_dialectic_turn == -999 and query:
_first_turn_timeout = (
self._config.timeout if self._config and self._config.timeout else 8.0
)
_result_holder: list[str] = []
def _run_first_turn() -> None:
try:
_result_holder.append(self._run_dialectic_depth(query))
except Exception as exc:
logger.debug("Honcho first-turn dialectic failed: %s", exc)
_t = threading.Thread(target=_run_first_turn, daemon=True)
_t.start()
_t.join(timeout=_first_turn_timeout)
if not _t.is_alive():
first_turn_dialectic = _result_holder[0] if _result_holder else ""
if first_turn_dialectic and first_turn_dialectic.strip():
with self._prefetch_lock:
self._prefetch_result = first_turn_dialectic
self._last_dialectic_turn = self._turn_count
else:
logger.debug(
"Honcho first-turn dialectic timed out (%.1fs) — "
"will inject at next cadence-allowed turn",
_first_turn_timeout,
)
# Don't update _last_dialectic_turn: queue_prefetch() will
# retry at the next cadence-allowed turn via the async path.
if self._prefetch_thread and self._prefetch_thread.is_alive():
self._prefetch_thread.join(timeout=3.0)
with self._prefetch_lock:
result = self._prefetch_result
dialectic_result = self._prefetch_result
self._prefetch_result = ""
if not result:
if dialectic_result and dialectic_result.strip():
parts.append(dialectic_result)
if not parts:
return ""
result = "\n\n".join(parts)
# ----- Port #3265: token budget enforcement -----
result = self._truncate_to_budget(result)
return f"## Honcho Context\n{result}"
return result
def _truncate_to_budget(self, text: str) -> str:
"""Truncate text to fit within context_tokens budget if set."""
@ -475,9 +626,11 @@ class HonchoMemoryProvider(MemoryProvider):
return truncated + ""
def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
"""Fire a background dialectic query for the upcoming turn.
"""Fire background prefetch threads for the upcoming turn.
B5: Checks cadence before firing background threads.
B5: Checks cadence independently for dialectic and context refresh.
Context refresh updates the base layer (representation + card).
Dialectic fires the LLM reasoning supplement.
"""
if self._cron_skipped:
return
@ -488,6 +641,15 @@ class HonchoMemoryProvider(MemoryProvider):
if self._recall_mode == "tools":
return
# ----- Context refresh (base layer) — independent cadence -----
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
try:
self._manager.prefetch_context(self._session_key, query)
except Exception as e:
logger.debug("Honcho context prefetch failed: %s", e)
# ----- Dialectic prefetch (supplement layer) -----
# B5: cadence check — skip if too soon since last dialectic call
if self._dialectic_cadence > 1:
if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
@ -499,9 +661,7 @@ class HonchoMemoryProvider(MemoryProvider):
def _run():
try:
result = self._manager.dialectic_query(
self._session_key, query, peer="user"
)
result = self._run_dialectic_depth(query)
if result and result.strip():
with self._prefetch_lock:
self._prefetch_result = result
@ -513,13 +673,140 @@ class HonchoMemoryProvider(MemoryProvider):
)
self._prefetch_thread.start()
# Also fire context prefetch if cadence allows
if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
self._last_context_turn = self._turn_count
try:
self._manager.prefetch_context(self._session_key, query)
except Exception as e:
logger.debug("Honcho context prefetch failed: %s", e)
# ----- Dialectic depth: multi-pass .chat() with cold/warm prompts -----
# Proportional reasoning levels per depth/pass when dialecticDepthLevels
# is not configured. The base level is dialecticReasoningLevel.
# Index: (depth, pass) → level relative to base; the sentinel value
# "base" is translated to the configured dialecticReasoningLevel by
# _resolve_pass_level at lookup time.
_PROPORTIONAL_LEVELS: dict[tuple[int, int], str] = {
    # depth 1: single pass at base level
    (1, 0): "base",
    # depth 2: pass 0 lighter, pass 1 at base
    (2, 0): "minimal",
    (2, 1): "base",
    # depth 3: pass 0 lighter, pass 1 at base, pass 2 one above minimal
    (3, 0): "minimal",
    (3, 1): "base",
    (3, 2): "low",
}
# Canonical ordering of reasoning levels, cheapest to most expensive.
# NOTE(review): not referenced in the code visible here — presumably used
# by level-cap/bump comparisons elsewhere; confirm before removing.
_LEVEL_ORDER: tuple[str, ...] = ("minimal", "low", "medium", "high", "max")
def _resolve_pass_level(self, pass_idx: int) -> str:
"""Resolve reasoning level for a given pass index.
Uses dialecticDepthLevels if configured, otherwise proportional
defaults relative to dialecticReasoningLevel.
"""
if self._dialectic_depth_levels and pass_idx < len(self._dialectic_depth_levels):
return self._dialectic_depth_levels[pass_idx]
base = (self._config.dialectic_reasoning_level if self._config else "low")
mapping = self._PROPORTIONAL_LEVELS.get((self._dialectic_depth, pass_idx))
if mapping is None or mapping == "base":
return base
return mapping
def _build_dialectic_prompt(self, pass_idx: int, prior_results: list[str], is_cold: bool) -> str:
"""Build the prompt for a given dialectic pass.
Pass 0: cold start (general user query) or warm (session-scoped).
Pass 1: self-audit / targeted synthesis against gaps from pass 0.
Pass 2: reconciliation / contradiction check across prior passes.
"""
if pass_idx == 0:
if is_cold:
return (
"Who is this person? What are their preferences, goals, "
"and working style? Focus on facts that would help an AI "
"assistant be immediately useful."
)
return (
"Given what's been discussed in this session so far, what "
"context about this user is most relevant to the current "
"conversation? Prioritize active context over biographical facts."
)
elif pass_idx == 1:
prior = prior_results[-1] if prior_results else ""
return (
f"Given this initial assessment:\n\n{prior}\n\n"
"What gaps remain in your understanding that would help "
"going forward? Synthesize what you actually know about "
"the user's current state and immediate needs, grounded "
"in evidence from recent sessions."
)
else:
# pass 2: reconciliation
return (
f"Prior passes produced:\n\n"
f"Pass 1:\n{prior_results[0] if len(prior_results) > 0 else '(empty)'}\n\n"
f"Pass 2:\n{prior_results[1] if len(prior_results) > 1 else '(empty)'}\n\n"
"Do these assessments cohere? Reconcile any contradictions "
"and produce a final, concise synthesis of what matters most "
"for the current conversation."
)
@staticmethod
def _signal_sufficient(result: str) -> bool:
"""Check if a dialectic pass returned enough signal to skip further passes.
Heuristic: a response longer than 100 chars with some structure
(section headers, bullets, or an ordered list) is considered sufficient.
"""
if not result or len(result.strip()) < 100:
return False
# Structured output with sections/bullets is strong signal
if "\n" in result and (
"##" in result
or "" in result
or re.search(r"^[*-] ", result, re.MULTILINE)
or re.search(r"^\s*\d+\. ", result, re.MULTILINE)
):
return True
# Long enough even without structure
return len(result.strip()) > 300
def _run_dialectic_depth(self, query: str) -> str:
    """Run up to ``dialecticDepth`` chained dialectic .chat() calls and
    return the deepest non-empty answer.

    A cold start (no cached base context) opens with a broad
    user-oriented prompt; a warm session opens with a session-scoped one.
    Passes after the first are conditional — they are skipped when the
    previous pass already produced strong signal.

    NOTE(review): *query* is currently unused — prompts are built solely
    from prior pass results and the cold/warm flag; confirm intentional.
    """
    if not self._manager or not self._session_key:
        return ""
    cold_start = not self._base_context_cache
    answers: list[str] = []
    for pass_idx in range(self._dialectic_depth):
        if pass_idx > 0 and answers and self._signal_sufficient(answers[-1]):
            # Prior pass was rich enough; stop burning dialectic calls.
            logger.debug("Honcho dialectic depth %d: pass %d skipped, prior signal sufficient",
                         self._dialectic_depth, pass_idx)
            break
        prompt = self._build_dialectic_prompt(pass_idx, answers, cold_start)
        level = self._resolve_pass_level(pass_idx)
        logger.debug("Honcho dialectic depth %d: pass %d, level=%s, cold=%s",
                     self._dialectic_depth, pass_idx, level, cold_start)
        reply = self._manager.dialectic_query(
            self._session_key, prompt,
            reasoning_level=level,
            peer="user",
        )
        answers.append(reply or "")
    # Prefer the deepest pass that actually produced output.
    return next((a for a in reversed(answers) if a and a.strip()), "")
def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
"""Track turn count for cadence and injection_frequency logic."""
@ -659,7 +946,14 @@ class HonchoMemoryProvider(MemoryProvider):
try:
if tool_name == "honcho_profile":
card = self._manager.get_peer_card(self._session_key)
peer = args.get("peer", "user")
card_update = args.get("card")
if card_update:
result = self._manager.set_peer_card(self._session_key, card_update, peer=peer)
if result is None:
return tool_error("Failed to update peer card.")
return json.dumps({"result": f"Peer card updated ({len(result)} facts).", "card": result})
card = self._manager.get_peer_card(self._session_key, peer=peer)
if not card:
return json.dumps({"result": "No profile facts available yet."})
return json.dumps({"result": card})
@ -669,30 +963,64 @@ class HonchoMemoryProvider(MemoryProvider):
if not query:
return tool_error("Missing required parameter: query")
max_tokens = min(int(args.get("max_tokens", 800)), 2000)
peer = args.get("peer", "user")
result = self._manager.search_context(
self._session_key, query, max_tokens=max_tokens
self._session_key, query, max_tokens=max_tokens, peer=peer
)
if not result:
return json.dumps({"result": "No relevant context found."})
return json.dumps({"result": result})
elif tool_name == "honcho_context":
elif tool_name == "honcho_reasoning":
query = args.get("query", "")
if not query:
return tool_error("Missing required parameter: query")
peer = args.get("peer", "user")
reasoning_level = args.get("reasoning_level")
result = self._manager.dialectic_query(
self._session_key, query, peer=peer
self._session_key, query,
reasoning_level=reasoning_level,
peer=peer,
)
# Update cadence tracker so auto-injection respects the gap after an explicit call
self._last_dialectic_turn = self._turn_count
return json.dumps({"result": result or "No result from Honcho."})
elif tool_name == "honcho_context":
peer = args.get("peer", "user")
ctx = self._manager.get_session_context(self._session_key, peer=peer)
if not ctx:
return json.dumps({"result": "No context available yet."})
parts = []
if ctx.get("summary"):
parts.append(f"## Summary\n{ctx['summary']}")
if ctx.get("representation"):
parts.append(f"## Representation\n{ctx['representation']}")
if ctx.get("card"):
parts.append(f"## Card\n{ctx['card']}")
if ctx.get("recent_messages"):
msgs = ctx["recent_messages"]
msg_str = "\n".join(
f" [{m['role']}] {m['content'][:200]}"
for m in msgs[-5:] # last 5 for brevity
)
parts.append(f"## Recent messages\n{msg_str}")
return json.dumps({"result": "\n\n".join(parts) or "No context available."})
elif tool_name == "honcho_conclude":
delete_id = args.get("delete_id")
peer = args.get("peer", "user")
if delete_id:
ok = self._manager.delete_conclusion(self._session_key, delete_id, peer=peer)
if ok:
return json.dumps({"result": f"Conclusion {delete_id} deleted."})
return tool_error(f"Failed to delete conclusion {delete_id}.")
conclusion = args.get("conclusion", "")
if not conclusion:
return tool_error("Missing required parameter: conclusion")
ok = self._manager.create_conclusion(self._session_key, conclusion)
return tool_error("Missing required parameter: conclusion or delete_id")
ok = self._manager.create_conclusion(self._session_key, conclusion, peer=peer)
if ok:
return json.dumps({"result": f"Conclusion saved: {conclusion}"})
return json.dumps({"result": f"Conclusion saved for {peer}: {conclusion}"})
return tool_error("Failed to save conclusion.")
return tool_error(f"Unknown tool: {tool_name}")

View file

@ -440,11 +440,43 @@ def cmd_setup(args) -> None:
if new_recall in ("hybrid", "context", "tools"):
hermes_host["recallMode"] = new_recall
# --- 7. Session strategy ---
current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-directory")
# --- 7. Context token budget ---
current_ctx_tokens = hermes_host.get("contextTokens") or cfg.get("contextTokens")
current_display = str(current_ctx_tokens) if current_ctx_tokens else "uncapped"
print("\n Context injection per turn (hybrid/context recall modes only):")
print(" uncapped -- no limit (default)")
print(" N -- token limit per turn (e.g. 1200)")
new_ctx_tokens = _prompt("Context tokens", default=current_display)
if new_ctx_tokens.strip().lower() in ("none", "uncapped", "no limit"):
hermes_host.pop("contextTokens", None)
elif new_ctx_tokens.strip() == "":
pass # keep current
else:
try:
val = int(new_ctx_tokens)
if val >= 0:
hermes_host["contextTokens"] = val
except (ValueError, TypeError):
pass # keep current
# --- 7b. Dialectic cadence ---
current_dialectic = str(hermes_host.get("dialecticCadence") or cfg.get("dialecticCadence") or "3")
print("\n Dialectic cadence:")
print(" How often Honcho rebuilds its user model (LLM call on Honcho backend).")
print(" 1 = every turn (aggressive), 3 = every 3 turns (recommended), 5+ = sparse.")
new_dialectic = _prompt("Dialectic cadence", default=current_dialectic)
try:
val = int(new_dialectic)
if val >= 1:
hermes_host["dialecticCadence"] = val
except (ValueError, TypeError):
hermes_host["dialecticCadence"] = 3
# --- 8. Session strategy ---
current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session")
print("\n Session strategy:")
print(" per-directory -- one session per working directory (default)")
print(" per-session -- new Honcho session each run")
print(" per-session -- each run starts clean, Honcho injects context automatically")
print(" per-directory -- reuses session per dir, prior context auto-injected each run")
print(" per-repo -- one session per git repository")
print(" global -- single session across all directories")
new_strat = _prompt("Session strategy", default=current_strat)
@ -490,10 +522,11 @@ def cmd_setup(args) -> None:
print(f" Recall: {hcfg.recall_mode}")
print(f" Sessions: {hcfg.session_strategy}")
print("\n Honcho tools available in chat:")
print(" honcho_context -- ask Honcho about the user (LLM-synthesized)")
print(" honcho_search -- semantic search over history (no LLM)")
print(" honcho_profile -- peer card, key facts (no LLM)")
print(" honcho_conclude -- persist a user fact to memory (no LLM)")
print(" honcho_context -- session context: summary, representation, card, messages")
print(" honcho_search -- semantic search over history")
print(" honcho_profile -- peer card, key facts")
print(" honcho_reasoning -- ask Honcho a question, synthesized answer")
print(" honcho_conclude -- persist a user fact to memory")
print("\n Other commands:")
print(" hermes honcho status -- show full config")
print(" hermes honcho mode -- change recall/observation mode")
@ -585,13 +618,26 @@ def cmd_status(args) -> None:
print(f" Enabled: {hcfg.enabled}")
print(f" API key: {masked}")
print(f" Workspace: {hcfg.workspace_id}")
print(f" Config path: {active_path}")
# Config paths — show where config was read from and where writes go
global_path = Path.home() / ".honcho" / "config.json"
print(f" Config: {active_path}")
if write_path != active_path:
print(f" Write path: {write_path} (instance-local)")
print(f" Write to: {write_path} (profile-local)")
if active_path == global_path:
print(f" Fallback: (none — using global ~/.honcho/config.json)")
elif global_path.exists():
print(f" Fallback: {global_path} (exists, cross-app interop)")
print(f" AI peer: {hcfg.ai_peer}")
print(f" User peer: {hcfg.peer_name or 'not set'}")
print(f" Session key: {hcfg.resolve_session_name()}")
print(f" Session strat: {hcfg.session_strategy}")
print(f" Recall mode: {hcfg.recall_mode}")
print(f" Context budget: {hcfg.context_tokens or '(uncapped)'} tokens")
raw = getattr(hcfg, "raw", None) or {}
dialectic_cadence = raw.get("dialecticCadence") or 3
print(f" Dialectic cad: every {dialectic_cadence} turn{'s' if dialectic_cadence != 1 else ''}")
print(f" Observation: user(me={hcfg.user_observe_me},others={hcfg.user_observe_others}) ai(me={hcfg.ai_observe_me},others={hcfg.ai_observe_others})")
print(f" Write freq: {hcfg.write_frequency}")
@ -599,8 +645,8 @@ def cmd_status(args) -> None:
print("\n Connection... ", end="", flush=True)
try:
client = get_honcho_client(hcfg)
print("OK")
_show_peer_cards(hcfg, client)
print("OK")
except Exception as e:
print(f"FAILED ({e})\n")
else:
@ -824,6 +870,41 @@ def cmd_mode(args) -> None:
print(f" {label}Recall mode -> {mode_arg} ({MODES[mode_arg]})\n")
def cmd_strategy(args) -> None:
    """Show or set the session strategy.

    With no argument, lists the available strategies and marks the one
    currently in effect (host block wins over root config, defaulting to
    "per-session"). With an argument, validates it against STRATEGIES and
    writes it into the active host block of the config.
    """
    STRATEGIES = {
        "per-session": "each run starts clean, Honcho injects context automatically",
        "per-directory": "reuses session per dir, prior context auto-injected each run",
        "per-repo": "one session per git repository",
        "global": "single session across all directories",
    }
    cfg = _read_config()
    strat_arg = getattr(args, "strategy", None)
    if strat_arg is None:
        # Show mode: resolve the effective strategy (host-level overrides root).
        current = (
            (cfg.get("hosts") or {}).get(_host_key(), {}).get("sessionStrategy")
            or cfg.get("sessionStrategy")
            or "per-session"
        )
        # NOTE(review): `"" * 40` produces an empty divider — looks like a
        # divider character was lost in transit; confirm the intended output.
        print("\nHoncho session strategy\n" + "" * 40)
        for s, desc in STRATEGIES.items():
            marker = " <-" if s == current else ""
            print(f" {s:<15} {desc}{marker}")
        print(f"\n Set with: hermes honcho strategy [per-session|per-directory|per-repo|global]\n")
        return
    if strat_arg not in STRATEGIES:
        print(f" Invalid strategy '{strat_arg}'. Options: {', '.join(STRATEGIES)}\n")
        return
    # Write mode: persist under the active host block, creating it if absent.
    host = _host_key()
    label = f"[{host}] " if host != "hermes" else ""
    cfg.setdefault("hosts", {}).setdefault(host, {})["sessionStrategy"] = strat_arg
    _write_config(cfg)
    print(f" {label}Session strategy -> {strat_arg} ({STRATEGIES[strat_arg]})\n")
def cmd_tokens(args) -> None:
"""Show or set token budget settings."""
cfg = _read_config()
@ -1143,10 +1224,11 @@ def cmd_migrate(args) -> None:
print(" automatically. Files become the seed, not the live store.")
print()
print(" Honcho tools (available to the agent during conversation)")
print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)")
print(" honcho_search — semantic search over stored context (no LLM)")
print(" honcho_profile — fast peer card snapshot (no LLM)")
print(" honcho_conclude — write a conclusion/fact back to memory (no LLM)")
print(" honcho_context — session context: summary, representation, card, messages")
print(" honcho_search — semantic search over stored context")
print(" honcho_profile — fast peer card snapshot")
print(" honcho_reasoning — ask Honcho a question, synthesized answer")
print(" honcho_conclude — write a conclusion/fact back to memory")
print()
print(" Session naming")
print(" OpenClaw: no persistent session concept — files are global.")
@ -1197,6 +1279,8 @@ def honcho_command(args) -> None:
cmd_peer(args)
elif sub == "mode":
cmd_mode(args)
elif sub == "strategy":
cmd_strategy(args)
elif sub == "tokens":
cmd_tokens(args)
elif sub == "identity":
@ -1211,7 +1295,7 @@ def honcho_command(args) -> None:
cmd_sync(args)
else:
print(f" Unknown honcho command: {sub}")
print(" Available: status, sessions, map, peer, mode, tokens, identity, migrate, enable, disable, sync\n")
print(" Available: status, sessions, map, peer, mode, strategy, tokens, identity, migrate, enable, disable, sync\n")
def register_cli(subparser) -> None:
@ -1270,6 +1354,15 @@ def register_cli(subparser) -> None:
help="Recall mode to set (hybrid/context/tools). Omit to show current.",
)
strategy_parser = subs.add_parser(
"strategy", help="Show or set session strategy (per-session/per-directory/per-repo/global)",
)
strategy_parser.add_argument(
"strategy", nargs="?", metavar="STRATEGY",
choices=("per-session", "per-directory", "per-repo", "global"),
help="Session strategy to set. Omit to show current.",
)
tokens_parser = subs.add_parser(
"tokens", help="Show or set token budget for context and dialectic",
)

View file

@ -58,7 +58,8 @@ def resolve_config_path() -> Path:
Resolution order:
1. $HERMES_HOME/honcho.json (profile-local, if it exists)
2. ~/.honcho/config.json (global, cross-app interop)
2. ~/.hermes/honcho.json (default profile shared host blocks live here)
3. ~/.honcho/config.json (global, cross-app interop)
Returns the global path if none exist (for first-time setup writes).
"""
@ -66,6 +67,11 @@ def resolve_config_path() -> Path:
if local_path.exists():
return local_path
# Default profile's config — host blocks accumulate here via setup/clone
default_path = Path.home() / ".hermes" / "honcho.json"
if default_path != local_path and default_path.exists():
return default_path
return GLOBAL_CONFIG_PATH
@ -88,6 +94,68 @@ def _resolve_bool(host_val, root_val, *, default: bool) -> bool:
return default
def _parse_context_tokens(host_val, root_val) -> int | None:
"""Parse contextTokens: host wins, then root, then None (uncapped)."""
for val in (host_val, root_val):
if val is not None:
try:
return int(val)
except (ValueError, TypeError):
pass
return None
def _parse_dialectic_depth(host_val, root_val) -> int:
"""Parse dialecticDepth: host wins, then root, then 1. Clamped to 1-3."""
for val in (host_val, root_val):
if val is not None:
try:
return max(1, min(int(val), 3))
except (ValueError, TypeError):
pass
return 1
_VALID_REASONING_LEVELS = ("minimal", "low", "medium", "high", "max")
def _parse_dialectic_depth_levels(host_val, root_val, depth: int) -> list[str] | None:
"""Parse dialecticDepthLevels: optional array of reasoning levels per pass.
Returns None when not configured (use proportional defaults).
When configured, validates each level and truncates/pads to match depth.
"""
for val in (host_val, root_val):
if val is not None and isinstance(val, list):
levels = [
lvl if lvl in _VALID_REASONING_LEVELS else "low"
for lvl in val[:depth]
]
# Pad with "low" if array is shorter than depth
while len(levels) < depth:
levels.append("low")
return levels
return None
def _resolve_optional_float(*values: Any) -> float | None:
"""Return the first non-empty value coerced to a positive float."""
for value in values:
if value is None:
continue
if isinstance(value, str):
value = value.strip()
if not value:
continue
try:
parsed = float(value)
except (TypeError, ValueError):
continue
if parsed > 0:
return parsed
return None
_VALID_OBSERVATION_MODES = {"unified", "directional"}
_OBSERVATION_MODE_ALIASES = {"shared": "unified", "separate": "directional", "cross": "directional"}
@ -153,6 +221,8 @@ class HonchoClientConfig:
environment: str = "production"
# Optional base URL for self-hosted Honcho (overrides environment mapping)
base_url: str | None = None
# Optional request timeout in seconds for Honcho SDK HTTP calls
timeout: float | None = None
# Identity
peer_name: str | None = None
ai_peer: str = "hermes"
@ -162,17 +232,25 @@ class HonchoClientConfig:
# Write frequency: "async" (background thread), "turn" (sync per turn),
# "session" (flush on session end), or int (every N turns)
write_frequency: str | int = "async"
# Prefetch budget
# Prefetch budget (None = no cap; set to an integer to bound auto-injected context)
context_tokens: int | None = None
# Dialectic (peer.chat) settings
# reasoning_level: "minimal" | "low" | "medium" | "high" | "max"
dialectic_reasoning_level: str = "low"
# dynamic: auto-bump reasoning level based on query length
# true — low->medium (120+ chars), low->high (400+ chars), capped at "high"
# false — always use dialecticReasoningLevel as-is
# When true, the model can override reasoning_level per-call via the
# honcho_reasoning tool param (agentic). When false, always uses
# dialecticReasoningLevel and ignores model-provided overrides.
dialectic_dynamic: bool = True
# Max chars of dialectic result to inject into Hermes system prompt
dialectic_max_chars: int = 600
# Dialectic depth: how many .chat() calls per dialectic cycle (1-3).
# Depth 1: single call. Depth 2: self-audit + targeted synthesis.
# Depth 3: self-audit + synthesis + reconciliation.
dialectic_depth: int = 1
# Optional per-pass reasoning level override. Array of reasoning levels
# matching dialectic_depth length. When None, uses proportional defaults
# derived from dialectic_reasoning_level.
dialectic_depth_levels: list[str] | None = None
# Honcho API limits — configurable for self-hosted instances
# Max chars per message sent via add_messages() (Honcho cloud: 25000)
message_max_chars: int = 25000
@ -183,10 +261,8 @@ class HonchoClientConfig:
# "context" — auto-injected context only, Honcho tools removed
# "tools" — Honcho tools only, no auto-injected context
recall_mode: str = "hybrid"
# When True and recallMode is "tools", create the Honcho session eagerly
# during initialize() instead of deferring to the first tool call.
# This ensures sync_turn() can write from the very first turn.
# Does NOT enable automatic context injection — only changes init timing.
# Eager init in tools mode — when true, initializes session during
# initialize() instead of deferring to first tool call
init_on_session_start: bool = False
# Observation mode: legacy string shorthand ("directional" or "unified").
# Kept for backward compat; granular per-peer booleans below are preferred.
@ -218,12 +294,14 @@ class HonchoClientConfig:
resolved_host = host or resolve_active_host()
api_key = os.environ.get("HONCHO_API_KEY")
base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
timeout = _resolve_optional_float(os.environ.get("HONCHO_TIMEOUT"))
return cls(
host=resolved_host,
workspace_id=workspace_id,
api_key=api_key,
environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
base_url=base_url,
timeout=timeout,
ai_peer=resolved_host,
enabled=bool(api_key or base_url),
)
@ -284,6 +362,11 @@ class HonchoClientConfig:
or os.environ.get("HONCHO_BASE_URL", "").strip()
or None
)
timeout = _resolve_optional_float(
raw.get("timeout"),
raw.get("requestTimeout"),
os.environ.get("HONCHO_TIMEOUT"),
)
# Auto-enable when API key or base_url is present (unless explicitly disabled)
# Host-level enabled wins, then root-level, then auto-enable if key/url exists.
@ -329,12 +412,16 @@ class HonchoClientConfig:
api_key=api_key,
environment=environment,
base_url=base_url,
timeout=timeout,
peer_name=host_block.get("peerName") or raw.get("peerName"),
ai_peer=ai_peer,
enabled=enabled,
save_messages=save_messages,
write_frequency=write_frequency,
context_tokens=host_block.get("contextTokens") or raw.get("contextTokens"),
context_tokens=_parse_context_tokens(
host_block.get("contextTokens"),
raw.get("contextTokens"),
),
dialectic_reasoning_level=(
host_block.get("dialecticReasoningLevel")
or raw.get("dialecticReasoningLevel")
@ -350,6 +437,15 @@ class HonchoClientConfig:
or raw.get("dialecticMaxChars")
or 600
),
dialectic_depth=_parse_dialectic_depth(
host_block.get("dialecticDepth"),
raw.get("dialecticDepth"),
),
dialectic_depth_levels=_parse_dialectic_depth_levels(
host_block.get("dialecticDepthLevels"),
raw.get("dialecticDepthLevels"),
depth=_parse_dialectic_depth(host_block.get("dialecticDepth"), raw.get("dialecticDepth")),
),
message_max_chars=int(
host_block.get("messageMaxChars")
or raw.get("messageMaxChars")
@ -416,16 +512,18 @@ class HonchoClientConfig:
cwd: str | None = None,
session_title: str | None = None,
session_id: str | None = None,
gateway_session_key: str | None = None,
) -> str | None:
"""Resolve Honcho session name.
Resolution order:
1. Manual directory override from sessions map
2. Hermes session title (from /title command)
3. per-session strategy Hermes session_id ({timestamp}_{hex})
4. per-repo strategy git repo root directory name
5. per-directory strategy directory basename
6. global strategy workspace name
3. Gateway session key (stable per-chat identifier from gateway platforms)
4. per-session strategy Hermes session_id ({timestamp}_{hex})
5. per-repo strategy git repo root directory name
6. per-directory strategy directory basename
7. global strategy workspace name
"""
import re
@ -439,12 +537,22 @@ class HonchoClientConfig:
# /title mid-session remap
if session_title:
sanitized = re.sub(r'[^a-zA-Z0-9_-]', '-', session_title).strip('-')
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', session_title).strip('-')
if sanitized:
if self.session_peer_prefix and self.peer_name:
return f"{self.peer_name}-{sanitized}"
return sanitized
# Gateway session key: stable per-chat identifier passed by the gateway
# (e.g. "agent:main:telegram:dm:8439114563"). Sanitize colons to hyphens
# for Honcho session ID compatibility. This takes priority over strategy-
# based resolution because gateway platforms need per-chat isolation that
# cwd-based strategies cannot provide.
if gateway_session_key:
sanitized = re.sub(r'[^a-zA-Z0-9_-]+', '-', gateway_session_key).strip('-')
if sanitized:
return sanitized
# per-session: inherit Hermes session_id (new Honcho session each run)
if self.session_strategy == "per-session" and session_id:
if self.session_peer_prefix and self.peer_name:
@ -506,13 +614,20 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
# mapping, enabling remote self-hosted Honcho deployments without
# requiring the server to live on localhost.
resolved_base_url = config.base_url
if not resolved_base_url:
resolved_timeout = config.timeout
if not resolved_base_url or resolved_timeout is None:
try:
from hermes_cli.config import load_config
hermes_cfg = load_config()
honcho_cfg = hermes_cfg.get("honcho", {})
if isinstance(honcho_cfg, dict):
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
if not resolved_base_url:
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
if resolved_timeout is None:
resolved_timeout = _resolve_optional_float(
honcho_cfg.get("timeout"),
honcho_cfg.get("request_timeout"),
)
except Exception:
pass
@ -547,6 +662,8 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
}
if resolved_base_url:
kwargs["base_url"] = resolved_base_url
if resolved_timeout is not None:
kwargs["timeout"] = resolved_timeout
_honcho_client = Honcho(**kwargs)

View file

@ -486,36 +486,9 @@ class HonchoSessionManager:
_REASONING_LEVELS = ("minimal", "low", "medium", "high", "max")
def _dynamic_reasoning_level(self, query: str) -> str:
"""
Pick a reasoning level for a dialectic query.
When dialecticDynamic is true (default), auto-bumps based on query
length so Honcho applies more inference where it matters:
< 120 chars -> configured default (typically "low")
120-400 chars -> +1 level above default (cap at "high")
> 400 chars -> +2 levels above default (cap at "high")
"max" is never selected automatically -- reserve it for explicit config.
When dialecticDynamic is false, always returns the configured level.
"""
if not self._dialectic_dynamic:
return self._dialectic_reasoning_level
levels = self._REASONING_LEVELS
default_idx = levels.index(self._dialectic_reasoning_level) if self._dialectic_reasoning_level in levels else 1
n = len(query)
if n < 120:
bump = 0
elif n < 400:
bump = 1
else:
bump = 2
# Cap at "high" (index 3) for auto-selection
idx = min(default_idx + bump, 3)
return levels[idx]
def _default_reasoning_level(self) -> str:
"""Return the configured default reasoning level."""
return self._dialectic_reasoning_level
def dialectic_query(
self, session_key: str, query: str,
@ -532,8 +505,9 @@ class HonchoSessionManager:
Args:
session_key: The session key to query against.
query: Natural language question.
reasoning_level: Override the config default. If None, uses
_dynamic_reasoning_level(query).
reasoning_level: Override the configured default (dialecticReasoningLevel).
Only honored when dialecticDynamic is true.
If None or dialecticDynamic is false, uses the configured default.
peer: Which peer to query "user" (default) or "ai".
Returns:
@ -543,29 +517,34 @@ class HonchoSessionManager:
if not session:
return ""
target_peer_id = self._resolve_peer_id(session, peer)
if target_peer_id is None:
return ""
# Guard: truncate query to Honcho's dialectic input limit
if len(query) > self._dialectic_max_input_chars:
query = query[:self._dialectic_max_input_chars].rsplit(" ", 1)[0]
level = reasoning_level or self._dynamic_reasoning_level(query)
if self._dialectic_dynamic and reasoning_level:
level = reasoning_level
else:
level = self._default_reasoning_level()
try:
if self._ai_observe_others:
# AI peer can observe user — use cross-observation routing
if peer == "ai":
ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
# AI peer can observe other peers — use assistant as observer.
ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
if target_peer_id == session.assistant_peer_id:
result = ai_peer_obj.chat(query, reasoning_level=level) or ""
else:
ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
result = ai_peer_obj.chat(
query,
target=session.user_peer_id,
target=target_peer_id,
reasoning_level=level,
) or ""
else:
# AI can't observe others — each peer queries self
peer_id = session.assistant_peer_id if peer == "ai" else session.user_peer_id
target_peer = self._get_or_create_peer(peer_id)
# Without cross-observation, each peer queries its own context.
target_peer = self._get_or_create_peer(target_peer_id)
result = target_peer.chat(query, reasoning_level=level) or ""
# Apply Hermes-side char cap before caching
@ -647,10 +626,11 @@ class HonchoSessionManager:
"""
Pre-fetch user and AI peer context from Honcho.
Fetches peer_representation and peer_card for both peers. search_query
is intentionally omitted it would only affect additional excerpts
that this code does not consume, and passing the raw message exposes
conversation content in server access logs.
Fetches peer_representation and peer_card for both peers, plus the
session summary when available. search_query is intentionally omitted
it would only affect additional excerpts that this code does not
consume, and passing the raw message exposes conversation content in
server access logs.
Args:
session_key: The session key to get context for.
@ -658,15 +638,29 @@ class HonchoSessionManager:
Returns:
Dictionary with 'representation', 'card', 'ai_representation',
and 'ai_card' keys.
'ai_card', and optionally 'summary' keys.
"""
session = self._cache.get(session_key)
if not session:
return {}
result: dict[str, str] = {}
# Session summary — provides session-scoped context.
# Fresh sessions (per-session cold start, or first-ever per-directory)
# return null summary — the guard below handles that gracefully.
# Per-directory returning sessions get their accumulated summary.
try:
user_ctx = self._fetch_peer_context(session.user_peer_id)
honcho_session = self._sessions_cache.get(session.honcho_session_id)
if honcho_session:
ctx = honcho_session.context(summary=True)
if ctx.summary and getattr(ctx.summary, "content", None):
result["summary"] = ctx.summary.content
except Exception as e:
logger.debug("Failed to fetch session summary from Honcho: %s", e)
try:
user_ctx = self._fetch_peer_context(session.user_peer_id, target=session.user_peer_id)
result["representation"] = user_ctx["representation"]
result["card"] = "\n".join(user_ctx["card"])
except Exception as e:
@ -674,7 +668,7 @@ class HonchoSessionManager:
# Also fetch AI peer's own representation so Hermes knows itself.
try:
ai_ctx = self._fetch_peer_context(session.assistant_peer_id)
ai_ctx = self._fetch_peer_context(session.assistant_peer_id, target=session.assistant_peer_id)
result["ai_representation"] = ai_ctx["representation"]
result["ai_card"] = "\n".join(ai_ctx["card"])
except Exception as e:
@ -862,7 +856,7 @@ class HonchoSessionManager:
return [str(item) for item in card if item]
return [str(card)]
def _fetch_peer_card(self, peer_id: str) -> list[str]:
def _fetch_peer_card(self, peer_id: str, *, target: str | None = None) -> list[str]:
"""Fetch a peer card directly from the peer object.
This avoids relying on session.context(), which can return an empty
@ -872,22 +866,33 @@ class HonchoSessionManager:
peer = self._get_or_create_peer(peer_id)
getter = getattr(peer, "get_card", None)
if callable(getter):
return self._normalize_card(getter())
return self._normalize_card(getter(target=target) if target is not None else getter())
legacy_getter = getattr(peer, "card", None)
if callable(legacy_getter):
return self._normalize_card(legacy_getter())
return self._normalize_card(legacy_getter(target=target) if target is not None else legacy_getter())
return []
def _fetch_peer_context(self, peer_id: str, search_query: str | None = None) -> dict[str, Any]:
def _fetch_peer_context(
self,
peer_id: str,
search_query: str | None = None,
*,
target: str | None = None,
) -> dict[str, Any]:
"""Fetch representation + peer card directly from a peer object."""
peer = self._get_or_create_peer(peer_id)
representation = ""
card: list[str] = []
try:
ctx = peer.context(search_query=search_query) if search_query else peer.context()
context_kwargs: dict[str, Any] = {}
if target is not None:
context_kwargs["target"] = target
if search_query is not None:
context_kwargs["search_query"] = search_query
ctx = peer.context(**context_kwargs) if context_kwargs else peer.context()
representation = (
getattr(ctx, "representation", None)
or getattr(ctx, "peer_representation", None)
@ -899,24 +904,111 @@ class HonchoSessionManager:
if not representation:
try:
representation = peer.representation() or ""
representation = (
peer.representation(target=target) if target is not None else peer.representation()
) or ""
except Exception as e:
logger.debug("Direct peer.representation() failed for '%s': %s", peer_id, e)
if not card:
try:
card = self._fetch_peer_card(peer_id)
card = self._fetch_peer_card(peer_id, target=target)
except Exception as e:
logger.debug("Direct peer card fetch failed for '%s': %s", peer_id, e)
return {"representation": representation, "card": card}
def get_peer_card(self, session_key: str) -> list[str]:
def get_session_context(self, session_key: str, peer: str = "user") -> dict[str, Any]:
"""Fetch full session context from Honcho including summary.
Uses the session-level context() API which returns summary,
peer_representation, peer_card, and messages.
"""
Fetch the user peer's card — a curated list of key facts.
session = self._cache.get(session_key)
if not session:
return {}
honcho_session = self._sessions_cache.get(session.honcho_session_id)
if not honcho_session:
# Fall back to peer-level context, respecting the requested peer
peer_id = self._resolve_peer_id(session, peer)
if peer_id is None:
peer_id = session.user_peer_id
return self._fetch_peer_context(peer_id, target=peer_id)
try:
peer_id = self._resolve_peer_id(session, peer)
ctx = honcho_session.context(
summary=True,
peer_target=peer_id,
peer_perspective=session.user_peer_id if peer == "user" else session.assistant_peer_id,
)
result: dict[str, Any] = {}
# Summary
if ctx.summary:
result["summary"] = ctx.summary.content
# Peer representation and card
if ctx.peer_representation:
result["representation"] = ctx.peer_representation
if ctx.peer_card:
result["card"] = "\n".join(ctx.peer_card)
# Messages (last N for context)
if ctx.messages:
recent = ctx.messages[-10:] # last 10 messages
result["recent_messages"] = [
{"role": getattr(m, "peer_id", "unknown"), "content": (m.content or "")[:500]}
for m in recent
]
return result
except Exception as e:
logger.debug("Session context fetch failed: %s", e)
return {}
def _resolve_peer_id(self, session: HonchoSession, peer: str | None) -> str:
    """Resolve a peer alias or explicit peer ID to a concrete Honcho peer ID.

    Always returns a non-empty string: either a known peer ID or a
    sanitized version of the caller-supplied alias/ID.
    """
    alias = (peer or "user").strip()
    if not alias:
        # Whitespace-only input falls back to the session's user peer.
        return session.user_peer_id
    normalized = self._sanitize_id(alias)
    # Map the well-known aliases onto the session's concrete peer IDs;
    # anything else is treated as an explicit (sanitized) peer ID.
    known = {
        self._sanitize_id("user"): session.user_peer_id,
        self._sanitize_id("ai"): session.assistant_peer_id,
    }
    return known.get(normalized, normalized)
def _resolve_observer_target(
    self,
    session: HonchoSession,
    peer: str | None,
) -> tuple[str, str | None]:
    """Resolve observer and target peer IDs for context/search/profile queries."""
    resolved = self._resolve_peer_id(session, peer)
    # Querying the assistant itself: the assistant observes itself.
    if resolved == session.assistant_peer_id:
        return session.assistant_peer_id, session.assistant_peer_id
    # Cross-observation enabled: the assistant observes the target peer.
    if self._ai_observe_others:
        return session.assistant_peer_id, resolved
    # Otherwise the target peer can only query its own context.
    return resolved, None
def get_peer_card(self, session_key: str, peer: str = "user") -> list[str]:
"""
Fetch a peer card a curated list of key facts.
Fast, no LLM reasoning. Returns raw structured facts Honcho has
inferred about the user (name, role, preferences, patterns).
inferred about the target peer (name, role, preferences, patterns).
Empty list if unavailable.
"""
session = self._cache.get(session_key)
@ -924,12 +1016,19 @@ class HonchoSessionManager:
return []
try:
return self._fetch_peer_card(session.user_peer_id)
observer_peer_id, target_peer_id = self._resolve_observer_target(session, peer)
return self._fetch_peer_card(observer_peer_id, target=target_peer_id)
except Exception as e:
logger.debug("Failed to fetch peer card from Honcho: %s", e)
return []
def search_context(self, session_key: str, query: str, max_tokens: int = 800) -> str:
def search_context(
self,
session_key: str,
query: str,
max_tokens: int = 800,
peer: str = "user",
) -> str:
"""
Semantic search over Honcho session context.
@ -941,6 +1040,7 @@ class HonchoSessionManager:
session_key: Session to search against.
query: Search query for semantic matching.
max_tokens: Token budget for returned content.
peer: Peer alias or explicit peer ID to search about.
Returns:
Relevant context excerpts as a string, or empty string if none.
@ -950,7 +1050,13 @@ class HonchoSessionManager:
return ""
try:
ctx = self._fetch_peer_context(session.user_peer_id, search_query=query)
observer_peer_id, target = self._resolve_observer_target(session, peer)
ctx = self._fetch_peer_context(
observer_peer_id,
search_query=query,
target=target,
)
parts = []
if ctx["representation"]:
parts.append(ctx["representation"])
@ -962,16 +1068,17 @@ class HonchoSessionManager:
logger.debug("Honcho search_context failed: %s", e)
return ""
def create_conclusion(self, session_key: str, content: str) -> bool:
"""Write a conclusion about the user back to Honcho.
def create_conclusion(self, session_key: str, content: str, peer: str = "user") -> bool:
"""Write a conclusion about a target peer back to Honcho.
Conclusions are facts the AI peer observes about the user
preferences, corrections, clarifications, project context.
They feed into the user's peer card and representation.
Conclusions are facts a peer observes about another peer or itself
preferences, corrections, clarifications, and project context.
They feed into the target peer's card and representation.
Args:
session_key: Session to associate the conclusion with.
content: The conclusion text (e.g. "User prefers dark mode").
content: The conclusion text.
peer: Peer alias or explicit peer ID. "user" is the default alias.
Returns:
True on success, False on failure.
@ -985,25 +1092,90 @@ class HonchoSessionManager:
return False
try:
if self._ai_observe_others:
# AI peer creates conclusion about user (cross-observation)
target_peer_id = self._resolve_peer_id(session, peer)
if target_peer_id is None:
logger.warning("Could not resolve conclusion peer '%s' for session '%s'", peer, session_key)
return False
if target_peer_id == session.assistant_peer_id:
assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
conclusions_scope = assistant_peer.conclusions_of(session.user_peer_id)
conclusions_scope = assistant_peer.conclusions_of(session.assistant_peer_id)
elif self._ai_observe_others:
assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
conclusions_scope = assistant_peer.conclusions_of(target_peer_id)
else:
# AI can't observe others — user peer creates self-conclusion
user_peer = self._get_or_create_peer(session.user_peer_id)
conclusions_scope = user_peer.conclusions_of(session.user_peer_id)
target_peer = self._get_or_create_peer(target_peer_id)
conclusions_scope = target_peer.conclusions_of(target_peer_id)
conclusions_scope.create([{
"content": content.strip(),
"session_id": session.honcho_session_id,
}])
logger.info("Created conclusion for %s: %s", session_key, content[:80])
logger.info("Created conclusion about %s for %s: %s", target_peer_id, session_key, content[:80])
return True
except Exception as e:
logger.error("Failed to create conclusion: %s", e)
return False
def delete_conclusion(self, session_key: str, conclusion_id: str, peer: str = "user") -> bool:
    """Delete a conclusion by ID. Use only for PII removal.

    Args:
        session_key: Session key for peer resolution.
        conclusion_id: The conclusion ID to delete.
        peer: Peer alias or explicit peer ID.

    Returns:
        True on success, False on failure.
    """
    session = self._cache.get(session_key)
    if not session:
        return False
    try:
        target_peer_id = self._resolve_peer_id(session, peer)
        # Mirror the observer/subject selection used when conclusions are
        # created: assistant self-observes, the AI observes others when
        # cross-observation is enabled, otherwise peers self-observe.
        if target_peer_id == session.assistant_peer_id:
            observer_id = session.assistant_peer_id
            subject_id = session.assistant_peer_id
        elif self._ai_observe_others:
            observer_id = session.assistant_peer_id
            subject_id = target_peer_id
        else:
            observer_id = target_peer_id
            subject_id = target_peer_id
        scope = self._get_or_create_peer(observer_id).conclusions_of(subject_id)
        scope.delete(conclusion_id)
        logger.info("Deleted conclusion %s for %s", conclusion_id, session_key)
        return True
    except Exception as e:
        # Best-effort: deletion failures are logged, never raised to callers.
        logger.error("Failed to delete conclusion %s: %s", conclusion_id, e)
        return False
def set_peer_card(self, session_key: str, card: list[str], peer: str = "user") -> list[str] | None:
    """Update a peer's card.

    Args:
        session_key: Session key for peer resolution.
        card: New peer card as list of fact strings.
        peer: Peer alias or explicit peer ID.

    Returns:
        Updated card on success, None on failure.
    """
    session = self._cache.get(session_key)
    if not session:
        return None
    try:
        peer_id = self._resolve_peer_id(session, peer)
        # Defensive: _resolve_peer_id is documented to always return a
        # string, but a None here must not reach the Honcho client.
        if peer_id is None:
            logger.warning("Could not resolve peer '%s' for set_peer_card in session '%s'", peer, session_key)
            return None
        updated = self._get_or_create_peer(peer_id).set_card(card)
        logger.info("Updated peer card for %s (%d facts)", peer_id, len(card))
        return updated
    except Exception as e:
        # Best-effort: card updates fail soft with a log line.
        logger.error("Failed to set peer card: %s", e)
        return None
def seed_ai_identity(self, session_key: str, content: str, source: str = "manual") -> bool:
"""
Seed the AI peer's Honcho representation from text content.
@ -1061,7 +1233,7 @@ class HonchoSessionManager:
return {"representation": "", "card": ""}
try:
ctx = self._fetch_peer_context(session.assistant_peer_id)
ctx = self._fetch_peer_context(session.assistant_peer_id, target=session.assistant_peer_id)
return {
"representation": ctx["representation"] or "",
"card": "\n".join(ctx["card"]),

View file

@ -68,6 +68,7 @@ termux = [
# Tested Android / Termux path: keeps the core CLI feature-rich while
# avoiding extras that currently depend on non-Android wheels (notably
# faster-whisper -> ctranslate2 via the voice extra).
"python-telegram-bot[webhooks]>=22.6,<23",
"hermes-agent[cron]",
"hermes-agent[cli]",
"hermes-agent[pty]",

View file

@ -75,7 +75,7 @@ from tools.browser_tool import cleanup_browser
from hermes_constants import OPENROUTER_BASE_URL
# Agent internals extracted to agent/ package for modularity
from agent.memory_manager import build_memory_context_block
from agent.memory_manager import build_memory_context_block, sanitize_context
from agent.retry_utils import jittered_backoff
from agent.error_classifier import classify_api_error, FailoverReason
from agent.prompt_builder import (
@ -602,6 +602,7 @@ class AIAgent:
prefill_messages: List[Dict[str, Any]] = None,
platform: str = None,
user_id: str = None,
gateway_session_key: str = None,
skip_context_files: bool = False,
skip_memory: bool = False,
session_db=None,
@ -667,6 +668,7 @@ class AIAgent:
self.ephemeral_system_prompt = ephemeral_system_prompt
self.platform = platform # "cli", "telegram", "discord", "whatsapp", etc.
self._user_id = user_id # Platform user identifier (gateway sessions)
self._gateway_session_key = gateway_session_key # Stable per-chat key (e.g. agent:main:telegram:dm:123)
# Pluggable print function — CLI replaces this with _cprint so that
# raw ANSI status lines are routed through prompt_toolkit's renderer
# instead of going directly to stdout where patch_stdout's StdoutProxy
@ -689,9 +691,14 @@ class AIAgent:
self.api_mode = api_mode
elif self.provider == "openai-codex":
self.api_mode = "codex_responses"
elif self.provider == "xai":
self.api_mode = "codex_responses"
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self._base_url_lower:
self.api_mode = "codex_responses"
self.provider = "openai-codex"
elif (provider_name is None) and "api.x.ai" in self._base_url_lower:
self.api_mode = "codex_responses"
self.provider = "xai"
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
self.api_mode = "anthropic_messages"
self.provider = "anthropic"
@ -1019,16 +1026,12 @@ class AIAgent:
f"was found. Set the {_env_hint} environment "
f"variable, or switch to a different provider with `hermes model`."
)
# Final fallback: try raw OpenRouter key
client_kwargs = {
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
"base_url": OPENROUTER_BASE_URL,
"default_headers": {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
}
# No provider configured — reject with a clear message.
raise RuntimeError(
"No LLM provider configured. Run `hermes model` to "
"select a provider, or run `hermes setup` for first-time "
"configuration."
)
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
@ -1292,6 +1295,9 @@ class AIAgent:
# Thread gateway user identity for per-user memory scoping
if self._user_id:
_init_kwargs["user_id"] = self._user_id
# Thread gateway session key for stable per-chat Honcho session isolation
if self._gateway_session_key:
_init_kwargs["gateway_session_key"] = self._gateway_session_key
# Profile identity for per-profile provider scoping
try:
from hermes_cli.profiles import get_active_profile_name
@ -2102,6 +2108,59 @@ class AIAgent:
content = re.sub(r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*', '', content, flags=re.IGNORECASE)
return content
@staticmethod
def _has_natural_response_ending(content: str) -> bool:
"""Heuristic: does visible assistant text look intentionally finished?"""
if not content:
return False
stripped = content.rstrip()
if not stripped:
return False
if stripped.endswith("```"):
return True
return stripped[-1] in '.!?:)"\']}。!?:)】」』》'
def _is_ollama_glm_backend(self) -> bool:
    """Detect the narrow backend family affected by Ollama/GLM stop misreports."""
    looks_like_glm = "glm" in (self.model or "").lower()
    is_zai_provider = (self.provider or "").lower() == "zai"
    # Only GLM-family models (or the zai provider) are affected.
    if not (looks_like_glm or is_zai_provider):
        return False
    base = self._base_url_lower
    # Explicit Ollama endpoint (hostname mention or default port 11434).
    if "ollama" in base or ":11434" in base:
        return True
    # Otherwise, treat any local endpoint as potentially Ollama-hosted.
    return bool(self.base_url and is_local_endpoint(self.base_url))
def _should_treat_stop_as_truncated(
    self,
    finish_reason: str,
    assistant_message,
    messages: Optional[list] = None,
) -> bool:
    """Detect conservative stop->length misreports for Ollama-hosted GLM models.

    Returns True only when every guard passes: a chat-completions "stop"
    from an Ollama/GLM backend, mid tool loop, with plain visible text
    that does not look intentionally finished.
    """
    # Only plain chat-completions "stop" responses are suspect.
    if finish_reason != "stop" or self.api_mode != "chat_completions":
        return False
    if not self._is_ollama_glm_backend():
        return False
    # Require at least one tool result in history — we only second-guess
    # stops that occur mid tool loop.
    prior = messages or []
    has_tool_result = any(
        isinstance(entry, dict) and entry.get("role") == "tool"
        for entry in prior
    )
    if not has_tool_result:
        return False
    # A message with tool calls (or no message at all) is not a truncation.
    if assistant_message is None:
        return False
    if getattr(assistant_message, "tool_calls", None):
        return False
    content = getattr(assistant_message, "content", None)
    if not isinstance(content, str):
        return False
    visible = self._strip_think_blocks(content).strip()
    if not visible:
        return False
    # Very short or single-token text gives the heuristic nothing to judge.
    if len(visible) < 20 or not re.search(r"\s", visible):
        return False
    # Suspicious only when the text does not read as deliberately finished.
    return not self._has_natural_response_ending(visible)
def _looks_like_codex_intermediate_ack(
self,
user_message: str,
@ -3978,6 +4037,7 @@ class AIAgent:
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
"extra_headers",
}
normalized: Dict[str, Any] = {
"model": model,
@ -4013,6 +4073,20 @@ class AIAgent:
if val is not None:
normalized[passthrough_key] = val
extra_headers = api_kwargs.get("extra_headers")
if extra_headers is not None:
if not isinstance(extra_headers, dict):
raise ValueError("Codex Responses request 'extra_headers' must be an object.")
normalized_headers: Dict[str, str] = {}
for key, value in extra_headers.items():
if not isinstance(key, str) or not key.strip():
raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
if value is None:
continue
normalized_headers[key.strip()] = str(value)
if normalized_headers:
normalized["extra_headers"] = normalized_headers
if allow_stream:
stream = api_kwargs.get("stream")
if stream is not None and stream is not True:
@ -6451,7 +6525,12 @@ class AIAgent:
if not is_github_responses:
kwargs["prompt_cache_key"] = self.session_id
if reasoning_enabled:
is_xai_responses = self.provider == "xai" or "api.x.ai" in (self.base_url or "").lower()
if reasoning_enabled and is_xai_responses:
# xAI reasons automatically — no effort param, just include encrypted content
kwargs["include"] = ["reasoning.encrypted_content"]
elif reasoning_enabled:
if is_github_responses:
# Copilot's Responses route advertises reasoning-effort support,
# but not OpenAI-specific prompt cache or encrypted reasoning
@ -6462,7 +6541,7 @@ class AIAgent:
else:
kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
kwargs["include"] = ["reasoning.encrypted_content"]
elif not is_github_responses:
elif not is_github_responses and not is_xai_responses:
kwargs["include"] = []
if self.request_overrides:
@ -6471,6 +6550,9 @@ class AIAgent:
if self.max_tokens is not None and not is_codex_backend:
kwargs["max_output_tokens"] = self.max_tokens
if is_xai_responses and getattr(self, "session_id", None):
kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
return kwargs
sanitized_messages = api_messages
@ -6635,18 +6717,24 @@ class AIAgent:
options["num_ctx"] = self._ollama_num_ctx
extra_body["options"] = options
# Ollama / custom provider: pass think=false when reasoning is disabled.
# Ollama does not recognise the OpenRouter-style `reasoning` extra_body
# field, so we use its native `think` parameter instead.
# This prevents thinking-capable models (Qwen3, etc.) from generating
# <think> blocks and producing empty-response errors when the user has
# set reasoning_effort: none.
if self.provider == "custom" and self.reasoning_config and isinstance(self.reasoning_config, dict):
_effort = (self.reasoning_config.get("effort") or "").strip().lower()
_enabled = self.reasoning_config.get("enabled", True)
if _effort == "none" or _enabled is False:
extra_body["think"] = False
if self._is_qwen_portal():
extra_body["vl_high_resolution_images"] = True
if extra_body:
api_kwargs["extra_body"] = extra_body
# xAI prompt caching: send x-grok-conv-id header to route requests
# to the same server, maximizing automatic cache hits.
# https://docs.x.ai/developers/advanced-api-usage/prompt-caching
if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id:
api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
# Priority Processing / generic request overrides (e.g. service_tier).
# Applied last so overrides win over any defaults set above.
if self.request_overrides:
@ -6757,9 +6845,16 @@ class AIAgent:
except Exception:
pass
# Sanitize surrogates from API response — some models (e.g. Kimi/GLM via Ollama)
# can return invalid surrogate code points that crash json.dumps() on persist.
_raw_content = assistant_message.content or ""
_san_content = _sanitize_surrogates(_raw_content)
if reasoning_text:
reasoning_text = _sanitize_surrogates(reasoning_text)
msg = {
"role": "assistant",
"content": assistant_message.content or "",
"content": _san_content,
"reasoning": reasoning_text,
"finish_reason": finish_reason,
}
@ -7418,7 +7513,7 @@ class AIAgent:
# Start spinner for CLI mode (skip when TUI handles tool progress)
spinner = None
if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots', print_fn=self._print_fn)
spinner.start()
@ -7432,24 +7527,50 @@ class AIAgent:
# Wait for all to complete with periodic heartbeats so the
# gateway's inactivity monitor doesn't kill us during long
# concurrent tool batches.
# concurrent tool batches. Also check for user interrupts
# so we don't block indefinitely when the user sends /stop
# or a new message during concurrent tool execution.
_conc_start = time.time()
_interrupt_logged = False
while True:
done, not_done = concurrent.futures.wait(
futures, timeout=30.0,
futures, timeout=5.0,
)
if not not_done:
break
# Check for interrupt — the per-thread interrupt signal
# already causes individual tools (terminal, execute_code)
# to abort, but tools without interrupt checks (web_search,
# read_file) will run to completion. Cancel any futures
# that haven't started yet so we don't block on them.
if self._interrupt_requested:
if not _interrupt_logged:
_interrupt_logged = True
self._vprint(
f"{self.log_prefix}⚡ Interrupt: cancelling "
f"{len(not_done)} pending concurrent tool(s)",
force=True,
)
for f in not_done:
f.cancel()
# Give already-running tools a moment to notice the
# per-thread interrupt signal and exit gracefully.
concurrent.futures.wait(not_done, timeout=3.0)
break
_conc_elapsed = int(time.time() - _conc_start)
_still_running = [
parsed_calls[futures.index(f)][1]
for f in not_done
if f in futures
]
self._touch_activity(
f"concurrent tools running ({_conc_elapsed}s, "
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
)
# Heartbeat every ~30s (6 × 5s poll intervals)
if _conc_elapsed > 0 and _conc_elapsed % 30 < 6:
_still_running = [
parsed_calls[futures.index(f)][1]
for f in not_done
if f in futures
]
self._touch_activity(
f"concurrent tools running ({_conc_elapsed}s, "
f"{len(not_done)} remaining: {', '.join(_still_running[:3])})"
)
finally:
if spinner:
# Build a summary message for the spinner stop
@ -7461,8 +7582,11 @@ class AIAgent:
for i, (tc, name, args) in enumerate(parsed_calls):
r = results[i]
if r is None:
# Shouldn't happen, but safety fallback
function_result = f"Error executing tool '{name}': thread did not return a result"
# Tool was cancelled (interrupt) or thread didn't return
if self._interrupt_requested:
function_result = f"[Tool execution cancelled — {name} was skipped due to user interrupt]"
else:
function_result = f"Error executing tool '{name}': thread did not return a result"
tool_duration = 0.0
else:
function_name, function_args, function_result, tool_duration, is_error = r
@ -7714,7 +7838,7 @@ class AIAgent:
spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
spinner = None
if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots', print_fn=self._print_fn)
spinner.start()
self._delegate_spinner = spinner
@ -7741,7 +7865,7 @@ class AIAgent:
# Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
spinner = None
if self.quiet_mode and not self.tool_progress_callback:
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
@ -7765,7 +7889,7 @@ class AIAgent:
# These are not in the tool registry — route through MemoryManager.
spinner = None
if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
@ -7787,7 +7911,7 @@ class AIAgent:
elif self.quiet_mode:
spinner = None
if self._should_emit_quiet_tool_messages() and self._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
face = random.choice(KawaiiSpinner.get_waiting_faces())
emoji = _get_tool_emoji(function_name)
preview = _build_tool_preview(function_name, function_args) or function_name
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
@ -8150,6 +8274,16 @@ class AIAgent:
if isinstance(persist_user_message, str):
persist_user_message = _sanitize_surrogates(persist_user_message)
# Strip leaked <memory-context> blocks from user input. When Honcho's
# saveMessages persists a turn that included injected context, the block
# can reappear in the next turn's user message via message history.
# Stripping here prevents stale memory tags from leaking into the
# conversation and being visible to the user or the model as user text.
if isinstance(user_message, str):
user_message = sanitize_context(user_message)
if isinstance(persist_user_message, str):
persist_user_message = sanitize_context(persist_user_message)
# Store stream callback for _interruptible_api_call to pick up
self._stream_callback = stream_callback
self._persist_user_message_idx = None
@ -8429,6 +8563,16 @@ class AIAgent:
self._interrupt_message = None
self._interrupt_thread_signal_pending = False
# Notify memory providers of the new turn so cadence tracking works.
# Must happen BEFORE prefetch_all() so providers know which turn it is
# and can gate context/dialectic refresh via contextCadence/dialecticCadence.
if self._memory_manager:
try:
_turn_msg = original_user_message if isinstance(original_user_message, str) else ""
self._memory_manager.on_turn_start(self._user_turn_count, _turn_msg)
except Exception:
pass
# External memory provider: prefetch once before the tool loop.
# Reuse the cached result on every iteration to avoid re-calling
# prefetch_all() on each tool call (10 tool calls = 10x latency + cost).
@ -8620,6 +8764,12 @@ class AIAgent:
new_tcs.append(tc)
am["tool_calls"] = new_tcs
# Proactively strip any surrogate characters before the API call.
# Models served via Ollama (Kimi K2.5, GLM-5, Qwen) can return
# lone surrogates (U+D800-U+DFFF) that crash json.dumps() inside
# the OpenAI SDK. Sanitizing here prevents the 3-retry cycle.
_sanitize_messages_surrogates(api_messages)
# Calculate approximate request size for logging
total_chars = sum(len(str(msg)) for msg in api_messages)
approx_tokens = estimate_messages_tokens_rough(api_messages)
@ -8633,8 +8783,8 @@ class AIAgent:
self._vprint(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
else:
# Animated thinking spinner in quiet mode
face = random.choice(KawaiiSpinner.KAWAII_THINKING)
verb = random.choice(KawaiiSpinner.THINKING_VERBS)
face = random.choice(KawaiiSpinner.get_thinking_faces())
verb = random.choice(KawaiiSpinner.get_thinking_verbs())
if self.thinking_callback:
# CLI TUI mode: use prompt_toolkit widget instead of raw spinner
# (works in both streaming and non-streaming modes)
@ -9018,6 +9168,17 @@ class AIAgent:
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
else:
finish_reason = response.choices[0].finish_reason
assistant_message = response.choices[0].message
if self._should_treat_stop_as_truncated(
finish_reason,
assistant_message,
messages,
):
self._vprint(
f"{self.log_prefix}⚠️ Treating suspicious Ollama/GLM stop response as truncated",
force=True,
)
finish_reason = "length"
if finish_reason == "length":
self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True)
@ -10792,8 +10953,9 @@ class AIAgent:
# tool(result) → assistant("(empty)") → user(nudge)
# Without this, we'd have tool → user which most
# APIs reject as an invalid sequence.
assistant_msg["content"] = "(empty)"
messages.append(assistant_msg)
_nudge_msg = self._build_assistant_message(assistant_message, finish_reason)
_nudge_msg["content"] = "(empty)"
messages.append(_nudge_msg)
messages.append({
"role": "user",
"content": (

View file

@ -64,7 +64,9 @@ AUTHOR_MAP = {
"259807879+Bartok9@users.noreply.github.com": "Bartok9",
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
"268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
"27917469+nosleepcassette@users.noreply.github.com": "nosleepcassette",
"241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
"109555139+davetist@users.noreply.github.com": "davetist",
# contributors (manual mapping from git names)
"dmayhem93@gmail.com": "dmahan93",
"samherring99@gmail.com": "samherring99",
@ -83,6 +85,7 @@ AUTHOR_MAP = {
"4317663+helix4u@users.noreply.github.com": "helix4u",
"331214+counterposition@users.noreply.github.com": "counterposition",
"blspear@gmail.com": "BrennerSpear",
"239876380+handsdiff@users.noreply.github.com": "handsdiff",
"gpickett00@gmail.com": "gpickett00",
"mcosma@gmail.com": "wakamex",
"clawdia.nash@proton.me": "clawdia-nash",
@ -124,6 +127,7 @@ AUTHOR_MAP = {
"balyan.sid@gmail.com": "balyansid",
"oluwadareab12@gmail.com": "bennytimz",
"simon@simonmarcus.org": "simon-marcus",
"xowiekk@gmail.com": "Xowiek",
"1243352777@qq.com": "zons-zhaozhy",
# ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
# crossref, and GH contributor list matching (April 2026 audit) ──
@ -175,6 +179,7 @@ AUTHOR_MAP = {
"limars874@gmail.com": "limars874",
"lisicheng168@gmail.com": "lesterli",
"mingjwan@microsoft.com": "MagicRay1217",
"orangeko@gmail.com": "GenKoKo",
"niyant@spicefi.xyz": "spniyant",
"olafthiele@gmail.com": "olafthiele",
"oncuevtv@gmail.com": "sprmn24",

View file

@ -351,8 +351,8 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con
|----------|------|-------------|
| OpenRouter | API key | `OPENROUTER_API_KEY` |
| Anthropic | API key | `ANTHROPIC_API_KEY` |
| Nous Portal | OAuth | `hermes login --provider nous` |
| OpenAI Codex | OAuth | `hermes login --provider openai-codex` |
| Nous Portal | OAuth | `hermes auth` |
| OpenAI Codex | OAuth | `hermes auth` |
| GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` |
| Google Gemini | API key | `GOOGLE_API_KEY` or `GEMINI_API_KEY` |
| DeepSeek | API key | `DEEPSEEK_API_KEY` |

View file

@ -47,6 +47,13 @@ SCOPES = [
]
def _normalize_authorized_user_payload(payload: dict) -> dict:
normalized = dict(payload)
if not normalized.get("type"):
normalized["type"] = "authorized_user"
return normalized
def _ensure_authenticated():
if not TOKEN_PATH.exists():
print("Not authenticated. Run the setup script first:", file=sys.stderr)
@ -170,7 +177,12 @@ def get_credentials():
creds = Credentials.from_authorized_user_file(str(TOKEN_PATH), _stored_token_scopes())
if creds.expired and creds.refresh_token:
creds.refresh(Request())
TOKEN_PATH.write_text(creds.to_json())
TOKEN_PATH.write_text(
json.dumps(
_normalize_authorized_user_payload(json.loads(creds.to_json())),
indent=2,
)
)
if not creds.valid:
print("Token is invalid. Re-run setup.", file=sys.stderr)
sys.exit(1)

View file

@ -19,6 +19,13 @@ def get_token_path() -> Path:
return get_hermes_home() / "google_token.json"
def _normalize_authorized_user_payload(payload: dict) -> dict:
normalized = dict(payload)
if not normalized.get("type"):
normalized["type"] = "authorized_user"
return normalized
def refresh_token(token_data: dict) -> dict:
"""Refresh the access token using the refresh token."""
import urllib.error
@ -55,7 +62,9 @@ def refresh_token(token_data: dict) -> dict:
tz=timezone.utc,
).isoformat()
get_token_path().write_text(json.dumps(token_data, indent=2))
get_token_path().write_text(
json.dumps(_normalize_authorized_user_payload(token_data), indent=2)
)
return token_data

View file

@ -60,6 +60,13 @@ REQUIRED_PACKAGES = ["google-api-python-client", "google-auth-oauthlib", "google
REDIRECT_URI = "http://localhost:1"
def _normalize_authorized_user_payload(payload: dict) -> dict:
normalized = dict(payload)
if not normalized.get("type"):
normalized["type"] = "authorized_user"
return normalized
def _load_token_payload(path: Path = TOKEN_PATH) -> dict:
try:
return json.loads(path.read_text())
@ -151,7 +158,12 @@ def check_auth():
if creds.expired and creds.refresh_token:
try:
creds.refresh(Request())
TOKEN_PATH.write_text(creds.to_json())
TOKEN_PATH.write_text(
json.dumps(
_normalize_authorized_user_payload(json.loads(creds.to_json())),
indent=2,
)
)
missing_scopes = _missing_scopes_from_payload(_load_token_payload(TOKEN_PATH))
if missing_scopes:
print(f"AUTHENTICATED (partial): Token refreshed but missing {len(missing_scopes)} scopes:")
@ -313,7 +325,7 @@ def exchange_auth_code(code: str):
sys.exit(1)
creds = flow.credentials
token_payload = json.loads(creds.to_json())
token_payload = _normalize_authorized_user_payload(json.loads(creds.to_json()))
# Store only the scopes actually granted by the user, not what was requested.
# creds.to_json() writes the requested scopes, which causes refresh to fail

View file

@ -89,7 +89,8 @@ class TestReadCodexAccessToken:
hermes_home.mkdir(parents=True, exist_ok=True)
(hermes_home / "auth.json").write_text(json.dumps({"version": 1, "providers": {}}))
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
result = _read_codex_access_token()
with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
result = _read_codex_access_token()
assert result is None
def test_empty_token_returns_none(self, tmp_path, monkeypatch):
@ -146,7 +147,8 @@ class TestReadCodexAccessToken:
},
}))
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
result = _read_codex_access_token()
with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
result = _read_codex_access_token()
assert result is None, "Expired JWT should return None"
def test_valid_jwt_returns_token(self, tmp_path, monkeypatch):
@ -585,7 +587,10 @@ class TestGetTextAuxiliaryClient:
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
patch("agent.auxiliary_client._try_custom_endpoint", return_value=(None, None)), \
patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client()
assert model == "gpt-5.2-codex"
@ -623,17 +628,21 @@ class TestGetTextAuxiliaryClient:
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
with patch("agent.auxiliary_client._resolve_auto", return_value=(None, None)):
client, model = get_text_auxiliary_client()
assert client is None
assert model is None
def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self, monkeypatch):
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
with patch("agent.auxiliary_client._resolve_custom_runtime",
return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
patch("agent.auxiliary_client._try_nous", return_value=(None, None)), \
patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client()

View file

@ -232,7 +232,7 @@ class TestResolveVisionProviderClientModelNormalization:
assert provider == "zai"
assert client is not None
assert model == "glm-5.1"
assert model == "glm-5v-turbo" # zai has dedicated vision model in _PROVIDER_VISION_MODELS
class TestVisionPathApiMode:

View file

@ -252,6 +252,11 @@ def test_exhausted_402_entry_resets_after_one_hour(tmp_path, monkeypatch):
def test_explicit_reset_timestamp_overrides_default_429_ttl(tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
# Prevent auto-seeding from Codex CLI tokens on the host
monkeypatch.setattr(
"hermes_cli.auth._import_codex_cli_tokens",
lambda: None,
)
_write_auth_store(
tmp_path,
{

View file

@ -939,3 +939,74 @@ class TestOnMemoryWriteBridge:
mgr.on_memory_write("add", "user", "test")
# Good provider still received the call despite bad provider crashing
assert good.memory_writes == [("add", "user", "test")]
class TestHonchoCadenceTracking:
"""Verify Honcho provider cadence gating depends on on_turn_start().
Bug: _turn_count was never updated because on_turn_start() was not called
from run_conversation(). This meant cadence checks always passed (every
turn fired both context refresh and dialectic). Fixed by calling
on_turn_start(self._user_turn_count, msg) before prefetch_all().
"""
def test_turn_count_updates_on_turn_start(self):
"""on_turn_start sets _turn_count, enabling cadence math."""
from plugins.memory.honcho import HonchoMemoryProvider
p = HonchoMemoryProvider()
assert p._turn_count == 0
p.on_turn_start(1, "hello")
assert p._turn_count == 1
p.on_turn_start(5, "world")
assert p._turn_count == 5
def test_queue_prefetch_respects_dialectic_cadence(self):
"""With dialecticCadence=3, dialectic should skip turns 2 and 3."""
from plugins.memory.honcho import HonchoMemoryProvider
p = HonchoMemoryProvider()
p._dialectic_cadence = 3
p._recall_mode = "context"
p._session_key = "test-session"
# Simulate a manager that records prefetch calls
class FakeManager:
def prefetch_context(self, key, query=None):
pass
def prefetch_dialectic(self, key, query):
pass
p._manager = FakeManager()
# Simulate turn 1: last_dialectic_turn = -999, so (1 - (-999)) >= 3 -> fires
p.on_turn_start(1, "turn 1")
p._last_dialectic_turn = 1 # simulate it fired
p._last_context_turn = 1
# Simulate turn 2: (2 - 1) = 1 < 3 -> should NOT fire dialectic
p.on_turn_start(2, "turn 2")
assert (p._turn_count - p._last_dialectic_turn) < p._dialectic_cadence
# Simulate turn 3: (3 - 1) = 2 < 3 -> should NOT fire dialectic
p.on_turn_start(3, "turn 3")
assert (p._turn_count - p._last_dialectic_turn) < p._dialectic_cadence
# Simulate turn 4: (4 - 1) = 3 >= 3 -> should fire dialectic
p.on_turn_start(4, "turn 4")
assert (p._turn_count - p._last_dialectic_turn) >= p._dialectic_cadence
def test_injection_frequency_first_turn_with_1indexed(self):
"""injection_frequency='first-turn' must inject on turn 1 (1-indexed)."""
from plugins.memory.honcho import HonchoMemoryProvider
p = HonchoMemoryProvider()
p._injection_frequency = "first-turn"
# Turn 1 should inject (not skip)
p.on_turn_start(1, "first message")
assert p._turn_count == 1
# The guard is `_turn_count > 1`, so turn 1 passes through
should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
assert not should_skip, "First turn (turn 1) should NOT be skipped"
# Turn 2 should skip
p.on_turn_start(2, "second message")
should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
assert should_skip, "Second turn (turn 2) SHOULD be skipped"

View file

@ -34,6 +34,7 @@ class _FakeAgent:
[{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
)
self.flush_memories = MagicMock()
self.commit_memory_session = MagicMock()
self._invalidate_system_prompt = MagicMock()
# Token counters (non-zero to verify reset)

View file

@ -1,5 +1,6 @@
"""Tests for CLI /status command behavior."""
from datetime import datetime
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
@ -83,3 +84,18 @@ def test_show_session_status_prints_gateway_style_summary():
_, kwargs = cli_obj.console.print.call_args
assert kwargs.get("highlight") is False
assert kwargs.get("markup") is False
def test_profile_command_reports_custom_root_profile(monkeypatch, tmp_path, capsys):
"""Profile detection works for custom-root deployments (not under ~/.hermes)."""
cli_obj = _make_cli()
profile_home = tmp_path / "profiles" / "coder"
monkeypatch.setenv("HERMES_HOME", str(profile_home))
monkeypatch.setattr(Path, "home", lambda: tmp_path / "unrelated-home")
cli_obj._handle_profile_command()
out = capsys.readouterr().out
assert "Profile: coder" in out
assert f"Home: {profile_home}" in out

View file

@ -144,6 +144,18 @@ class TestGatewayPersonalityNone:
assert "none" in result.lower()
@pytest.mark.asyncio
async def test_empty_personality_list_uses_profile_display_path(self, tmp_path):
runner = self._make_runner(personalities={})
(tmp_path / "config.yaml").write_text(yaml.dump({"agent": {"personalities": {}}}))
with patch("gateway.run._hermes_home", tmp_path), \
patch("hermes_constants.display_hermes_home", return_value="~/.hermes/profiles/coder"):
event = self._make_event("")
result = await runner._handle_personality_command(event)
assert result == "No personalities configured in `~/.hermes/profiles/coder/config.yaml`"
class TestPersonalityDictFormat:
"""Test dict-format custom personalities with description, tone, style."""

66
tests/gateway/conftest.py Normal file
View file

@ -0,0 +1,66 @@
"""Shared fixtures for gateway tests.
The ``_ensure_telegram_mock`` helper guarantees that a minimal mock of
the ``telegram`` package is registered in :data:`sys.modules` **before**
any test file triggers ``from gateway.platforms.telegram import ...``.
Without this, ``pytest-xdist`` workers that happen to collect
``test_telegram_caption_merge.py`` (bare top-level import, no per-file
mock) first will cache ``ChatType = None`` from the production
ImportError fallback, causing 30+ downstream test failures wherever
``ChatType.GROUP`` / ``ChatType.SUPERGROUP`` is accessed.
Individual test files may still call their own ``_ensure_telegram_mock``
it short-circuits when the mock is already present.
"""
import sys
from unittest.mock import MagicMock
def _ensure_telegram_mock() -> None:
"""Install a comprehensive telegram mock in sys.modules.
Idempotent skips when the real library is already imported.
Uses ``sys.modules[name] = mod`` (overwrite) instead of
``setdefault`` so it wins even if a partial/broken import
already cached a module with ``ChatType = None``.
"""
if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
return # Real library is installed — nothing to mock
mod = MagicMock()
mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
mod.constants.ParseMode.MARKDOWN = "Markdown"
mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
mod.constants.ParseMode.HTML = "HTML"
mod.constants.ChatType.PRIVATE = "private"
mod.constants.ChatType.GROUP = "group"
mod.constants.ChatType.SUPERGROUP = "supergroup"
mod.constants.ChatType.CHANNEL = "channel"
# Real exception classes so ``except (NetworkError, ...)`` clauses
# in production code don't blow up with TypeError.
mod.error.NetworkError = type("NetworkError", (OSError,), {})
mod.error.TimedOut = type("TimedOut", (OSError,), {})
mod.error.BadRequest = type("BadRequest", (Exception,), {})
mod.error.Forbidden = type("Forbidden", (Exception,), {})
mod.error.InvalidToken = type("InvalidToken", (Exception,), {})
mod.error.RetryAfter = type("RetryAfter", (Exception,), {"retry_after": 1})
mod.error.Conflict = type("Conflict", (Exception,), {})
# Update.ALL_TYPES used in start_polling()
mod.Update.ALL_TYPES = []
for name in (
"telegram",
"telegram.ext",
"telegram.constants",
"telegram.request",
):
sys.modules[name] = mod
sys.modules["telegram.error"] = mod.error
# Run at collection time — before any test file's module-level imports.
_ensure_telegram_mock()

View file

@ -284,6 +284,58 @@ class TestLoadGatewayConfig:
assert config.unauthorized_dm_behavior == "ignore"
assert config.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"
def test_bridges_telegram_disable_link_previews_from_config_yaml(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text(
"telegram:\n"
" disable_link_previews: true\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
config = load_gateway_config()
assert config.platforms[Platform.TELEGRAM].extra["disable_link_previews"] is True
def test_bridges_telegram_proxy_url_from_config_yaml(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text(
"telegram:\n"
" proxy_url: socks5://127.0.0.1:1080\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.delenv("TELEGRAM_PROXY", raising=False)
load_gateway_config()
import os
assert os.environ.get("TELEGRAM_PROXY") == "socks5://127.0.0.1:1080"
def test_telegram_proxy_env_takes_precedence_over_config(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text(
"telegram:\n"
" proxy_url: http://from-config:8080\n",
encoding="utf-8",
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setenv("TELEGRAM_PROXY", "socks5://from-env:1080")
load_gateway_config()
import os
assert os.environ.get("TELEGRAM_PROXY") == "socks5://from-env:1080"
class TestHomeChannelEnvOverrides:
"""Home channel env vars should apply even when the platform was already

View file

@ -1,12 +1,15 @@
"""Tests for duplicate reply suppression across the gateway stack.
Covers three fix paths:
Covers four fix paths:
1. base.py: stale response suppressed when interrupt_event is set and a
pending message exists (#8221 / #2483)
2. run.py return path: already_sent propagated from stream consumer's
already_sent flag without requiring response_previewed (#8375)
3. run.py queued-message path: first response correctly detected as
already-streamed when already_sent is True without response_previewed
2. run.py return path: only confirmed final streamed delivery suppresses
the fallback final send; partial streamed output must not
3. run.py queued-message path: first response is skipped only when the
final response was actually streamed, not merely when partial output existed
4. stream_consumer.py cancellation handler: only confirms final delivery
when the best-effort send actually succeeds, not merely because partial
content was sent earlier
"""
import asyncio
@ -153,15 +156,16 @@ class TestBaseInterruptSuppression:
assert any(s["content"] == "Valid response" for s in adapter.sent)
# ===================================================================
# Test 2: run.py — already_sent without response_previewed (#8375)
# Test 2: run.py — partial streamed output must not suppress final send
# ===================================================================
class TestAlreadySentWithoutResponsePreviewed:
"""The already_sent flag on the response dict should be set when the
stream consumer's already_sent is True, even if response_previewed is
False. This prevents duplicate sends when streaming was interrupted
by flood control."""
class TestOnlyFinalStreamDeliverySuppressesFinalSend:
"""The gateway should suppress the fallback final send only when the
stream consumer confirmed the final assistant reply was delivered.
Partial streamed output is not enough. If only already_sent=True,
the fallback final send must still happen so Telegram users don't lose
the real answer."""
def _make_mock_stream_consumer(self, already_sent=False, final_response_sent=False):
sc = SimpleNamespace(
@ -170,21 +174,20 @@ class TestAlreadySentWithoutResponsePreviewed:
)
return sc
def test_already_sent_set_without_response_previewed(self):
"""Stream consumer already_sent=True should propagate to response
dict even when response_previewed is False."""
def test_partial_stream_output_does_not_set_already_sent(self):
"""already_sent=True alone must NOT suppress final delivery."""
sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
response = {"final_response": "text", "response_previewed": False}
# Reproduce the logic from run.py return path (post-fix)
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
assert response.get("already_sent") is True
assert "already_sent" not in response
def test_already_sent_not_set_when_nothing_sent(self):
"""When stream consumer hasn't sent anything, already_sent should
@ -193,24 +196,26 @@ class TestAlreadySentWithoutResponsePreviewed:
response = {"final_response": "text", "response_previewed": False}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
assert "already_sent" not in response
def test_already_sent_set_on_final_response_sent(self):
"""final_response_sent=True should still work as before."""
"""final_response_sent=True should suppress duplicate final sends."""
sc = self._make_mock_stream_consumer(already_sent=False, final_response_sent=True)
response = {"final_response": "text"}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
assert response.get("already_sent") is True
@ -222,10 +227,11 @@ class TestAlreadySentWithoutResponsePreviewed:
response = {"final_response": "Error: something broke", "failed": True}
if sc and isinstance(response, dict) and not response.get("failed"):
if (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
assert "already_sent" not in response
@ -255,10 +261,9 @@ class TestEmptyResponseNotSuppressed:
if sc and isinstance(response, dict) and not response.get("failed"):
_final = response.get("final_response") or ""
_is_empty_sentinel = not _final or _final == "(empty)"
if not _is_empty_sentinel and (
getattr(sc, "final_response_sent", False)
or getattr(sc, "already_sent", False)
):
_streamed = bool(sc and getattr(sc, "final_response_sent", False))
_previewed = bool(response.get("response_previewed"))
if not _is_empty_sentinel and (_streamed or _previewed):
response["already_sent"] = True
def test_empty_sentinel_not_suppressed_with_already_sent(self):
@ -283,10 +288,10 @@ class TestEmptyResponseNotSuppressed:
self._apply_suppression_logic(response, sc)
assert "already_sent" not in response
def test_real_response_still_suppressed_with_already_sent(self):
"""Normal non-empty response should still be suppressed when
streaming delivered content."""
sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=False)
def test_real_response_still_suppressed_only_when_final_delivery_confirmed(self):
"""Normal non-empty response should be suppressed only when the final
response was actually streamed."""
sc = self._make_mock_stream_consumer(already_sent=True, final_response_sent=True)
response = {"final_response": "Here are the search results..."}
self._apply_suppression_logic(response, sc)
assert response.get("already_sent") is True
@ -299,8 +304,8 @@ class TestEmptyResponseNotSuppressed:
assert "already_sent" not in response
class TestQueuedMessageAlreadyStreamed:
"""The queued-message path should detect that the first response was
already streamed (already_sent=True) even without response_previewed."""
"""The queued-message path should skip the first response only when the
final response was actually streamed."""
def _make_mock_sc(self, already_sent=False, final_response_sent=False):
return SimpleNamespace(
@ -308,18 +313,38 @@ class TestQueuedMessageAlreadyStreamed:
final_response_sent=final_response_sent,
)
def test_queued_path_detects_already_streamed(self):
"""already_sent=True on stream consumer means first response was
streamed skip re-sending before processing queued message."""
_sc = self._make_mock_sc(already_sent=True)
def test_queued_path_only_skips_send_when_final_response_was_streamed(self):
"""Partial streamed output alone must not suppress the first response
before the queued follow-up is processed."""
_sc = self._make_mock_sc(already_sent=True, final_response_sent=False)
# Reproduce the queued-message logic from run.py (post-fix)
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
_sc and getattr(_sc, "final_response_sent", False)
)
assert _already_streamed is False
def test_queued_path_detects_confirmed_final_stream_delivery(self):
"""Confirmed final streamed delivery should skip the resend."""
_sc = self._make_mock_sc(already_sent=True, final_response_sent=True)
response = {"response_previewed": False}
_already_streamed = bool(
(_sc and getattr(_sc, "final_response_sent", False))
or bool(response.get("response_previewed"))
)
assert _already_streamed is True
def test_queued_path_detects_previewed_response_delivery(self):
"""A response already previewed via the adapter should not be resent
before processing the queued follow-up."""
_sc = self._make_mock_sc(already_sent=False, final_response_sent=False)
response = {"response_previewed": True}
_already_streamed = bool(
(_sc and getattr(_sc, "final_response_sent", False))
or bool(response.get("response_previewed"))
)
assert _already_streamed is True
@ -327,14 +352,10 @@ class TestQueuedMessageAlreadyStreamed:
def test_queued_path_sends_when_not_streamed(self):
"""Nothing was streamed — first response should be sent before
processing the queued message."""
_sc = self._make_mock_sc(already_sent=False)
_sc = self._make_mock_sc(already_sent=False, final_response_sent=False)
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
_sc and getattr(_sc, "final_response_sent", False)
)
assert _already_streamed is False
@ -344,11 +365,96 @@ class TestQueuedMessageAlreadyStreamed:
_sc = None
_already_streamed = bool(
_sc
and (
getattr(_sc, "final_response_sent", False)
or getattr(_sc, "already_sent", False)
)
_sc and getattr(_sc, "final_response_sent", False)
)
assert _already_streamed is False
# ===================================================================
# Test 4: stream_consumer.py — cancellation handler delivery confirmation
# ===================================================================
class TestCancellationHandlerDeliveryConfirmation:
"""The stream consumer's cancellation handler should only set
final_response_sent when the best-effort send actually succeeds.
Partial content (already_sent=True) alone must not promote to
final_response_sent that would suppress the gateway's fallback
send even when the user never received the real answer."""
def test_partial_only_no_accumulated_stays_false(self):
"""Cancelled after sending intermediate text, nothing accumulated.
final_response_sent must stay False so the gateway fallback fires."""
already_sent = True
final_response_sent = False
accumulated = ""
message_id = None
_best_effort_ok = False
if accumulated and message_id:
_best_effort_ok = True # wouldn't enter
if _best_effort_ok and not final_response_sent:
final_response_sent = True
assert final_response_sent is False
def test_best_effort_succeeds_sets_true(self):
"""When accumulated content exists and best-effort send succeeds,
final_response_sent should become True."""
already_sent = True
final_response_sent = False
accumulated = "Here are the search results..."
message_id = "msg_123"
_best_effort_ok = False
if accumulated and message_id:
_best_effort_ok = True # simulating successful _send_or_edit
if _best_effort_ok and not final_response_sent:
final_response_sent = True
assert final_response_sent is True
def test_best_effort_fails_stays_false(self):
"""When best-effort send fails (flood control, network), the
gateway fallback must deliver the response."""
already_sent = True
final_response_sent = False
accumulated = "Here are the search results..."
message_id = "msg_123"
_best_effort_ok = False
if accumulated and message_id:
_best_effort_ok = False # simulating failed _send_or_edit
if _best_effort_ok and not final_response_sent:
final_response_sent = True
assert final_response_sent is False
def test_preserves_existing_true(self):
"""If final_response_sent was already True before cancellation,
it must remain True regardless."""
already_sent = True
final_response_sent = True
accumulated = ""
message_id = None
_best_effort_ok = False
if accumulated and message_id:
pass
if _best_effort_ok and not final_response_sent:
final_response_sent = True
assert final_response_sent is True
def test_old_behavior_would_have_promoted_partial(self):
"""Verify the old code would have incorrectly promoted
already_sent to final_response_sent even with no accumulated
content proving the bug existed."""
already_sent = True
final_response_sent = False
# OLD cancellation handler logic:
if already_sent:
final_response_sent = True
assert final_response_sent is True # the bug: partial promoted to final

View file

@ -0,0 +1,54 @@
"""Tests for Unicode dash normalization in /insights command flag parsing.
Telegram on iOS auto-converts -- to em/en dashes. The /insights handler
normalizes these before parsing --days and --source flags.
"""
import re
import pytest
# The regex from gateway/run.py insights handler
_UNICODE_DASH_RE = re.compile(r'[\u2012\u2013\u2014\u2015](days|source)')
def _normalize_insights_args(raw: str) -> str:
"""Apply the same normalization as the /insights handler."""
return _UNICODE_DASH_RE.sub(r'--\1', raw)
class TestInsightsUnicodeDashFlags:
"""--days and --source must survive iOS Unicode dash conversion."""
@pytest.mark.parametrize("input_str,expected", [
# Standard double hyphen (baseline)
("--days 7", "--days 7"),
("--source telegram", "--source telegram"),
# Em dash (U+2014)
("\u2014days 7", "--days 7"),
("\u2014source telegram", "--source telegram"),
# En dash (U+2013)
("\u2013days 7", "--days 7"),
("\u2013source telegram", "--source telegram"),
# Figure dash (U+2012)
("\u2012days 7", "--days 7"),
# Horizontal bar (U+2015)
("\u2015days 7", "--days 7"),
# Combined flags with em dashes
("\u2014days 30 \u2014source cli", "--days 30 --source cli"),
])
def test_unicode_dash_normalized(self, input_str, expected):
result = _normalize_insights_args(input_str)
assert result == expected
def test_regular_hyphens_unaffected(self):
"""Normal --days/--source must pass through unchanged."""
assert _normalize_insights_args("--days 7 --source discord") == "--days 7 --source discord"
def test_bare_number_still_works(self):
"""Shorthand /insights 7 (no flag) must not be mangled."""
assert _normalize_insights_args("7") == "7"
def test_no_flags_unchanged(self):
"""Input with no flags passes through as-is."""
assert _normalize_insights_args("") == ""
assert _normalize_insights_args("30") == "30"

View file

@ -1,5 +1,6 @@
"""Tests for topic-aware gateway progress updates."""
import asyncio
import importlib
import sys
import time
@ -415,6 +416,21 @@ class QueuedCommentaryAgent:
}
class BackgroundReviewAgent:
def __init__(self, **kwargs):
self.background_review_callback = kwargs.get("background_review_callback")
self.tools = []
def run_conversation(self, message, conversation_history=None, task_id=None):
if self.background_review_callback:
self.background_review_callback("💾 Skill 'prospect-scanner' created.")
return {
"final_response": "done",
"messages": [],
"api_calls": 1,
}
class VerboseAgent:
"""Agent that emits a tool call with args whose JSON exceeds 200 chars."""
LONG_CODE = "x" * 300
@ -668,6 +684,66 @@ async def test_run_agent_queued_message_does_not_treat_commentary_as_final(monke
assert "final response 1" in sent_texts
@pytest.mark.asyncio
async def test_run_agent_defers_background_review_notification_until_release(monkeypatch, tmp_path):
adapter, result = await _run_with_agent(
monkeypatch,
tmp_path,
BackgroundReviewAgent,
session_id="sess-bg-review-order",
config_data={"display": {"interim_assistant_messages": True}},
)
assert result["final_response"] == "done"
assert adapter.sent == []
@pytest.mark.asyncio
async def test_base_processing_releases_post_delivery_callback_after_main_send():
"""Post-delivery callbacks on the adapter fire after the main response."""
adapter = ProgressCaptureAdapter()
async def _handler(event):
return "done"
adapter.set_message_handler(_handler)
released = []
def _post_delivery_cb():
released.append(True)
adapter.sent.append(
{
"chat_id": "bg-review",
"content": "💾 Skill 'prospect-scanner' created.",
"reply_to": None,
"metadata": None,
}
)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="-1001",
chat_type="group",
thread_id="17585",
)
event = MessageEvent(
text="hello",
message_type=MessageType.TEXT,
source=source,
message_id="msg-1",
)
session_key = "agent:main:telegram:group:-1001:17585"
adapter._active_sessions[session_key] = asyncio.Event()
adapter._post_delivery_callbacks[session_key] = _post_delivery_cb
await adapter._process_message_background(event, session_key)
sent_texts = [call["content"] for call in adapter.sent]
assert sent_texts == ["done", "💾 Skill 'prospect-scanner' created."]
assert released == [True]
@pytest.mark.asyncio
async def test_verbose_mode_does_not_truncate_args_by_default(monkeypatch, tmp_path):
"""Verbose mode with default tool_preview_length (0) should NOT truncate args.

View file

@ -283,6 +283,19 @@ class TestBuildSessionContextPrompt:
assert "Local" in prompt
assert "machine running this agent" in prompt
def test_local_delivery_path_uses_display_hermes_home(self):
config = GatewayConfig()
source = SessionSource(
platform=Platform.LOCAL, chat_id="cli",
chat_name="CLI terminal", chat_type="dm",
)
ctx = build_session_context(source, config)
with patch("hermes_constants.display_hermes_home", return_value="~/.hermes/profiles/coder"):
prompt = build_session_context_prompt(ctx)
assert "~/.hermes/profiles/coder/cron/output/" in prompt
def test_whatsapp_prompt(self):
config = GatewayConfig(
platforms={

View file

@ -209,11 +209,13 @@ def test_set_session_env_includes_session_key():
# Capture baseline value before setting (may be non-empty from another
# test in the same pytest-xdist worker sharing the context).
baseline = get_session_env("HERMES_SESSION_KEY")
tokens = runner._set_session_env(context)
assert get_session_env("HERMES_SESSION_KEY") == "tg:-1001:17585"
runner._clear_session_env(tokens)
assert get_session_env("HERMES_SESSION_KEY") == baseline
# After clearing, the session key must not retain the value we just set.
# The exact post-clear value depends on context propagation from other
# tests, so only check that our value was removed, not what replaced it.
assert get_session_env("HERMES_SESSION_KEY") != "tg:-1001:17585"
def test_session_key_no_race_condition_with_contextvars(monkeypatch):
@ -251,3 +253,72 @@ def test_session_key_no_race_condition_with_contextvars(monkeypatch):
assert results["session-B"] == "session-B", (
f"Session B got '{results['session-B']}' instead of 'session-B' — race condition!"
)
@pytest.mark.asyncio
async def test_run_in_executor_with_context_preserves_session_env(monkeypatch):
"""Gateway executor work should inherit session contextvars for tool routing."""
runner = object.__new__(GatewayRunner)
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_THREAD_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_USER_ID", raising=False)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="2144471399",
chat_type="dm",
user_id="123456",
user_name="alice",
thread_id=None,
)
context = SessionContext(
source=source,
connected_platforms=[],
home_channels={},
session_key="agent:main:telegram:dm:2144471399",
)
tokens = runner._set_session_env(context)
try:
result = await runner._run_in_executor_with_context(
lambda: {
"platform": get_session_env("HERMES_SESSION_PLATFORM"),
"chat_id": get_session_env("HERMES_SESSION_CHAT_ID"),
"user_id": get_session_env("HERMES_SESSION_USER_ID"),
"session_key": get_session_env("HERMES_SESSION_KEY"),
}
)
finally:
runner._clear_session_env(tokens)
assert result == {
"platform": "telegram",
"chat_id": "2144471399",
"user_id": "123456",
"session_key": "agent:main:telegram:dm:2144471399",
}
@pytest.mark.asyncio
async def test_run_in_executor_with_context_forwards_args():
"""_run_in_executor_with_context should forward *args to the callable."""
runner = object.__new__(GatewayRunner)
def add(a, b):
return a + b
result = await runner._run_in_executor_with_context(add, 3, 7)
assert result == 10
@pytest.mark.asyncio
async def test_run_in_executor_with_context_propagates_exceptions():
"""Exceptions inside the executor should propagate to the caller."""
runner = object.__new__(GatewayRunner)
def blow_up():
raise ValueError("boom")
with pytest.raises(ValueError, match="boom"):
await runner._run_in_executor_with_context(blow_up)

View file

@ -14,7 +14,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent, MessageType
from gateway.platforms.base import MessageEvent, MessageType, merge_pending_message_event
from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
from gateway.session import SessionSource, build_session_key
@ -184,6 +184,80 @@ async def test_second_message_during_sentinel_queued_not_duplicate():
await task1
def test_merge_pending_message_event_merges_text_and_photo_followups():
    """Two queued follow-ups (text, then photo) collapse into one merged event."""
    origin = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="12345",
        chat_type="dm",
        user_id="u1",
    )
    key = build_session_key(origin)
    queue = {}

    first = MessageEvent(
        text="first follow-up",
        message_type=MessageType.TEXT,
        source=origin,
    )
    second = MessageEvent(
        text="see screenshot",
        message_type=MessageType.PHOTO,
        source=origin,
        media_urls=["/tmp/test.png"],
        media_types=["image/png"],
    )

    for followup in (first, second):
        merge_pending_message_event(queue, key, followup, merge_text=True)

    combined = queue[key]
    # The later PHOTO event wins the type; texts are joined with a blank line.
    assert combined.message_type == MessageType.PHOTO
    assert combined.text == "first follow-up\n\nsee screenshot"
    assert combined.media_urls == ["/tmp/test.png"]
    assert combined.media_types == ["image/png"]
@pytest.mark.asyncio
async def test_recent_telegram_text_followup_is_queued_without_interrupt():
    """A text follow-up while the agent is busy is queued, not an interrupt."""
    runner = _make_runner()
    incoming = _make_event(text="follow-up")
    key = build_session_key(incoming.source)

    busy_agent = MagicMock()
    busy_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
    runner._running_agents[key] = busy_agent
    import time as _time
    runner._running_agents_ts[key] = _time.time()

    outcome = await runner._handle_message(incoming)

    assert outcome is None
    busy_agent.interrupt.assert_not_called()
    telegram_adapter = runner.adapters[Platform.TELEGRAM]
    assert telegram_adapter._pending_messages[key].text == "follow-up"
@pytest.mark.asyncio
async def test_recent_telegram_followups_append_in_pending_queue():
    """Consecutive follow-ups accumulate in the pending queue, newline-joined."""
    runner = _make_runner()
    part_one = _make_event(text="part one")
    part_two = _make_event(text="part two")
    key = build_session_key(part_one.source)

    busy_agent = MagicMock()
    busy_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
    runner._running_agents[key] = busy_agent
    import time as _time
    runner._running_agents_ts[key] = _time.time()

    for event in (part_one, part_two):
        await runner._handle_message(event)

    busy_agent.interrupt.assert_not_called()
    telegram_adapter = runner.adapters[Platform.TELEGRAM]
    assert telegram_adapter._pending_messages[key].text == "part one\npart two"
# ------------------------------------------------------------------
# Test 5: Sentinel not placed for command messages
# ------------------------------------------------------------------
@ -273,6 +347,7 @@ async def test_stop_hard_kills_running_agent():
# Simulate a running (possibly hung) agent
fake_agent = MagicMock()
fake_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
runner._running_agents[session_key] = fake_agent
# Send /stop
@ -305,6 +380,7 @@ async def test_stop_clears_pending_messages():
)
fake_agent = MagicMock()
fake_agent.get_activity_summary.return_value = {"seconds_since_activity": 0}
runner._running_agents[session_key] = fake_agent
runner._pending_messages[session_key] = "some queued text"

View file

@ -1678,11 +1678,11 @@ class TestProgressMessageThread:
msg_event = captured_events[0]
source = msg_event.source
# For a top-level DM: source.thread_id should remain None
# (session keying must not be affected)
assert source.thread_id is None, (
"source.thread_id must stay None for top-level DMs "
"so they share one continuous session"
# With default dm_top_level_threads_as_sessions=True, source.thread_id
# should equal the message ts so each DM thread gets its own session.
assert source.thread_id == "1234567890.000001", (
"source.thread_id must equal the message ts for top-level DMs "
"so each reply thread gets its own session"
)
# The message_id should be the event's ts — this is what the gateway
@ -1707,6 +1707,34 @@ class TestProgressMessageThread:
"ensuring progress messages land in the thread"
)
@pytest.mark.asyncio
async def test_dm_toplevel_shares_session_when_disabled(self, adapter):
    """Opting out restores legacy single-session-per-DM-channel behavior."""
    adapter.config.extra["dm_top_level_threads_as_sessions"] = False
    slack_event = {
        "channel": "D_DM",
        "channel_type": "im",
        "user": "U_USER",
        "text": "Hello bot",
        "ts": "1234567890.000001",
    }
    seen = []
    adapter.handle_message = AsyncMock(side_effect=lambda e: seen.append(e))
    with patch.object(adapter, "_resolve_user_name", new=AsyncMock(return_value="testuser")):
        await adapter._handle_slack_message(slack_event)
    assert len(seen) == 1
    origin = seen[0].source
    assert origin.thread_id is None, (
        "source.thread_id must stay None when "
        "dm_top_level_threads_as_sessions is disabled"
    )
@pytest.mark.asyncio
async def test_channel_mention_progress_uses_thread_ts(self, adapter):
"""Progress messages for a channel @mention should go into the reply thread."""

View file

@ -279,3 +279,28 @@ async def test_status_command_bypasses_active_session_guard():
assert "Agent Running" in sent[0]
assert not interrupt_event.is_set(), "/status incorrectly triggered an agent interrupt"
assert session_key not in adapter._pending_messages, "/status was incorrectly queued"
@pytest.mark.asyncio
async def test_profile_command_reports_custom_root_profile(monkeypatch, tmp_path):
    """Gateway /profile detects custom-root profiles (not under ~/.hermes)."""
    from pathlib import Path

    entry = SessionEntry(
        session_key=build_session_key(_make_source()),
        session_id="sess-1",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        platform=Platform.TELEGRAM,
        chat_type="dm",
    )
    runner = _make_runner(entry)

    # Point HERMES_HOME somewhere outside the (patched) user home directory.
    custom_home = tmp_path / "profiles" / "coder"
    monkeypatch.setenv("HERMES_HOME", str(custom_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path / "unrelated-home")

    reply = await runner._handle_profile_command(_make_event("/profile"))
    assert "**Profile:** `coder`" in reply
    assert f"**Home:** `{custom_home}`" in reply

View file

@ -50,9 +50,9 @@ from gateway.platforms.telegram import TelegramAdapter
from gateway.config import Platform, PlatformConfig
def _make_adapter():
def _make_adapter(extra=None):
"""Create a TelegramAdapter with mocked internals."""
config = PlatformConfig(enabled=True, token="test-token")
config = PlatformConfig(enabled=True, token="test-token", extra=extra or {})
adapter = TelegramAdapter(config)
adapter._bot = AsyncMock()
adapter._app = MagicMock()
@ -134,6 +134,23 @@ class TestTelegramExecApproval:
)
assert result.success is False
@pytest.mark.asyncio
async def test_disable_link_previews_sets_preview_kwargs(self):
    """With disable_link_previews set, the send carries a preview-suppressing kwarg."""
    adapter = _make_adapter(extra={"disable_link_previews": True})
    sent = MagicMock()
    sent.message_id = 42
    adapter._bot.send_message = AsyncMock(return_value=sent)

    await adapter.send_exec_approval(
        chat_id="12345", command="ls", session_key="s"
    )

    call_kwargs = adapter._bot.send_message.call_args[1]
    # Either the legacy flag or the newer link_preview_options must be present.
    suppressed = (
        call_kwargs.get("disable_web_page_preview") is True
        or call_kwargs.get("link_preview_options") is not None
    )
    assert suppressed
@pytest.mark.asyncio
async def test_truncates_long_command(self):
adapter = _make_adapter()

View file

@ -45,6 +45,11 @@ class FakeRetryAfter(Exception):
# Build a fake telegram module tree so the adapter's internal imports work
_fake_telegram = types.ModuleType("telegram")
_fake_telegram.Update = object
_fake_telegram.Bot = object
_fake_telegram.Message = object
_fake_telegram.InlineKeyboardButton = object
_fake_telegram.InlineKeyboardMarkup = object
_fake_telegram_error = types.ModuleType("telegram.error")
_fake_telegram_error.NetworkError = FakeNetworkError
_fake_telegram_error.BadRequest = FakeBadRequest
@ -52,7 +57,21 @@ _fake_telegram_error.TimedOut = FakeTimedOut
_fake_telegram.error = _fake_telegram_error
_fake_telegram_constants = types.ModuleType("telegram.constants")
_fake_telegram_constants.ParseMode = SimpleNamespace(MARKDOWN_V2="MarkdownV2")
_fake_telegram_constants.ChatType = SimpleNamespace(
GROUP="group",
SUPERGROUP="supergroup",
CHANNEL="channel",
)
_fake_telegram.constants = _fake_telegram_constants
_fake_telegram_ext = types.ModuleType("telegram.ext")
_fake_telegram_ext.Application = object
_fake_telegram_ext.CommandHandler = object
_fake_telegram_ext.CallbackQueryHandler = object
_fake_telegram_ext.MessageHandler = object
_fake_telegram_ext.ContextTypes = SimpleNamespace(DEFAULT_TYPE=object)
_fake_telegram_ext.filters = object
_fake_telegram_request = types.ModuleType("telegram.request")
_fake_telegram_request.HTTPXRequest = object
@pytest.fixture(autouse=True)
@ -61,6 +80,8 @@ def _inject_fake_telegram(monkeypatch):
monkeypatch.setitem(sys.modules, "telegram", _fake_telegram)
monkeypatch.setitem(sys.modules, "telegram.error", _fake_telegram_error)
monkeypatch.setitem(sys.modules, "telegram.constants", _fake_telegram_constants)
monkeypatch.setitem(sys.modules, "telegram.ext", _fake_telegram_ext)
monkeypatch.setitem(sys.modules, "telegram.request", _fake_telegram_request)
def _make_adapter():
@ -68,6 +89,7 @@ def _make_adapter():
config = PlatformConfig(enabled=True, token="fake-token")
adapter = object.__new__(TelegramAdapter)
adapter.config = config
adapter._config = config
adapter._platform = Platform.TELEGRAM
adapter._connected = True
@ -82,6 +104,81 @@ def _make_adapter():
return adapter
def test_forum_general_topic_without_message_thread_id_keeps_thread_context():
    """Forum General-topic messages should keep synthetic thread context."""
    from gateway.platforms import telegram as telegram_mod

    adapter = _make_adapter()
    incoming = SimpleNamespace(
        text="hello from General",
        caption=None,
        chat=SimpleNamespace(
            id=-100123,
            type=telegram_mod.ChatType.SUPERGROUP,
            is_forum=True,
            title="Forum group",
        ),
        from_user=SimpleNamespace(id=456, full_name="Alice"),
        message_thread_id=None,
        reply_to_message=None,
        message_id=10,
        date=None,
    )

    built = adapter._build_message_event(incoming, msg_type=SimpleNamespace(value="text"))

    assert built.source.chat_id == "-100123"
    assert built.source.chat_type == "group"
    # General topic is synthesized as thread "1" even without message_thread_id.
    assert built.source.thread_id == "1"
@pytest.mark.asyncio
async def test_send_omits_general_topic_thread_id():
    """Telegram sends to forum General should omit message_thread_id=1."""
    adapter = _make_adapter()
    recorded = []

    async def fake_send_message(**kwargs):
        recorded.append(dict(kwargs))
        return SimpleNamespace(message_id=42)

    adapter._bot = SimpleNamespace(send_message=fake_send_message)

    outcome = await adapter.send(
        chat_id="-100123",
        content="test message",
        metadata={"thread_id": "1"},
    )

    assert outcome.success is True
    assert len(recorded) == 1
    only_call = recorded[0]
    assert only_call["chat_id"] == -100123
    assert only_call["text"] == "test message"
    assert only_call["reply_to_message_id"] is None
    assert only_call["message_thread_id"] is None
@pytest.mark.asyncio
async def test_send_typing_retries_without_general_thread_when_not_found():
    """Typing for forum General should fall back if Telegram rejects thread 1."""
    adapter = _make_adapter()
    attempts = []

    async def fake_send_chat_action(**kwargs):
        attempts.append(dict(kwargs))
        if kwargs.get("message_thread_id") == 1:
            raise FakeBadRequest("Message thread not found")

    adapter._bot = SimpleNamespace(send_chat_action=fake_send_chat_action)

    await adapter.send_typing("-100123", metadata={"thread_id": "1"})

    # First attempt targets thread 1, then a retry with no thread at all.
    assert attempts == [
        {"chat_id": -100123, "action": "typing", "message_thread_id": 1},
        {"chat_id": -100123, "action": "typing", "message_thread_id": None},
    ]
@pytest.mark.asyncio
async def test_send_retries_without_thread_on_thread_not_found():
"""When message_thread_id causes 'thread not found', retry without it."""

View file

@ -613,6 +613,7 @@ class TestDetectVenvDir:
# Not inside a virtualenv
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
dot_venv = tmp_path / ".venv"
@ -624,6 +625,7 @@ class TestDetectVenvDir:
def test_falls_back_to_venv_directory(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
venv = tmp_path / "venv"
@ -635,6 +637,7 @@ class TestDetectVenvDir:
def test_prefers_dot_venv_over_venv(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
(tmp_path / ".venv").mkdir()
@ -646,6 +649,7 @@ class TestDetectVenvDir:
def test_returns_none_when_no_virtualenv(self, tmp_path, monkeypatch):
monkeypatch.setattr("sys.prefix", "/usr")
monkeypatch.setattr("sys.base_prefix", "/usr")
monkeypatch.delenv("VIRTUAL_ENV", raising=False)
monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
result = gateway_cli._detect_venv_dir()

View file

@ -0,0 +1,101 @@
"""Regression tests for Copilot api_mode recomputation during /model switch.
When switching models within the Copilot provider (e.g. GPT-5 Claude),
the stale api_mode from resolve_runtime_provider must be overridden with
a fresh value computed from the *new* model. Without the fix, Claude
requests went through the Responses API and failed with
``unsupported_api_for_model``.
"""
from unittest.mock import patch
from hermes_cli.model_switch import switch_model
# Canned validate_requested_model() response: the requested model is accepted,
# recognized, and should be persisted, with no user-facing message to show.
_MOCK_VALIDATION = {
    "accepted": True,
    "persist": True,
    "recognized": True,
    "message": None,
}
def _run_copilot_switch(
    raw_input: str,
    current_provider: str = "copilot",
    current_model: str = "gpt-5.4",
    explicit_provider: str = "",
    runtime_api_mode: str = "codex_responses",
):
    """Run switch_model with Copilot mocks and return the result.

    ``runtime_api_mode`` is the (possibly stale) api_mode that
    resolve_runtime_provider reports; the regression under test is that
    switch_model must recompute it from the newly selected model.
    """
    runtime_stub = {
        "api_key": "ghu_test_token",
        "base_url": "https://api.githubcopilot.com",
        "api_mode": runtime_api_mode,
    }
    with (
        patch("hermes_cli.model_switch.resolve_alias", return_value=None),
        patch("hermes_cli.model_switch.list_provider_models", return_value=[]),
        patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            return_value=runtime_stub,
        ),
        patch(
            "hermes_cli.models.validate_requested_model",
            return_value=_MOCK_VALIDATION,
        ),
        patch("hermes_cli.model_switch.get_model_info", return_value=None),
        patch("hermes_cli.model_switch.get_model_capabilities", return_value=None),
        patch("hermes_cli.models.detect_provider_for_model", return_value=None),
    ):
        return switch_model(
            raw_input=raw_input,
            current_provider=current_provider,
            current_model=current_model,
            explicit_provider=explicit_provider,
        )
def test_same_provider_copilot_switch_recomputes_api_mode():
    """GPT-5 → Claude on copilot: api_mode must flip to chat_completions."""
    outcome = _run_copilot_switch(
        raw_input="claude-opus-4.6",
        current_provider="copilot",
        current_model="gpt-5.4",
    )
    assert outcome.success, f"switch_model failed: {outcome.error_message}"
    assert outcome.new_model == "claude-opus-4.6"
    assert outcome.target_provider == "copilot"
    assert outcome.api_mode == "chat_completions"
def test_explicit_copilot_switch_uses_selected_model_api_mode():
    """Cross-provider switch to copilot: api_mode from new model, not stale runtime."""
    outcome = _run_copilot_switch(
        raw_input="claude-opus-4.6",
        current_provider="openrouter",
        current_model="anthropic/claude-sonnet-4.6",
        explicit_provider="copilot",
    )
    assert outcome.success, f"switch_model failed: {outcome.error_message}"
    assert outcome.new_model == "claude-opus-4.6"
    assert outcome.target_provider == "github-copilot"
    assert outcome.api_mode == "chat_completions"
def test_copilot_gpt5_keeps_codex_responses():
    """GPT-5 → GPT-5 on copilot: api_mode must stay codex_responses."""
    outcome = _run_copilot_switch(
        raw_input="gpt-5.4-mini",
        current_provider="copilot",
        current_model="gpt-5.4",
        runtime_api_mode="codex_responses",
    )
    assert outcome.success, f"switch_model failed: {outcome.error_message}"
    assert outcome.new_model == "gpt-5.4-mini"
    assert outcome.target_provider == "copilot"
    # gpt-5.4-mini is a GPT-5 variant — should use codex_responses
    # (gpt-5-mini is the special case that uses chat_completions)
    assert outcome.api_mode == "codex_responses"

View file

@ -163,7 +163,7 @@ class TestNormalizeProvider:
class TestProviderLabel:
def test_known_labels_and_auto(self):
assert provider_label("anthropic") == "Anthropic"
assert provider_label("kimi") == "Kimi / Moonshot"
assert provider_label("kimi") == "Kimi / Kimi Coding Plan"
assert provider_label("copilot") == "GitHub Copilot"
assert provider_label("copilot-acp") == "GitHub Copilot ACP"
assert provider_label("auto") == "Auto"

View file

@ -0,0 +1,351 @@
"""Tests for Ollama Cloud provider integration."""
import os
import pytest
from unittest.mock import patch, MagicMock
from hermes_cli.auth import PROVIDER_REGISTRY, resolve_provider, resolve_api_key_provider_credentials
from hermes_cli.models import _PROVIDER_MODELS, _PROVIDER_LABELS, _PROVIDER_ALIASES, normalize_provider
from hermes_cli.model_normalize import normalize_model_for_provider
from agent.model_metadata import _URL_TO_PROVIDER, _PROVIDER_PREFIXES
from agent.models_dev import PROVIDER_TO_MODELS_DEV, list_agentic_models
# ── Provider Registry ──
class TestOllamaCloudProviderRegistry:
    """ollama-cloud is registered with the expected static provider config."""

    def test_ollama_cloud_in_registry(self):
        assert "ollama-cloud" in PROVIDER_REGISTRY

    def test_ollama_cloud_config(self):
        entry = PROVIDER_REGISTRY["ollama-cloud"]
        assert entry.id == "ollama-cloud"
        assert entry.name == "Ollama Cloud"
        assert entry.auth_type == "api_key"
        assert entry.inference_base_url == "https://ollama.com/v1"

    def test_ollama_cloud_env_vars(self):
        entry = PROVIDER_REGISTRY["ollama-cloud"]
        assert entry.api_key_env_vars == ("OLLAMA_API_KEY",)
        assert entry.base_url_env_var == "OLLAMA_BASE_URL"

    def test_ollama_cloud_base_url(self):
        assert "ollama.com" in PROVIDER_REGISTRY["ollama-cloud"].inference_base_url
# ── Provider Aliases ──
# Every provider API-key variable that could influence auto-detection; the
# autouse fixture clears them all so each test starts from a clean environment.
PROVIDER_ENV_VARS = (
    "OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
    "GOOGLE_API_KEY", "GEMINI_API_KEY", "OLLAMA_API_KEY",
    "GLM_API_KEY", "ZAI_API_KEY", "KIMI_API_KEY",
    "MINIMAX_API_KEY", "DEEPSEEK_API_KEY",
)
@pytest.fixture(autouse=True)
def _clean_provider_env(monkeypatch):
    """Strip every provider API-key variable so detection tests start clean."""
    for env_name in PROVIDER_ENV_VARS:
        monkeypatch.delenv(env_name, raising=False)
class TestOllamaCloudAliases:
    """Alias handling: only unambiguous names route to ollama-cloud."""

    def test_explicit_ollama_cloud(self):
        # Canonical id resolves to itself.
        assert resolve_provider("ollama-cloud") == "ollama-cloud"

    def test_alias_ollama_underscore(self):
        """ollama_cloud (underscore) is the unambiguous cloud alias."""
        assert resolve_provider("ollama_cloud") == "ollama-cloud"

    def test_bare_ollama_stays_local(self):
        """Bare 'ollama' alias routes to 'custom' (local) — not cloud."""
        assert resolve_provider("ollama") == "custom"

    def test_models_py_aliases(self):
        # The models.py alias table must agree with resolve_provider.
        assert _PROVIDER_ALIASES.get("ollama_cloud") == "ollama-cloud"
        # bare "ollama" stays local
        assert _PROVIDER_ALIASES.get("ollama") == "custom"

    def test_normalize_provider(self):
        assert normalize_provider("ollama-cloud") == "ollama-cloud"
# ── Auto-detection ──
class TestOllamaCloudAutoDetection:
    """provider='auto' picks ollama-cloud when OLLAMA_API_KEY is the only key set."""

    def test_auto_detects_ollama_api_key(self, monkeypatch):
        # The autouse fixture cleared all provider keys, so OLLAMA_API_KEY
        # is the only credential visible to auto-detection here.
        monkeypatch.setenv("OLLAMA_API_KEY", "test-ollama-key")
        assert resolve_provider("auto") == "ollama-cloud"
# ── Credential Resolution ──
class TestOllamaCloudCredentials:
    """API-key and base-URL resolution for the ollama-cloud provider."""

    def test_resolve_with_ollama_api_key(self, monkeypatch):
        monkeypatch.setenv("OLLAMA_API_KEY", "ollama-secret")
        resolved = resolve_api_key_provider_credentials("ollama-cloud")
        assert resolved["provider"] == "ollama-cloud"
        assert resolved["api_key"] == "ollama-secret"
        assert resolved["base_url"] == "https://ollama.com/v1"

    def test_resolve_with_custom_base_url(self, monkeypatch):
        monkeypatch.setenv("OLLAMA_API_KEY", "key")
        monkeypatch.setenv("OLLAMA_BASE_URL", "https://custom.ollama/v1")
        resolved = resolve_api_key_provider_credentials("ollama-cloud")
        assert resolved["base_url"] == "https://custom.ollama/v1"

    def test_runtime_ollama_cloud(self, monkeypatch):
        monkeypatch.setenv("OLLAMA_API_KEY", "ollama-key")
        from hermes_cli.runtime_provider import resolve_runtime_provider
        runtime = resolve_runtime_provider(requested="ollama-cloud")
        assert runtime["provider"] == "ollama-cloud"
        assert runtime["api_mode"] == "chat_completions"
        assert runtime["api_key"] == "ollama-key"
        assert runtime["base_url"] == "https://ollama.com/v1"
# ── Model Catalog (dynamic — no static list) ──
class TestOllamaCloudModelCatalog:
    """Catalog wiring: dynamic model discovery, static label only."""

    def test_no_static_model_list(self):
        """Ollama Cloud models are fetched dynamically — no static list to maintain."""
        assert "ollama-cloud" not in _PROVIDER_MODELS

    def test_provider_label(self):
        # Human-readable label used in provider pickers.
        assert "ollama-cloud" in _PROVIDER_LABELS
        assert _PROVIDER_LABELS["ollama-cloud"] == "Ollama Cloud"
# ── Merged Model Discovery ──
class TestOllamaCloudMergedDiscovery:
    """fetch_ollama_cloud_models: live-API + models.dev merge, caching, fallbacks."""

    def test_merges_live_and_models_dev(self, tmp_path, monkeypatch):
        """Live API models appear first, models.dev additions fill gaps."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        mdev_payload = {
            "ollama-cloud": {
                "models": {
                    "glm-5": {"tool_call": True},
                    "kimi-k2.5": {"tool_call": True},
                    "nemotron-3-super": {"tool_call": True},
                }
            }
        }
        with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.5:397b", "glm-5"]), \
             patch("agent.models_dev.fetch_models_dev", return_value=mdev_payload):
            merged = fetch_ollama_cloud_models(force_refresh=True)
        # Live-API entries lead the list; models.dev entries follow, deduplicated.
        assert merged[0] == "qwen3.5:397b"
        assert merged[1] == "glm-5"
        assert "kimi-k2.5" in merged
        assert "nemotron-3-super" in merged
        assert merged.count("glm-5") == 1

    def test_falls_back_to_models_dev_without_api_key(self, tmp_path, monkeypatch):
        """Without API key, only models.dev results are returned."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
        mdev_payload = {"ollama-cloud": {"models": {"glm-5": {"tool_call": True}}}}
        with patch("agent.models_dev.fetch_models_dev", return_value=mdev_payload):
            assert fetch_ollama_cloud_models(force_refresh=True) == ["glm-5"]

    def test_uses_disk_cache(self, tmp_path, monkeypatch):
        """Second call returns cached results without hitting APIs."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        with patch("hermes_cli.models.fetch_api_models", return_value=["model-a"]) as api_mock, \
             patch("agent.models_dev.fetch_models_dev", return_value={}):
            assert fetch_ollama_cloud_models(force_refresh=True) == ["model-a"]
            assert api_mock.call_count == 1
            # Second call — should use disk cache, not call API
            assert fetch_ollama_cloud_models() == ["model-a"]
            assert api_mock.call_count == 1  # no extra API call

    def test_force_refresh_bypasses_cache(self, tmp_path, monkeypatch):
        """force_refresh=True always hits the API even with fresh cache."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        with patch("hermes_cli.models.fetch_api_models", return_value=["model-a"]) as api_mock, \
             patch("agent.models_dev.fetch_models_dev", return_value={}):
            fetch_ollama_cloud_models(force_refresh=True)
            fetch_ollama_cloud_models(force_refresh=True)
        assert api_mock.call_count == 2

    def test_stale_cache_used_on_total_failure(self, tmp_path, monkeypatch):
        """If both API and models.dev fail, stale cache is returned."""
        import json
        from hermes_cli.models import fetch_ollama_cloud_models, _save_ollama_cloud_cache
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        # Pre-populate a cache entry, then backdate it so it reads as stale.
        _save_ollama_cloud_cache(["stale-model"])
        cache_file = tmp_path / "ollama_cloud_models_cache.json"
        payload = json.loads(cache_file.read_text())
        payload["cached_at"] = 0  # epoch = very stale
        cache_file.write_text(json.dumps(payload))
        with patch("hermes_cli.models.fetch_api_models", return_value=None), \
             patch("agent.models_dev.fetch_models_dev", return_value={}):
            assert fetch_ollama_cloud_models(force_refresh=True) == ["stale-model"]

    def test_empty_on_total_failure_no_cache(self, tmp_path, monkeypatch):
        """Returns empty list when everything fails and no cache exists."""
        from hermes_cli.models import fetch_ollama_cloud_models
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)
        with patch("agent.models_dev.fetch_models_dev", return_value={}):
            assert fetch_ollama_cloud_models(force_refresh=True) == []
# ── Model Normalization ──
class TestOllamaCloudModelNormalization:
    """normalize_model_for_provider leaves Ollama Cloud model names untouched."""

    def test_passthrough_bare_name(self):
        """Ollama Cloud is a passthrough provider — model names used as-is."""
        assert normalize_model_for_provider("qwen3.5:397b", "ollama-cloud") == "qwen3.5:397b"

    def test_passthrough_with_tag(self):
        # ":<size>" tags survive normalization unchanged.
        assert normalize_model_for_provider("cogito-2.1:671b", "ollama-cloud") == "cogito-2.1:671b"

    def test_passthrough_no_tag(self):
        assert normalize_model_for_provider("glm-5", "ollama-cloud") == "glm-5"
# ── URL-to-Provider Mapping ──
class TestOllamaCloudUrlMapping:
    """Base-URL and model-prefix lookup tables know about ollama-cloud."""

    def test_url_to_provider(self):
        assert _URL_TO_PROVIDER.get("ollama.com") == "ollama-cloud"

    def test_provider_prefix_canonical(self):
        assert "ollama-cloud" in _PROVIDER_PREFIXES

    def test_provider_prefix_alias(self):
        # The short "ollama" prefix is also recognized in model strings.
        assert "ollama" in _PROVIDER_PREFIXES
# ── models.dev Integration ──
class TestOllamaCloudModelsDev:
    """models.dev provider mapping and agentic-model filtering."""

    def test_ollama_cloud_mapped(self):
        assert PROVIDER_TO_MODELS_DEV.get("ollama-cloud") == "ollama-cloud"

    def test_list_agentic_models_with_mock_data(self):
        """list_agentic_models filters correctly from mock models.dev data."""
        catalog = {
            "ollama-cloud": {
                "models": {
                    "qwen3.5:397b": {"tool_call": True},
                    "glm-5": {"tool_call": True},
                    "nemotron-3-nano:30b": {"tool_call": True},
                    "some-embedding:latest": {"tool_call": False},
                }
            }
        }
        with patch("agent.models_dev.fetch_models_dev", return_value=catalog):
            agentic = list_agentic_models("ollama-cloud")
        for expected in ("qwen3.5:397b", "glm-5", "nemotron-3-nano:30b"):
            assert expected in agentic
        assert "some-embedding:latest" not in agentic  # no tool_call
# ── Agent Init (no SyntaxError) ──
class TestOllamaCloudAgentInit:
    """run_agent imports cleanly and routes ollama-cloud through chat_completions."""

    def test_agent_imports_without_error(self):
        """Verify run_agent.py has no SyntaxError."""
        import importlib
        import run_agent
        importlib.reload(run_agent)

    def test_ollama_cloud_agent_uses_chat_completions(self, monkeypatch):
        """Ollama Cloud falls through to chat_completions — no special elif needed."""
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")
        with patch("run_agent.OpenAI") as fake_openai:
            fake_openai.return_value = MagicMock()
            from run_agent import AIAgent
            agent = AIAgent(
                model="qwen3.5:397b",
                provider="ollama-cloud",
                api_key="test-key",
                base_url="https://ollama.com/v1",
            )
            assert agent.api_mode == "chat_completions"
            assert agent.provider == "ollama-cloud"
# ── providers.py New System ──
class TestOllamaCloudProvidersNew:
    """ollama-cloud wiring in the newer hermes_cli.providers system."""

    def test_overlay_exists(self):
        from hermes_cli.providers import HERMES_OVERLAYS
        assert "ollama-cloud" in HERMES_OVERLAYS
        cloud_overlay = HERMES_OVERLAYS["ollama-cloud"]
        assert cloud_overlay.transport == "openai_chat"
        assert cloud_overlay.base_url_env_var == "OLLAMA_BASE_URL"

    def test_alias_resolves(self):
        from hermes_cli.providers import normalize_provider as np
        assert np("ollama") == "custom"  # bare "ollama" = local
        assert np("ollama-cloud") == "ollama-cloud"

    def test_label_override(self):
        from hermes_cli.providers import _LABEL_OVERRIDES
        assert _LABEL_OVERRIDES.get("ollama-cloud") == "Ollama Cloud"

    def test_get_label(self):
        from hermes_cli.providers import get_label
        assert get_label("ollama-cloud") == "Ollama Cloud"

    def test_get_provider(self):
        from hermes_cli.providers import get_provider
        provider_def = get_provider("ollama-cloud")
        assert provider_def is not None
        assert provider_def.id == "ollama-cloud"
        assert provider_def.transport == "openai_chat"
# ── Auxiliary Model ──
class TestOllamaCloudAuxiliary:
    """A small default auxiliary model is registered for ollama-cloud."""

    def test_aux_model_defined(self):
        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
        assert "ollama-cloud" in _API_KEY_PROVIDER_AUX_MODELS
        assert _API_KEY_PROVIDER_AUX_MODELS["ollama-cloud"] == "nemotron-3-nano:30b"

View file

@ -18,6 +18,8 @@ from hermes_cli.plugins import (
PluginManager,
PluginManifest,
get_plugin_manager,
get_plugin_command_handler,
get_plugin_commands,
get_pre_tool_call_block_message,
discover_plugins,
invoke_hook,
@ -605,7 +607,292 @@ class TestPreLlmCallTargetRouting:
assert "plain text C" in _plugin_user_context
# NOTE: TestPluginCommands removed register_command() was never implemented
# in PluginContext (hermes_cli/plugins.py). The tests referenced _plugin_commands,
# commands_registered, get_plugin_command_handler, and GATEWAY_KNOWN_COMMANDS
# integration — all of which are unimplemented features.
# ── TestPluginCommands ────────────────────────────────────────────────────
class TestPluginCommands:
"""Tests for plugin slash command registration via register_command()."""
def test_register_command_basic(self):
    """register_command() stores handler, description, and plugin name."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    handler = lambda args: f"echo {args}"
    ctx.register_command("mycmd", handler, description="My custom command")
    assert "mycmd" in mgr._plugin_commands
    stored = mgr._plugin_commands["mycmd"]
    assert stored["handler"] is handler
    assert stored["description"] == "My custom command"
    assert stored["plugin"] == "test-plugin"
def test_register_command_normalizes_name(self):
    """Names are lowercased, stripped, and leading slashes removed."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    ctx.register_command("/MyCmd ", lambda a: a, description="test")
    assert "mycmd" in mgr._plugin_commands
    assert "/MyCmd " not in mgr._plugin_commands
def test_register_command_empty_name_rejected(self, caplog):
    """Empty name after normalization is rejected with a warning."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    with caplog.at_level(logging.WARNING):
        ctx.register_command("", lambda a: a)
    assert len(mgr._plugin_commands) == 0
    assert "empty name" in caplog.text
def test_register_command_builtin_conflict_rejected(self, caplog):
    """Commands that conflict with built-in names are rejected."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    with caplog.at_level(logging.WARNING):
        ctx.register_command("help", lambda a: a)
    assert "help" not in mgr._plugin_commands
    assert "conflicts" in caplog.text.lower()
def test_register_command_default_description(self):
    """Missing description defaults to 'Plugin command'."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    ctx.register_command("status-cmd", lambda a: a)
    assert mgr._plugin_commands["status-cmd"]["description"] == "Plugin command"
def test_get_plugin_command_handler_found(self):
    """get_plugin_command_handler() returns the handler for a registered command."""
    mgr = PluginManager()
    ctx = PluginContext(PluginManifest(name="test-plugin", source="user"), mgr)
    handler = lambda args: f"result: {args}"
    ctx.register_command("mycmd", handler, description="test")
    with patch("hermes_cli.plugins._plugin_manager", mgr):
        assert get_plugin_command_handler("mycmd") is handler
def test_get_plugin_command_handler_not_found(self):
    """get_plugin_command_handler() returns None for unregistered commands."""
    mgr = PluginManager()
    with patch("hermes_cli.plugins._plugin_manager", mgr):
        assert get_plugin_command_handler("nonexistent") is None
def test_get_plugin_commands_returns_dict(self):
    """get_plugin_commands() returns the full commands dict."""
    manager = PluginManager()
    context = PluginContext(PluginManifest(name="test-plugin", source="user"), manager)
    for cmd_name, description in (("cmd-a", "A"), ("cmd-b", "B")):
        context.register_command(cmd_name, lambda a: a, description=description)
    with patch("hermes_cli.plugins._plugin_manager", manager):
        commands = get_plugin_commands()
    assert "cmd-a" in commands
    assert "cmd-b" in commands
    assert commands["cmd-a"]["description"] == "A"
def test_commands_tracked_on_loaded_plugin(self, tmp_path, monkeypatch):
    """Commands registered during discover_and_load() are tracked on LoadedPlugin."""
    home = tmp_path / "hermes_test"
    _make_plugin_dir(
        home / "plugins", "cmd-plugin",
        register_body=(
            'ctx.register_command("mycmd", lambda a: "ok", description="Test")'
        ),
    )
    monkeypatch.setenv("HERMES_HOME", str(home))
    manager = PluginManager()
    manager.discover_and_load()
    plugin = manager._plugins["cmd-plugin"]
    assert plugin.enabled
    assert "mycmd" in plugin.commands_registered
def test_commands_in_list_plugins_output(self, tmp_path, monkeypatch):
    """list_plugins() includes command count."""
    home = tmp_path / "hermes_test"
    _make_plugin_dir(
        home / "plugins", "cmd-plugin",
        register_body=(
            'ctx.register_command("mycmd", lambda a: "ok", description="Test")'
        ),
    )
    monkeypatch.setenv("HERMES_HOME", str(home))
    manager = PluginManager()
    manager.discover_and_load()
    listing = manager.list_plugins()
    assert len(listing) == 1
    assert listing[0]["commands"] == 1
def test_handler_receives_raw_args(self):
    """The handler is called with the raw argument string."""
    manager = PluginManager()
    context = PluginContext(PluginManifest(name="test-plugin", source="user"), manager)
    received = []
    context.register_command("echo", lambda args: received.append(args) or "ok")
    # Invoke the stored handler directly, as dispatch would.
    manager._plugin_commands["echo"]["handler"]("hello world")
    assert received == ["hello world"]
def test_multiple_plugins_register_different_commands(self):
    """Multiple plugins can each register their own commands."""
    manager = PluginManager()
    for plugin_name, cmd_name in [("plugin-a", "cmd-a"), ("plugin-b", "cmd-b")]:
        context = PluginContext(PluginManifest(name=plugin_name, source="user"), manager)
        context.register_command(cmd_name, lambda a: a, description=f"From {plugin_name}")
    commands = manager._plugin_commands
    assert "cmd-a" in commands
    assert "cmd-b" in commands
    # Each entry records which plugin owns it.
    assert commands["cmd-a"]["plugin"] == "plugin-a"
    assert commands["cmd-b"]["plugin"] == "plugin-b"
# ── TestPluginDispatchTool ────────────────────────────────────────────────
class TestPluginDispatchTool:
    """Tests for PluginContext.dispatch_tool() — tool dispatch with agent context."""

    @staticmethod
    def _make_ctx():
        """Return a (manager, context) pair for a bare user-sourced plugin."""
        manager = PluginManager()
        manifest = PluginManifest(name="test-plugin", source="user")
        return manager, PluginContext(manifest, manager)

    @staticmethod
    def _registry_returning(payload):
        """Return a mock tool registry whose dispatch() yields *payload*."""
        registry = MagicMock()
        registry.dispatch.return_value = payload
        return registry

    def test_dispatch_tool_calls_registry(self):
        """dispatch_tool() delegates to registry.dispatch()."""
        _, ctx = self._make_ctx()
        registry = self._registry_returning('{"result": "ok"}')
        with patch("hermes_cli.plugins.PluginContext.dispatch_tool.__module__", "hermes_cli.plugins"):
            with patch.dict("sys.modules", {}):
                with patch("tools.registry.registry", registry):
                    result = ctx.dispatch_tool("web_search", {"query": "test"})
        assert result == '{"result": "ok"}'

    def test_dispatch_tool_injects_parent_agent_from_cli_ref(self):
        """When _cli_ref has an agent, it's passed as parent_agent."""
        manager, ctx = self._make_ctx()
        agent = MagicMock()
        cli = MagicMock()
        cli.agent = agent
        manager._cli_ref = cli
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("delegate_task", {"goal": "test"})
        registry.dispatch.assert_called_once()
        call = registry.dispatch.call_args
        assert call[1].get("parent_agent") is agent

    def test_dispatch_tool_no_parent_agent_when_no_cli_ref(self):
        """When _cli_ref is None (gateway mode), no parent_agent is injected."""
        manager, ctx = self._make_ctx()
        manager._cli_ref = None
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("delegate_task", {"goal": "test"})
        assert "parent_agent" not in registry.dispatch.call_args[1]

    def test_dispatch_tool_no_parent_agent_when_agent_is_none(self):
        """When cli_ref exists but agent is None (not yet initialized), skip parent_agent."""
        manager, ctx = self._make_ctx()
        cli = MagicMock()
        cli.agent = None
        manager._cli_ref = cli
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("delegate_task", {"goal": "test"})
        assert "parent_agent" not in registry.dispatch.call_args[1]

    def test_dispatch_tool_respects_explicit_parent_agent(self):
        """Explicit parent_agent kwarg is not overwritten by _cli_ref.agent."""
        manager, ctx = self._make_ctx()
        cli_agent = MagicMock(name="cli_agent")
        cli = MagicMock()
        cli.agent = cli_agent
        manager._cli_ref = cli
        explicit_agent = MagicMock(name="explicit_agent")
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("delegate_task", {"goal": "test"}, parent_agent=explicit_agent)
        assert registry.dispatch.call_args[1]["parent_agent"] is explicit_agent

    def test_dispatch_tool_forwards_extra_kwargs(self):
        """Extra kwargs are forwarded to registry.dispatch()."""
        manager, ctx = self._make_ctx()
        manager._cli_ref = None
        registry = self._registry_returning('{"ok": true}')
        with patch("tools.registry.registry", registry):
            ctx.dispatch_tool("some_tool", {"x": 1}, task_id="test-123")
        assert registry.dispatch.call_args[1]["task_id"] == "test-123"

    def test_dispatch_tool_returns_json_string(self):
        """dispatch_tool() returns the raw JSON string from the registry."""
        manager, ctx = self._make_ctx()
        manager._cli_ref = None
        registry = self._registry_returning('{"error": "Unknown tool: fake"}')
        with patch("tools.registry.registry", registry):
            result = ctx.dispatch_tool("fake", {})
        assert '"error"' in result

View file

@ -0,0 +1,56 @@
"""Tests for plugins/memory/honcho/cli.py."""
from types import SimpleNamespace
class TestCmdStatus:
    def test_reports_connection_failure_when_session_setup_fails(self, monkeypatch, capsys, tmp_path):
        import plugins.memory.honcho.cli as honcho_cli

        cfg_path = tmp_path / "honcho.json"
        cfg_path.write_text("{}")

        class FakeConfig:
            # Minimal stand-in exposing every attribute cmd_status reads.
            enabled = True
            api_key = "root-key"
            workspace_id = "hermes"
            host = "hermes"
            base_url = None
            ai_peer = "hermes"
            peer_name = "eri"
            recall_mode = "hybrid"
            user_observe_me = True
            user_observe_others = False
            ai_observe_me = False
            ai_observe_others = True
            write_frequency = "async"
            session_strategy = "per-session"
            context_tokens = 800

            def resolve_session_name(self):
                return "hermes"

        # Route all config lookups at the isolated temp file / fake config.
        monkeypatch.setattr(honcho_cli, "_read_config", lambda: {"apiKey": "***"})
        monkeypatch.setattr(honcho_cli, "_config_path", lambda: cfg_path)
        monkeypatch.setattr(honcho_cli, "_local_config_path", lambda: cfg_path)
        monkeypatch.setattr(honcho_cli, "_active_profile_name", lambda: "default")
        monkeypatch.setattr(
            "plugins.memory.honcho.client.HonchoClientConfig.from_global_config",
            lambda host=None: FakeConfig(),
        )
        monkeypatch.setattr(
            "plugins.memory.honcho.client.get_honcho_client",
            lambda cfg: object(),
        )

        # Make the session-setup step blow up with an auth-style error.
        def _raise_invalid_key(hcfg, client):
            raise RuntimeError("Invalid API key")

        monkeypatch.setattr(honcho_cli, "_show_peer_cards", _raise_invalid_key)
        # Pretend the honcho SDK is importable without installing it.
        monkeypatch.setitem(__import__("sys").modules, "honcho", SimpleNamespace())

        honcho_cli.cmd_status(SimpleNamespace(all=False))

        out = capsys.readouterr().out
        assert "FAILED (Invalid API key)" in out
        assert "Connection... OK" not in out

View file

@ -1,5 +1,6 @@
"""Tests for plugins/memory/honcho/client.py — Honcho client configuration."""
import importlib.util
import json
import os
from pathlib import Path
@ -25,6 +26,7 @@ class TestHonchoClientConfigDefaults:
assert config.workspace_id == "hermes"
assert config.api_key is None
assert config.environment == "production"
assert config.timeout is None
assert config.enabled is False
assert config.save_messages is True
assert config.session_strategy == "per-directory"
@ -76,6 +78,11 @@ class TestFromEnv:
assert config.base_url == "http://localhost:8000"
assert config.enabled is True
def test_reads_timeout_from_env(self):
with patch.dict(os.environ, {"HONCHO_TIMEOUT": "90"}, clear=True):
config = HonchoClientConfig.from_env()
assert config.timeout == 90.0
class TestFromGlobalConfig:
def test_missing_config_falls_back_to_env(self, tmp_path):
@ -87,10 +94,10 @@ class TestFromGlobalConfig:
assert config.enabled is False
assert config.api_key is None
def test_reads_full_config(self, tmp_path):
def test_reads_full_config(self, tmp_path, monkeypatch):
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({
"apiKey": "my-honcho-key",
"apiKey": "***",
"workspace": "my-workspace",
"environment": "staging",
"peerName": "alice",
@ -108,9 +115,11 @@ class TestFromGlobalConfig:
}
}
}))
# Isolate from real ~/.hermes/honcho.json
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "isolated"))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.api_key == "my-honcho-key"
assert config.api_key == "***"
# Host block workspace overrides root workspace
assert config.workspace_id == "override-ws"
assert config.ai_peer == "override-ai"
@ -154,10 +163,31 @@ class TestFromGlobalConfig:
def test_session_strategy_default_from_global_config(self, tmp_path):
"""from_global_config with no sessionStrategy should match dataclass default."""
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"apiKey": "key"}))
config_file.write_text(json.dumps({"apiKey": "***"}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.session_strategy == "per-directory"
def test_context_tokens_default_is_none(self, tmp_path):
"""Default context_tokens should be None (uncapped) unless explicitly set."""
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"apiKey": "***"}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.context_tokens is None
def test_context_tokens_explicit_sets_cap(self, tmp_path):
"""Explicit contextTokens in config sets the cap."""
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"apiKey": "***", "contextTokens": 1200}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.context_tokens == 1200
def test_context_tokens_explicit_overrides_default(self, tmp_path):
"""Explicit contextTokens in config should override the default."""
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"apiKey": "***", "contextTokens": 2000}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.context_tokens == 2000
def test_context_tokens_host_block_wins(self, tmp_path):
"""Host block contextTokens should override root."""
config_file = tmp_path / "config.json"
@ -232,6 +262,20 @@ class TestFromGlobalConfig:
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.base_url == "http://root:9000"
def test_timeout_from_config_root(self, tmp_path):
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"timeout": 75}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.timeout == 75.0
def test_request_timeout_alias_from_config_root(self, tmp_path):
config_file = tmp_path / "config.json"
config_file.write_text(json.dumps({"requestTimeout": "82.5"}))
config = HonchoClientConfig.from_global_config(config_path=config_file)
assert config.timeout == 82.5
class TestResolveSessionName:
def test_manual_override(self):
@ -333,13 +377,14 @@ class TestResolveConfigPath:
hermes_home.mkdir()
local_cfg = hermes_home / "honcho.json"
local_cfg.write_text(json.dumps({
"apiKey": "local-key",
"apiKey": "***",
"workspace": "local-ws",
}))
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}):
with patch.dict(os.environ, {"HERMES_HOME": str(hermes_home)}), \
patch.object(Path, "home", return_value=tmp_path):
config = HonchoClientConfig.from_global_config()
assert config.api_key == "local-key"
assert config.api_key == "***"
assert config.workspace_id == "local-ws"
@ -500,46 +545,115 @@ class TestObservationModeMigration:
assert cfg.ai_observe_others is True
class TestInitOnSessionStart:
"""Tests for the initOnSessionStart config field."""
class TestGetHonchoClient:
def teardown_method(self):
reset_honcho_client()
def test_default_is_false(self):
@pytest.mark.skipif(
not importlib.util.find_spec("honcho"),
reason="honcho SDK not installed"
)
def test_passes_timeout_from_config(self):
fake_honcho = MagicMock(name="Honcho")
cfg = HonchoClientConfig(
api_key="test-key",
timeout=91.0,
workspace_id="hermes",
environment="production",
)
with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho:
client = get_honcho_client(cfg)
assert client is fake_honcho
mock_honcho.assert_called_once()
assert mock_honcho.call_args.kwargs["timeout"] == 91.0
@pytest.mark.skipif(
not importlib.util.find_spec("honcho"),
reason="honcho SDK not installed"
)
def test_hermes_config_timeout_override_used_when_config_timeout_missing(self):
fake_honcho = MagicMock(name="Honcho")
cfg = HonchoClientConfig(
api_key="test-key",
workspace_id="hermes",
environment="production",
)
with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
patch("hermes_cli.config.load_config", return_value={"honcho": {"timeout": 88}}):
client = get_honcho_client(cfg)
assert client is fake_honcho
mock_honcho.assert_called_once()
assert mock_honcho.call_args.kwargs["timeout"] == 88.0
@pytest.mark.skipif(
not importlib.util.find_spec("honcho"),
reason="honcho SDK not installed"
)
def test_hermes_request_timeout_alias_used(self):
fake_honcho = MagicMock(name="Honcho")
cfg = HonchoClientConfig(
api_key="test-key",
workspace_id="hermes",
environment="production",
)
with patch("honcho.Honcho", return_value=fake_honcho) as mock_honcho, \
patch("hermes_cli.config.load_config", return_value={"honcho": {"request_timeout": "77.5"}}):
client = get_honcho_client(cfg)
assert client is fake_honcho
mock_honcho.assert_called_once()
assert mock_honcho.call_args.kwargs["timeout"] == 77.5
class TestResolveSessionNameGatewayKey:
"""Regression tests for gateway_session_key priority in resolve_session_name.
Ensures gateway platforms get stable per-chat Honcho sessions even when
sessionStrategy=per-session would otherwise create ephemeral sessions.
Regression: plugin refactor 924bc67e dropped gateway key plumbing.
"""
def test_gateway_key_overrides_per_session_strategy(self):
"""gateway_session_key must win over per-session session_id."""
config = HonchoClientConfig(session_strategy="per-session")
result = config.resolve_session_name(
session_id="20260412_171002_69bb38",
gateway_session_key="agent:main:telegram:dm:8439114563",
)
assert result == "agent-main-telegram-dm-8439114563"
def test_session_title_still_wins_over_gateway_key(self):
"""Explicit /title remap takes priority over gateway_session_key."""
config = HonchoClientConfig(session_strategy="per-session")
result = config.resolve_session_name(
session_title="my-custom-title",
session_id="20260412_171002_69bb38",
gateway_session_key="agent:main:telegram:dm:8439114563",
)
assert result == "my-custom-title"
def test_per_session_fallback_without_gateway_key(self):
"""Without gateway_session_key, per-session returns session_id (CLI path)."""
config = HonchoClientConfig(session_strategy="per-session")
result = config.resolve_session_name(
session_id="20260412_171002_69bb38",
gateway_session_key=None,
)
assert result == "20260412_171002_69bb38"
def test_gateway_key_sanitizes_special_chars(self):
"""Colons and other non-alphanumeric chars are replaced with hyphens."""
config = HonchoClientConfig()
assert config.init_on_session_start is False
def test_root_level_true(self, tmp_path):
cfg_file = tmp_path / "config.json"
cfg_file.write_text(json.dumps({
"apiKey": "k",
"initOnSessionStart": True,
}))
cfg = HonchoClientConfig.from_global_config(config_path=cfg_file)
assert cfg.init_on_session_start is True
def test_host_block_overrides_root(self, tmp_path):
cfg_file = tmp_path / "config.json"
cfg_file.write_text(json.dumps({
"apiKey": "k",
"initOnSessionStart": True,
"hosts": {"hermes": {"initOnSessionStart": False}},
}))
cfg = HonchoClientConfig.from_global_config(config_path=cfg_file)
assert cfg.init_on_session_start is False
def test_host_block_true_overrides_root_absent(self, tmp_path):
cfg_file = tmp_path / "config.json"
cfg_file.write_text(json.dumps({
"apiKey": "k",
"hosts": {"hermes": {"initOnSessionStart": True}},
}))
cfg = HonchoClientConfig.from_global_config(config_path=cfg_file)
assert cfg.init_on_session_start is True
def test_absent_everywhere_defaults_false(self, tmp_path):
cfg_file = tmp_path / "config.json"
cfg_file.write_text(json.dumps({"apiKey": "k"}))
cfg = HonchoClientConfig.from_global_config(config_path=cfg_file)
assert cfg.init_on_session_start is False
result = config.resolve_session_name(
gateway_session_key="agent:main:telegram:dm:8439114563",
)
assert result == "agent-main-telegram-dm-8439114563"
assert ":" not in result
class TestResetHonchoClient:
@ -549,3 +663,91 @@ class TestResetHonchoClient:
assert mod._honcho_client is not None
reset_honcho_client()
assert mod._honcho_client is None
class TestDialecticDepthParsing:
    """Tests for _parse_dialectic_depth and _parse_dialectic_depth_levels."""

    @staticmethod
    def _load(tmp_path, payload):
        """Write *payload* as JSON config and parse it via from_global_config."""
        config_file = tmp_path / "config.json"
        config_file.write_text(json.dumps(payload))
        return HonchoClientConfig.from_global_config(config_path=config_file)

    def test_default_depth_is_1(self, tmp_path):
        """Default dialecticDepth should be 1."""
        cfg = self._load(tmp_path, {"apiKey": "***"})
        assert cfg.dialectic_depth == 1

    def test_depth_from_root(self, tmp_path):
        cfg = self._load(tmp_path, {"apiKey": "***", "dialecticDepth": 2})
        assert cfg.dialectic_depth == 2

    def test_depth_host_block_wins(self, tmp_path):
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 1,
            "hosts": {"hermes": {"dialecticDepth": 3}},
        })
        assert cfg.dialectic_depth == 3

    def test_depth_clamped_high(self, tmp_path):
        cfg = self._load(tmp_path, {"apiKey": "***", "dialecticDepth": 10})
        assert cfg.dialectic_depth == 3

    def test_depth_clamped_low(self, tmp_path):
        cfg = self._load(tmp_path, {"apiKey": "***", "dialecticDepth": -1})
        assert cfg.dialectic_depth == 1

    def test_depth_levels_default_none(self, tmp_path):
        cfg = self._load(tmp_path, {"apiKey": "***"})
        assert cfg.dialectic_depth_levels is None

    def test_depth_levels_from_config(self, tmp_path):
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 2,
            "dialecticDepthLevels": ["minimal", "high"],
        })
        assert cfg.dialectic_depth_levels == ["minimal", "high"]

    def test_depth_levels_padded_if_short(self, tmp_path):
        """Array shorter than depth gets padded with 'low'."""
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 3,
            "dialecticDepthLevels": ["high"],
        })
        assert cfg.dialectic_depth_levels == ["high", "low", "low"]

    def test_depth_levels_truncated_if_long(self, tmp_path):
        """Array longer than depth gets truncated."""
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 1,
            "dialecticDepthLevels": ["high", "max", "medium"],
        })
        assert cfg.dialectic_depth_levels == ["high"]

    def test_depth_levels_invalid_values_default_to_low(self, tmp_path):
        """Invalid reasoning levels in the array fall back to 'low'."""
        cfg = self._load(tmp_path, {
            "apiKey": "***",
            "dialecticDepth": 2,
            "dialecticDepthLevels": ["invalid", "high"],
        })
        assert cfg.dialectic_depth_levels == ["low", "high"]

View file

@ -205,27 +205,62 @@ class TestPeerLookupHelpers:
def test_get_peer_card_uses_direct_peer_lookup(self):
mgr, session = self._make_cached_manager()
user_peer = MagicMock()
user_peer.get_card.return_value = ["Name: Robert"]
mgr._get_or_create_peer = MagicMock(return_value=user_peer)
assistant_peer = MagicMock()
assistant_peer.get_card.return_value = ["Name: Robert"]
mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
assert mgr.get_peer_card(session.key) == ["Name: Robert"]
user_peer.get_card.assert_called_once_with()
assistant_peer.get_card.assert_called_once_with(target=session.user_peer_id)
def test_search_context_uses_peer_context_response(self):
def test_search_context_uses_assistant_perspective_with_target(self):
mgr, session = self._make_cached_manager()
user_peer = MagicMock()
user_peer.context.return_value = SimpleNamespace(
assistant_peer = MagicMock()
assistant_peer.context.return_value = SimpleNamespace(
representation="Robert runs neuralancer",
peer_card=["Location: Melbourne"],
)
mgr._get_or_create_peer = MagicMock(return_value=user_peer)
mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
result = mgr.search_context(session.key, "neuralancer")
assert "Robert runs neuralancer" in result
assert "- Location: Melbourne" in result
user_peer.context.assert_called_once_with(search_query="neuralancer")
assistant_peer.context.assert_called_once_with(
target=session.user_peer_id,
search_query="neuralancer",
)
def test_search_context_unified_mode_uses_user_self_context(self):
mgr, session = self._make_cached_manager()
mgr._ai_observe_others = False
user_peer = MagicMock()
user_peer.context.return_value = SimpleNamespace(
representation="Unified self context",
peer_card=["Name: Robert"],
)
mgr._get_or_create_peer = MagicMock(return_value=user_peer)
result = mgr.search_context(session.key, "self")
assert "Unified self context" in result
user_peer.context.assert_called_once_with(search_query="self")
def test_search_context_accepts_explicit_ai_peer_id(self):
mgr, session = self._make_cached_manager()
ai_peer = MagicMock()
ai_peer.context.return_value = SimpleNamespace(
representation="Assistant self context",
peer_card=["Role: Assistant"],
)
mgr._get_or_create_peer = MagicMock(return_value=ai_peer)
result = mgr.search_context(session.key, "assistant", peer=session.assistant_peer_id)
assert "Assistant self context" in result
ai_peer.context.assert_called_once_with(
target=session.assistant_peer_id,
search_query="assistant",
)
def test_get_prefetch_context_fetches_user_and_ai_from_peer_api(self):
mgr, session = self._make_cached_manager()
@ -235,9 +270,15 @@ class TestPeerLookupHelpers:
peer_card=["Name: Robert"],
)
ai_peer = MagicMock()
ai_peer.context.return_value = SimpleNamespace(
representation="AI representation",
peer_card=["Owner: Robert"],
ai_peer.context.side_effect = lambda **kwargs: SimpleNamespace(
representation=(
"AI representation" if kwargs.get("target") == session.assistant_peer_id
else "Mixed representation"
),
peer_card=(
["Role: Assistant"] if kwargs.get("target") == session.assistant_peer_id
else ["Name: Robert"]
),
)
mgr._get_or_create_peer = MagicMock(side_effect=[user_peer, ai_peer])
@ -247,17 +288,23 @@ class TestPeerLookupHelpers:
"representation": "User representation",
"card": "Name: Robert",
"ai_representation": "AI representation",
"ai_card": "Owner: Robert",
"ai_card": "Role: Assistant",
}
user_peer.context.assert_called_once_with()
ai_peer.context.assert_called_once_with()
user_peer.context.assert_called_once_with(target=session.user_peer_id)
ai_peer.context.assert_called_once_with(target=session.assistant_peer_id)
def test_get_ai_representation_uses_peer_api(self):
mgr, session = self._make_cached_manager()
ai_peer = MagicMock()
ai_peer.context.return_value = SimpleNamespace(
representation="AI representation",
peer_card=["Owner: Robert"],
ai_peer.context.side_effect = lambda **kwargs: SimpleNamespace(
representation=(
"AI representation" if kwargs.get("target") == session.assistant_peer_id
else "Mixed representation"
),
peer_card=(
["Role: Assistant"] if kwargs.get("target") == session.assistant_peer_id
else ["Name: Robert"]
),
)
mgr._get_or_create_peer = MagicMock(return_value=ai_peer)
@ -265,9 +312,167 @@ class TestPeerLookupHelpers:
assert result == {
"representation": "AI representation",
"card": "Owner: Robert",
"card": "Role: Assistant",
}
ai_peer.context.assert_called_once_with()
ai_peer.context.assert_called_once_with(target=session.assistant_peer_id)
def test_create_conclusion_defaults_to_user_target(self):
    mgr, session = self._make_cached_manager()
    assistant_peer = MagicMock()
    scope = MagicMock()
    assistant_peer.conclusions_of.return_value = scope
    mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
    # No peer argument → the conclusion is scoped to the user peer.
    assert mgr.create_conclusion(session.key, "User prefers dark mode") is True
    assistant_peer.conclusions_of.assert_called_once_with(session.user_peer_id)
    expected = {
        "content": "User prefers dark mode",
        "session_id": session.honcho_session_id,
    }
    scope.create.assert_called_once_with([expected])
def test_create_conclusion_can_target_ai_peer(self):
    mgr, session = self._make_cached_manager()
    assistant_peer = MagicMock()
    scope = MagicMock()
    assistant_peer.conclusions_of.return_value = scope
    mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
    # peer="ai" routes the conclusion onto the assistant peer instead.
    assert mgr.create_conclusion(session.key, "Assistant prefers terse summaries", peer="ai") is True
    assistant_peer.conclusions_of.assert_called_once_with(session.assistant_peer_id)
    expected = {
        "content": "Assistant prefers terse summaries",
        "session_id": session.honcho_session_id,
    }
    scope.create.assert_called_once_with([expected])
def test_create_conclusion_accepts_explicit_user_peer_id(self):
    mgr, session = self._make_cached_manager()
    assistant_peer = MagicMock()
    scope = MagicMock()
    assistant_peer.conclusions_of.return_value = scope
    mgr._get_or_create_peer = MagicMock(return_value=assistant_peer)
    # Passing the user's concrete peer id behaves like the default target.
    assert mgr.create_conclusion(session.key, "Robert prefers vinyl", peer=session.user_peer_id) is True
    assistant_peer.conclusions_of.assert_called_once_with(session.user_peer_id)
    expected = {
        "content": "Robert prefers vinyl",
        "session_id": session.honcho_session_id,
    }
    scope.create.assert_called_once_with([expected])
class TestConcludeToolDispatch:
    """Tool-call dispatch for honcho_conclude/profile/search/reasoning."""

    @staticmethod
    def _make_provider():
        """Return a provider with an initialized session and a mocked manager."""
        provider = HonchoMemoryProvider()
        provider._session_initialized = True
        provider._session_key = "telegram:123"
        provider._manager = MagicMock()
        return provider

    def test_honcho_conclude_defaults_to_user_peer(self):
        provider = self._make_provider()
        provider._manager.create_conclusion.return_value = True
        result = provider.handle_tool_call(
            "honcho_conclude",
            {"conclusion": "User prefers dark mode"},
        )
        assert "Conclusion saved for user" in result
        provider._manager.create_conclusion.assert_called_once_with(
            "telegram:123",
            "User prefers dark mode",
            peer="user",
        )

    def test_honcho_conclude_can_target_ai_peer(self):
        provider = self._make_provider()
        provider._manager.create_conclusion.return_value = True
        result = provider.handle_tool_call(
            "honcho_conclude",
            {"conclusion": "Assistant likes terse replies", "peer": "ai"},
        )
        assert "Conclusion saved for ai" in result
        provider._manager.create_conclusion.assert_called_once_with(
            "telegram:123",
            "Assistant likes terse replies",
            peer="ai",
        )

    def test_honcho_profile_can_target_explicit_peer_id(self):
        provider = self._make_provider()
        provider._manager.get_peer_card.return_value = ["Role: Assistant"]
        result = provider.handle_tool_call("honcho_profile", {"peer": "hermes"})
        assert "Role: Assistant" in result
        provider._manager.get_peer_card.assert_called_once_with("telegram:123", peer="hermes")

    def test_honcho_search_can_target_explicit_peer_id(self):
        provider = self._make_provider()
        provider._manager.search_context.return_value = "Assistant self context"
        result = provider.handle_tool_call(
            "honcho_search",
            {"query": "assistant", "peer": "hermes"},
        )
        assert "Assistant self context" in result
        provider._manager.search_context.assert_called_once_with(
            "telegram:123",
            "assistant",
            max_tokens=800,
            peer="hermes",
        )

    def test_honcho_reasoning_can_target_explicit_peer_id(self):
        provider = self._make_provider()
        provider._manager.dialectic_query.return_value = "Assistant answer"
        result = provider.handle_tool_call(
            "honcho_reasoning",
            {"query": "who are you", "peer": "hermes"},
        )
        assert "Assistant answer" in result
        provider._manager.dialectic_query.assert_called_once_with(
            "telegram:123",
            "who are you",
            reasoning_level=None,
            peer="hermes",
        )

    def test_honcho_conclude_missing_both_params_returns_error(self):
        """Calling honcho_conclude with neither conclusion nor delete_id returns a tool error."""
        import json

        provider = self._make_provider()
        result = provider.handle_tool_call("honcho_conclude", {})
        parsed = json.loads(result)
        assert "error" in parsed or "Missing required" in parsed.get("result", "")
        provider._manager.create_conclusion.assert_not_called()
        provider._manager.delete_conclusion.assert_not_called()
# ---------------------------------------------------------------------------
@ -366,6 +571,54 @@ class TestToolsModeInitBehavior:
assert cfg.peer_name == "8439114563"
class TestPerSessionMigrateGuard:
    """Verify migrate_memory_files is skipped under per-session strategy.

    per-session creates a fresh Honcho session every Hermes run. Uploading
    MEMORY.md/USER.md/SOUL.md to each short-lived session floods the backend
    with duplicate content. The guard was added to prevent orphan sessions
    containing only <prior_memory_file> wrappers.
    """

    def _make_provider_with_strategy(self, strategy, init_on_session_start=True):
        """Create a HonchoMemoryProvider and track migrate_memory_files calls."""
        from plugins.memory.honcho.client import HonchoClientConfig
        from unittest.mock import patch, MagicMock

        cfg = HonchoClientConfig(
            api_key="test-key",
            enabled=True,
            recall_mode="tools",
            init_on_session_start=init_on_session_start,
            session_strategy=strategy,
        )
        provider = HonchoMemoryProvider()
        manager = MagicMock()
        new_session = MagicMock()
        new_session.messages = []  # empty = new session → triggers migration path
        manager.get_or_create.return_value = new_session
        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=manager), \
             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
            provider.initialize(session_id="test-session-001")
        return provider, manager

    def test_migrate_skipped_for_per_session(self):
        """per-session strategy must NOT call migrate_memory_files."""
        _, manager = self._make_provider_with_strategy("per-session")
        manager.migrate_memory_files.assert_not_called()

    def test_migrate_runs_for_per_directory(self):
        """per-directory strategy with empty session SHOULD call migrate_memory_files."""
        _, manager = self._make_provider_with_strategy("per-directory")
        manager.migrate_memory_files.assert_called_once()
class TestChunkMessage:
def test_short_message_single_chunk(self):
result = HonchoMemoryProvider._chunk_message("hello world", 100)
@ -420,6 +673,60 @@ class TestChunkMessage:
assert len(chunk) <= 25000
# ---------------------------------------------------------------------------
# Context token budget enforcement
# ---------------------------------------------------------------------------
class TestTruncateToBudget:
    """Enforcement of the context_tokens budget in _truncate_to_budget."""

    def test_truncates_oversized_context(self):
        """Text exceeding context_tokens budget is truncated at a word boundary."""
        from plugins.memory.honcho.client import HonchoClientConfig
        provider = HonchoMemoryProvider()
        provider._config = HonchoClientConfig(context_tokens=10)
        long_text = "word " * 200  # ~1000 chars, well over 10*4=40 char budget
        result = provider._truncate_to_budget(long_text)
        assert len(result) <= 50  # budget_chars + ellipsis + word boundary slack
        # BUG FIX: the original asserted result.endswith(""), which is vacuously
        # true for every string. Truncation appends the ellipsis marker (see the
        # " …" suffix documented in test_context_tokens_cap_bounds_prefetch), so
        # assert the ellipsis explicitly.
        assert result.endswith("…")

    def test_no_truncation_within_budget(self):
        """Text within budget passes through unchanged."""
        from plugins.memory.honcho.client import HonchoClientConfig
        provider = HonchoMemoryProvider()
        provider._config = HonchoClientConfig(context_tokens=1000)
        short_text = "Name: Robert, Location: Melbourne"
        assert provider._truncate_to_budget(short_text) == short_text

    def test_no_truncation_when_context_tokens_none(self):
        """When context_tokens is None (explicit opt-out), no truncation."""
        from plugins.memory.honcho.client import HonchoClientConfig
        provider = HonchoMemoryProvider()
        provider._config = HonchoClientConfig(context_tokens=None)
        long_text = "word " * 500
        assert provider._truncate_to_budget(long_text) == long_text

    def test_context_tokens_cap_bounds_prefetch(self):
        """With an explicit token budget, oversized prefetch is bounded."""
        from plugins.memory.honcho.client import HonchoClientConfig
        provider = HonchoMemoryProvider()
        provider._config = HonchoClientConfig(context_tokens=1200)
        # Simulate a massive representation (10k chars)
        huge_text = "x" * 10000
        result = provider._truncate_to_budget(huge_text)
        # 1200 tokens * 4 chars = 4800 chars + " …"
        assert len(result) <= 4805
# ---------------------------------------------------------------------------
# Dialectic input guard
# ---------------------------------------------------------------------------
@ -452,3 +759,387 @@ class TestDialecticInputGuard:
# The query passed to chat() should be truncated
actual_query = mock_peer.chat.call_args[0][0]
assert len(actual_query) <= 100
# ---------------------------------------------------------------------------
class TestDialecticCadenceDefaults:
    """Regression tests for the dialectic_cadence default value."""

    @staticmethod
    def _make_provider(cfg_extra=None):
        """Build a HonchoMemoryProvider whose Honcho layer is fully mocked."""
        from unittest.mock import patch, MagicMock
        from plugins.memory.honcho.client import HonchoClientConfig

        cfg_kwargs = {"api_key": "test-key", "enabled": True, "recall_mode": "hybrid"}
        cfg_kwargs.update(cfg_extra or {})
        cfg = HonchoClientConfig(**cfg_kwargs)
        mgr = MagicMock()
        empty_session = MagicMock()
        empty_session.messages = []
        mgr.get_or_create.return_value = empty_session
        prov = HonchoMemoryProvider()
        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mgr), \
             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
            prov.initialize(session_id="test-session-001")
        return prov

    def test_default_is_3(self):
        """Default dialectic_cadence should be 3 to avoid per-turn LLM calls."""
        assert self._make_provider()._dialectic_cadence == 3

    def test_config_override(self):
        """dialecticCadence from config overrides the default."""
        prov = self._make_provider(cfg_extra={"raw": {"dialecticCadence": 5}})
        assert prov._dialectic_cadence == 5
class TestBaseContextSummary:
    """Base context injection should include the session summary when present."""

    def test_format_includes_summary(self):
        """The summary section leads the formatted first-turn context."""
        provider = HonchoMemoryProvider()
        formatted = provider._format_first_turn_context({
            "summary": "Testing Honcho tools and dialectic depth.",
            "representation": "Eri is a developer.",
            "card": "Name: Eri Barrett",
        })
        assert "## Session Summary" in formatted
        assert formatted.index("Session Summary") < formatted.index("User Representation")

    def test_format_without_summary(self):
        """An absent summary key produces no summary section."""
        provider = HonchoMemoryProvider()
        formatted = provider._format_first_turn_context(
            {"representation": "Eri is a developer.", "card": "Name: Eri"}
        )
        assert "Session Summary" not in formatted
        assert "User Representation" in formatted

    def test_format_empty_summary_skipped(self):
        """An empty summary string must not produce a section."""
        provider = HonchoMemoryProvider()
        formatted = provider._format_first_turn_context(
            {"summary": "", "representation": "rep", "card": "card"}
        )
        assert "Session Summary" not in formatted
class TestDialecticDepth:
    """Tests for the dialecticDepth multi-pass system."""

    @staticmethod
    def _make_provider(cfg_extra=None):
        # Shared builder: patches the Honcho client/session layer so
        # provider.initialize() runs without any network access.
        from unittest.mock import patch, MagicMock
        from plugins.memory.honcho.client import HonchoClientConfig
        defaults = dict(api_key="test-key", enabled=True, recall_mode="hybrid")
        if cfg_extra:
            defaults.update(cfg_extra)
        cfg = HonchoClientConfig(**defaults)
        provider = HonchoMemoryProvider()
        mock_manager = MagicMock()
        mock_session = MagicMock()
        mock_session.messages = []  # empty -> treated as a brand-new session
        mock_manager.get_or_create.return_value = mock_session
        with patch("plugins.memory.honcho.client.HonchoClientConfig.from_global_config", return_value=cfg), \
             patch("plugins.memory.honcho.client.get_honcho_client", return_value=MagicMock()), \
             patch("plugins.memory.honcho.session.HonchoSessionManager", return_value=mock_manager), \
             patch("hermes_constants.get_hermes_home", return_value=MagicMock()):
            provider.initialize(session_id="test-session-001")
        return provider

    def test_default_depth_is_1(self):
        """Default dialecticDepth should be 1 — single .chat() call."""
        provider = self._make_provider()
        assert provider._dialectic_depth == 1

    def test_depth_from_config(self):
        """dialecticDepth from config sets the depth."""
        provider = self._make_provider(cfg_extra={"dialectic_depth": 2})
        assert provider._dialectic_depth == 2

    def test_depth_clamped_to_3(self):
        """dialecticDepth > 3 gets clamped to 3."""
        provider = self._make_provider(cfg_extra={"dialectic_depth": 7})
        assert provider._dialectic_depth == 3

    def test_depth_clamped_to_1(self):
        """dialecticDepth < 1 gets clamped to 1."""
        provider = self._make_provider(cfg_extra={"dialectic_depth": 0})
        assert provider._dialectic_depth == 1

    def test_depth_levels_from_config(self):
        """dialecticDepthLevels array is read from config."""
        provider = self._make_provider(cfg_extra={
            "dialectic_depth": 2,
            "dialectic_depth_levels": ["minimal", "high"],
        })
        assert provider._dialectic_depth_levels == ["minimal", "high"]

    def test_depth_levels_none_by_default(self):
        """When dialecticDepthLevels is not configured, it's None."""
        provider = self._make_provider()
        assert provider._dialectic_depth_levels is None

    def test_resolve_pass_level_uses_depth_levels(self):
        """Per-pass levels from dialecticDepthLevels override proportional."""
        provider = self._make_provider(cfg_extra={
            "dialectic_depth": 2,
            "dialectic_depth_levels": ["minimal", "high"],
        })
        assert provider._resolve_pass_level(0) == "minimal"
        assert provider._resolve_pass_level(1) == "high"

    def test_resolve_pass_level_proportional_depth_1(self):
        """Depth 1 pass 0 uses the base reasoning level."""
        provider = self._make_provider(cfg_extra={
            "dialectic_depth": 1,
            "dialectic_reasoning_level": "medium",
        })
        assert provider._resolve_pass_level(0) == "medium"

    def test_resolve_pass_level_proportional_depth_2(self):
        """Depth 2: pass 0 is minimal, pass 1 is base level."""
        provider = self._make_provider(cfg_extra={
            "dialectic_depth": 2,
            "dialectic_reasoning_level": "high",
        })
        assert provider._resolve_pass_level(0) == "minimal"
        assert provider._resolve_pass_level(1) == "high"

    def test_cold_start_prompt(self):
        """Cold start (no base context) uses general user query."""
        provider = self._make_provider()
        prompt = provider._build_dialectic_prompt(0, [], is_cold=True)
        assert "preferences" in prompt.lower()
        assert "session" not in prompt.lower()

    def test_warm_session_prompt(self):
        """Warm session (has context) uses session-scoped query."""
        provider = self._make_provider()
        prompt = provider._build_dialectic_prompt(0, [], is_cold=False)
        assert "session" in prompt.lower()
        assert "current conversation" in prompt.lower()

    def test_signal_sufficient_short_response(self):
        """Short responses are not sufficient signal."""
        assert not HonchoMemoryProvider._signal_sufficient("ok")
        assert not HonchoMemoryProvider._signal_sufficient("")
        assert not HonchoMemoryProvider._signal_sufficient(None)

    def test_signal_sufficient_structured_response(self):
        """Structured responses with bullets/headers are sufficient."""
        result = "## Current State\n- Working on Honcho PR\n- Testing dialectic depth\n" + "x" * 50
        assert HonchoMemoryProvider._signal_sufficient(result)

    def test_signal_sufficient_long_unstructured(self):
        """Long responses are sufficient even without structure."""
        assert HonchoMemoryProvider._signal_sufficient("a" * 301)

    def test_run_dialectic_depth_single_pass(self):
        """Depth 1 makes exactly one .chat() call."""
        from unittest.mock import MagicMock
        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
        provider._manager = MagicMock()
        provider._manager.dialectic_query.return_value = "user prefers zero-fluff"
        provider._session_key = "test"
        provider._base_context_cache = None  # cold start
        result = provider._run_dialectic_depth("hello")
        assert result == "user prefers zero-fluff"
        assert provider._manager.dialectic_query.call_count == 1

    def test_run_dialectic_depth_two_passes(self):
        """Depth 2 makes two .chat() calls when pass 1 signal is weak."""
        from unittest.mock import MagicMock
        provider = self._make_provider(cfg_extra={"dialectic_depth": 2})
        provider._manager = MagicMock()
        # side_effect ordering matters: the weak first pass forces pass 2.
        provider._manager.dialectic_query.side_effect = [
            "thin response",  # pass 0: weak signal
            "## Synthesis\n- Grounded in evidence\n- Current PR work\n" + "x" * 100,  # pass 1: strong
        ]
        provider._session_key = "test"
        provider._base_context_cache = "existing context"
        result = provider._run_dialectic_depth("test query")
        assert provider._manager.dialectic_query.call_count == 2
        assert "Synthesis" in result

    def test_first_turn_runs_dialectic_synchronously(self):
        """First turn should fire the dialectic synchronously (cold start)."""
        from unittest.mock import MagicMock, patch
        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
        provider._manager = MagicMock()
        provider._manager.dialectic_query.return_value = "cold start synthesis"
        provider._manager.get_prefetch_context.return_value = None
        provider._manager.pop_context_result.return_value = None
        provider._session_key = "test"
        provider._base_context_cache = ""  # cold start
        provider._last_dialectic_turn = -999  # never fired
        result = provider.prefetch("hello world")
        assert "cold start synthesis" in result
        assert provider._manager.dialectic_query.call_count == 1
        # After first-turn sync, _last_dialectic_turn should be updated
        assert provider._last_dialectic_turn != -999

    def test_first_turn_dialectic_does_not_double_fire(self):
        """After first-turn sync dialectic, queue_prefetch should skip (cadence)."""
        from unittest.mock import MagicMock
        provider = self._make_provider(cfg_extra={"dialectic_depth": 1})
        provider._manager = MagicMock()
        provider._manager.dialectic_query.return_value = "cold start synthesis"
        provider._manager.get_prefetch_context.return_value = None
        provider._manager.pop_context_result.return_value = None
        provider._session_key = "test"
        provider._base_context_cache = ""
        provider._last_dialectic_turn = -999
        provider._turn_count = 0
        # First turn fires sync dialectic
        provider.prefetch("hello")
        assert provider._manager.dialectic_query.call_count == 1
        # Now queue_prefetch on same turn should skip (cadence: 0 - 0 < 3)
        provider._manager.dialectic_query.reset_mock()
        provider.queue_prefetch("hello")
        assert provider._manager.dialectic_query.call_count == 0

    def test_run_dialectic_depth_bails_early_on_strong_signal(self):
        """Depth 2 skips pass 1 when pass 0 returns strong signal."""
        from unittest.mock import MagicMock
        provider = self._make_provider(cfg_extra={"dialectic_depth": 2})
        provider._manager = MagicMock()
        provider._manager.dialectic_query.return_value = (
            "## Full Assessment\n- Strong structured response\n- With evidence\n" + "x" * 200
        )
        provider._session_key = "test"
        provider._base_context_cache = "existing context"
        result = provider._run_dialectic_depth("test query")
        # Only 1 call because pass 0 had sufficient signal
        assert provider._manager.dialectic_query.call_count == 1
# ---------------------------------------------------------------------------
# set_peer_card None guard
# ---------------------------------------------------------------------------
class TestSetPeerCardNoneGuard:
    """set_peer_card must return None (not raise) when peer ID cannot be resolved."""

    def _make_manager(self):
        """Construct a bare HonchoSessionManager without running __init__."""
        from plugins.memory.honcho.client import HonchoClientConfig
        from plugins.memory.honcho.session import HonchoSessionManager

        mgr = HonchoSessionManager.__new__(HonchoSessionManager)
        mgr._cache = {}
        mgr._sessions_cache = {}
        mgr._config = HonchoClientConfig(api_key="test-key", enabled=True)
        return mgr

    def test_returns_none_when_peer_resolves_to_none(self):
        """set_peer_card returns None when _resolve_peer_id returns None."""
        from unittest.mock import patch

        mgr = self._make_manager()
        mgr._cache["test"] = HonchoSession(
            key="test",
            honcho_session_id="sid",
            user_peer_id="user-peer",
            assistant_peer_id="ai-peer",
        )
        with patch.object(mgr, "_resolve_peer_id", return_value=None):
            outcome = mgr.set_peer_card("test", ["fact 1", "fact 2"], peer="ghost")
        assert outcome is None

    def test_returns_none_when_session_missing(self):
        """set_peer_card returns None when session key is not in cache."""
        mgr = self._make_manager()
        assert mgr.set_peer_card("nonexistent", ["fact"], peer="user") is None
# ---------------------------------------------------------------------------
# get_session_context cache-miss fallback respects peer param
# ---------------------------------------------------------------------------
class TestGetSessionContextFallback:
    """get_session_context fallback must honour the peer param when honcho_session is absent."""

    def _make_manager_with_session(self, user_peer_id="user-peer", assistant_peer_id="ai-peer"):
        # Builds a manager whose session exists in the local _cache but NOT in
        # _sessions_cache, which is what drives get_session_context down the
        # direct-fetch fallback path under test.
        from plugins.memory.honcho.client import HonchoClientConfig
        from plugins.memory.honcho.session import HonchoSessionManager
        cfg = HonchoClientConfig(api_key="test-key", enabled=True)
        # __new__ bypasses __init__ so no real Honcho client is constructed.
        mgr = HonchoSessionManager.__new__(HonchoSessionManager)
        mgr._cache = {}
        mgr._sessions_cache = {}
        mgr._config = cfg
        mgr._dialectic_dynamic = True
        mgr._dialectic_reasoning_level = "low"
        mgr._dialectic_max_input_chars = 10000
        mgr._ai_observe_others = True
        session = HonchoSession(
            key="test",
            honcho_session_id="sid-missing-from-sessions-cache",
            user_peer_id=user_peer_id,
            assistant_peer_id=assistant_peer_id,
        )
        mgr._cache["test"] = session
        # Deliberately NOT adding to _sessions_cache to trigger fallback path
        return mgr

    def test_fallback_uses_user_peer_for_user(self):
        """On cache miss, peer='user' fetches user peer context."""
        mgr = self._make_manager_with_session()
        fetch_calls = []
        def _fake_fetch(peer_id, search_query=None, *, target=None):
            # Record (peer_id, target) for later assertions instead of fetching.
            fetch_calls.append((peer_id, target))
            return {"representation": "user rep", "card": []}
        mgr._fetch_peer_context = _fake_fetch
        mgr.get_session_context("test", peer="user")
        assert len(fetch_calls) == 1
        peer_id, target = fetch_calls[0]
        assert peer_id == "user-peer"
        assert target == "user-peer"

    def test_fallback_uses_ai_peer_for_ai(self):
        """On cache miss, peer='ai' fetches assistant peer context, not user."""
        mgr = self._make_manager_with_session()
        fetch_calls = []
        def _fake_fetch(peer_id, search_query=None, *, target=None):
            fetch_calls.append((peer_id, target))
            return {"representation": "ai rep", "card": []}
        mgr._fetch_peer_context = _fake_fetch
        mgr.get_session_context("test", peer="ai")
        assert len(fetch_calls) == 1
        peer_id, target = fetch_calls[0]
        assert peer_id == "ai-peer", f"expected ai-peer, got {peer_id}"
        assert target == "ai-peer"

View file

@ -0,0 +1,139 @@
"""Tests for interrupt handling in concurrent tool execution."""
import concurrent.futures
import threading
import time
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture(autouse=True)
def _isolate_hermes(tmp_path, monkeypatch):
    """Redirect HERMES_HOME into tmp_path so tests never touch real state."""
    hermes_home = tmp_path / ".hermes"
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    hermes_home.mkdir(exist_ok=True)
def _make_agent(monkeypatch):
    """Create a minimal AIAgent-like object with just the methods under test."""
    # Blank out provider env vars so no accidental credentials leak into the test.
    monkeypatch.setenv("OPENROUTER_API_KEY", "")
    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "")
    # Avoid full AIAgent init — just import the class and build a stub
    import run_agent as _ra

    class _Stub:
        # Class-level attributes cover every field _execute_tool_calls_concurrent
        # reads. One _Stub instance exists per _make_agent call, so sharing the
        # class-level MagicMocks across instances is not a concern here.
        _interrupt_requested = False
        log_prefix = ""
        quiet_mode = True
        verbose_logging = False
        log_prefix_chars = 200
        _checkpoint_mgr = MagicMock(enabled=False)
        _subdirectory_hints = MagicMock()
        tool_progress_callback = None
        tool_start_callback = None
        tool_complete_callback = None
        _todo_store = MagicMock()
        _session_db = None
        valid_tool_names = set()
        _turns_since_memory = 0
        _iters_since_skill = 0
        _current_tool = None
        _last_activity = 0
        _print_fn = print

        def _touch_activity(self, desc):
            # Mirrors the real agent's activity heartbeat.
            self._last_activity = time.time()

        def _vprint(self, msg, force=False):
            pass

        def _safe_print(self, msg):
            pass

        def _should_emit_quiet_tool_messages(self):
            return False

        def _should_start_quiet_spinner(self):
            return False

        def _has_stream_consumers(self):
            return False

    stub = _Stub()
    # Bind the real methods
    stub._execute_tool_calls_concurrent = _ra.AIAgent._execute_tool_calls_concurrent.__get__(stub)
    stub._invoke_tool = MagicMock(side_effect=lambda *a, **kw: '{"ok": true}')
    return stub
class _FakeToolCall:
def __init__(self, name, args="{}", call_id="tc_1"):
self.function = MagicMock(name=name, arguments=args)
self.function.name = name
self.id = call_id
class _FakeAssistantMsg:
def __init__(self, tool_calls):
self.tool_calls = tool_calls
def test_concurrent_interrupt_cancels_pending(monkeypatch):
    """An interrupt raised mid-flight should end the wait loop early while
    every dispatched tool still records a result message."""
    agent = _make_agent(monkeypatch)
    release = threading.Event()

    def fake_invoke(name, args, task_id, call_id=None):
        if name == "slow_one":
            # Park until the test raises the interrupt flag.
            release.wait(timeout=10)
            return '{"slow": true}'
        return '{"fast": true}'

    agent._invoke_tool = MagicMock(side_effect=fake_invoke)
    assistant_msg = _FakeAssistantMsg([
        _FakeToolCall("fast_one", call_id="tc_fast"),
        _FakeToolCall("slow_one", call_id="tc_slow"),
    ])
    messages = []

    def trip_interrupt():
        time.sleep(0.3)
        agent._interrupt_requested = True
        release.set()  # unblock the slow tool

    worker = threading.Thread(target=trip_interrupt)
    worker.start()
    agent._execute_tool_calls_concurrent(assistant_msg, messages, "test_task")
    worker.join()

    # Both tools should have results in messages
    assert len(messages) == 2
    # The interrupt was detected
    assert agent._interrupt_requested is True
def test_concurrent_preflight_interrupt_skips_all(monkeypatch):
    """With the interrupt flag already raised before execution starts, every
    tool is skipped with a cancellation message and nothing is invoked."""
    agent = _make_agent(monkeypatch)
    agent._interrupt_requested = True
    assistant_msg = _FakeAssistantMsg([
        _FakeToolCall("tool_a", call_id="tc_a"),
        _FakeToolCall("tool_b", call_id="tc_b"),
    ])
    messages = []
    agent._execute_tool_calls_concurrent(assistant_msg, messages, "test_task")
    assert len(messages) == 2
    for entry in messages:
        assert "skipped due to user interrupt" in entry["content"]
    # _invoke_tool should never have been called
    agent._invoke_tool.assert_not_called()

View file

@ -9,6 +9,8 @@ def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-
if custom_providers is not None:
cfg["custom_providers"] = custom_providers
base_url = model_cfg.get("base_url", "")
with (
patch("hermes_cli.config.load_config", return_value=cfg),
patch("agent.model_metadata.get_model_context_length", return_value=128_000),
@ -21,6 +23,7 @@ def _build_agent(model_cfg, custom_providers=None, model="anthropic/claude-opus-
agent = AIAgent(
model=model,
api_key="test-key-1234567890",
base_url=base_url,
quiet_mode=True,
skip_context_files=True,
skip_memory=True,

View file

@ -805,7 +805,10 @@ class TestCodexReasoningPreflight:
reasoning_items = [i for i in normalized if i.get("type") == "reasoning"]
assert len(reasoning_items) == 1
assert reasoning_items[0]["encrypted_content"] == "abc123encrypted"
assert reasoning_items[0]["id"] == "r_001"
# Note: "id" is intentionally excluded from normalized output —
# with store=False the API returns 404 on server-side id resolution.
# The id is only used for local deduplication via seen_ids.
assert "id" not in reasoning_items[0]
assert reasoning_items[0]["summary"] == [{"type": "summary_text", "text": "Thinking about it"}]
def test_reasoning_item_without_id(self, monkeypatch):

View file

@ -928,6 +928,7 @@ class TestBuildApiKwargs:
kwargs = agent._build_api_kwargs(messages)
assert kwargs["max_tokens"] == 4096
def test_qwen_portal_formats_messages_and_metadata(self, agent):
agent.base_url = "https://portal.qwen.ai/v1"
agent._base_url_lower = agent.base_url.lower()
@ -984,6 +985,46 @@ class TestBuildApiKwargs:
kwargs = agent._build_api_kwargs(messages)
assert kwargs["max_tokens"] == 65536
def test_ollama_think_false_on_effort_none(self, agent):
    """Custom (Ollama) provider with effort=none should inject think=false."""
    agent.provider = "custom"
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.reasoning_config = {"effort": "none"}
    built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert built.get("extra_body", {}).get("think") is False
def test_ollama_think_false_on_enabled_false(self, agent):
    """Custom (Ollama) provider with enabled=false should inject think=false."""
    agent.provider = "custom"
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.reasoning_config = {"enabled": False}
    built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert built.get("extra_body", {}).get("think") is False
def test_ollama_no_think_param_when_reasoning_enabled(self, agent):
    """Custom provider with reasoning enabled should NOT inject think=false."""
    agent.provider = "custom"
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.reasoning_config = {"enabled": True, "effort": "medium"}
    built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert built.get("extra_body", {}).get("think") is None
def test_non_custom_provider_unaffected(self, agent):
    """OpenRouter provider with effort=none should NOT inject think=false."""
    agent.provider = "openrouter"
    agent.model = "qwen/qwen3.5-plus-02-15"
    agent.reasoning_config = {"effort": "none"}
    built = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert built.get("extra_body", {}).get("think") is None
class TestBuildAssistantMessage:
def test_basic_message(self, agent):
@ -2202,6 +2243,114 @@ class TestRunConversation:
assert second_call_messages[-1]["role"] == "user"
assert "truncated by the output length limit" in second_call_messages[-1]["content"]
def test_ollama_glm_stop_after_tools_without_terminal_boundary_requests_continuation(self, agent):
    """Ollama-hosted GLM responses can misreport truncated output as stop."""
    self._setup_agent(agent)
    # Local Ollama endpoint + GLM model: the combination the workaround targets.
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "glm-5.1:cloud"
    # Call 1: the model requests a tool.
    tool_turn = _mock_response(
        content="",
        finish_reason="tool_calls",
        tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
    )
    # Call 2: output cut mid-sentence, yet finish_reason falsely claims "stop".
    misreported_stop = _mock_response(
        content="Based on the search results, the best next",
        finish_reason="stop",
    )
    # Call 3: the continuation the workaround is expected to request.
    continued = _mock_response(
        content=" step is to update the config.",
        finish_reason="stop",
    )
    agent.client.chat.completions.create.side_effect = [
        tool_turn,
        misreported_stop,
        continued,
    ]
    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hello")
    assert result["completed"] is True
    assert result["api_calls"] == 3  # tool turn + misreported stop + continuation
    # The two stop-turn fragments must be stitched into one final response.
    assert (
        result["final_response"]
        == "Based on the search results, the best next step is to update the config."
    )
    # The continuation request arrives as a trailing user message on call 3.
    third_call_messages = agent.client.chat.completions.create.call_args_list[2].kwargs["messages"]
    assert third_call_messages[-1]["role"] == "user"
    assert "truncated by the output length limit" in third_call_messages[-1]["content"]
def test_ollama_glm_stop_with_terminal_boundary_does_not_continue(self, agent):
    """Complete Ollama/GLM responses should not be reclassified as truncated."""
    self._setup_agent(agent)
    # Same Ollama/GLM backend combination, but the reply is genuinely complete.
    agent.base_url = "http://localhost:11434/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "glm-5.1:cloud"
    tool_turn = _mock_response(
        content="",
        finish_reason="tool_calls",
        tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
    )
    # Ends at a terminal boundary ('.'), so no continuation should be requested.
    complete_stop = _mock_response(
        content="Based on the search results, the best next step is to update the config.",
        finish_reason="stop",
    )
    agent.client.chat.completions.create.side_effect = [tool_turn, complete_stop]
    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hello")
    assert result["completed"] is True
    assert result["api_calls"] == 2  # no third (continuation) API call
    assert (
        result["final_response"]
        == "Based on the search results, the best next step is to update the config."
    )
def test_non_ollama_stop_without_terminal_boundary_does_not_continue(self, agent):
    """The stop->length workaround should stay scoped to Ollama/GLM backends."""
    self._setup_agent(agent)
    # Non-Ollama backend: even a mid-sentence "stop" must be taken at face value.
    agent.base_url = "https://api.openai.com/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "gpt-4o-mini"
    tool_turn = _mock_response(
        content="",
        finish_reason="tool_calls",
        tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
    )
    # Cut mid-sentence, but on this backend no reclassification happens.
    normal_stop = _mock_response(
        content="Based on the search results, the best next",
        finish_reason="stop",
    )
    agent.client.chat.completions.create.side_effect = [tool_turn, normal_stop]
    with (
        patch("run_agent.handle_function_call", return_value="search result"),
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        result = agent.run_conversation("hello")
    assert result["completed"] is True
    assert result["api_calls"] == 2  # no continuation requested
    assert result["final_response"] == "Based on the search results, the best next"
def test_length_thinking_exhausted_skips_continuation(self, agent):
"""When finish_reason='length' but content is only thinking, skip retries."""
self._setup_agent(agent)
@ -3998,3 +4147,63 @@ class TestDeadRetryCode:
f"Expected 2 occurrences of 'if retry_count >= max_retries:' "
f"but found {occurrences}"
)
class TestMemoryContextSanitization:
    """run_conversation() must strip leaked <memory-context> blocks from user input."""

    def test_memory_context_stripped_from_user_message(self):
        """sanitize_context must run in run_conversation's preamble so stale
        Honcho injection cannot leak into user text."""
        import inspect

        src = inspect.getsource(AIAgent.run_conversation)
        # The sanitize_context call must appear in run_conversation's preamble
        assert "sanitize_context(user_message)" in src
        assert "sanitize_context(persist_user_message)" in src

    def test_sanitize_context_strips_full_block(self):
        """End-to-end: a message carrying an embedded memory-context block is
        reduced to just the genuine user text."""
        from agent.memory_manager import sanitize_context

        user_text = "how is the honcho working"
        memory_block = (
            "<memory-context>\n"
            "[System note: The following is recalled memory context, "
            "NOT new user input. Treat as informational background data.]\n\n"
            "## User Representation\n"
            "[2026-01-13 02:13:00] stale observation about AstroMap\n"
            "</memory-context>"
        )
        cleaned = sanitize_context(user_text + "\n\n" + memory_block)
        assert "memory-context" not in cleaned.lower()
        assert "stale observation" not in cleaned
        assert user_text in cleaned
class TestMemoryProviderTurnStart:
    """run_conversation() must call memory_manager.on_turn_start() before prefetch_all().

    Without this call, providers like Honcho never update _turn_count, so cadence
    checks (contextCadence, dialecticCadence) are always satisfied: every turn
    fires both context refresh and dialectic, ignoring the configured cadence.
    """

    def test_on_turn_start_called_before_prefetch(self):
        """Source-level check: on_turn_start precedes prefetch_all."""
        import inspect

        src = inspect.getsource(AIAgent.run_conversation)
        # Match the actual method-call tokens, not comments.
        pos_turn_start = src.index(".on_turn_start(")
        pos_prefetch = src.index(".prefetch_all(")
        assert pos_turn_start < pos_prefetch, (
            "on_turn_start() must be called before prefetch_all() in run_conversation "
            "so that memory providers have the correct turn count for cadence checks"
        )

    def test_on_turn_start_uses_user_turn_count(self):
        """Source-level check: on_turn_start receives self._user_turn_count."""
        import inspect

        src = inspect.getsource(AIAgent.run_conversation)
        assert "on_turn_start(self._user_turn_count" in src

View file

@ -160,7 +160,9 @@ class TestExchangeAuthCode:
assert flow.state == "saved-state"
assert flow.code_verifier == "saved-verifier"
assert flow.fetch_token_calls == [{"code": "4/test-auth-code"}]
assert json.loads(setup_module.TOKEN_PATH.read_text())["token"] == "access-token"
saved = json.loads(setup_module.TOKEN_PATH.read_text())
assert saved["token"] == "access-token"
assert saved["type"] == "authorized_user"
assert not setup_module.PENDING_AUTH_PATH.exists()
def test_extracts_code_from_redirect_url_and_checks_state(self, setup_module):

View file

@ -46,6 +46,12 @@ def api_module(monkeypatch, tmp_path):
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(module)
# Ensure the gws CLI code path is taken even when the binary isn't
# installed (CI). Without this, calendar_list() falls through to the
# Python SDK path which imports ``googleapiclient`` — not in deps.
module._gws_binary = lambda: "/usr/bin/gws"
# Bypass authentication check — no real token file in CI.
module._ensure_authenticated = lambda: None
return module
@ -94,6 +100,7 @@ def test_bridge_refreshes_expired_token(bridge_module, tmp_path):
# Verify persisted
saved = json.loads(token_path.read_text())
assert saved["token"] == "ya29.refreshed"
assert saved["type"] == "authorized_user"
def test_bridge_exits_on_missing_token(bridge_module):
@ -124,35 +131,41 @@ def test_bridge_main_injects_token_env(bridge_module, tmp_path):
assert captured["cmd"] == ["gws", "gmail", "+triage"]
def test_api_calendar_list_uses_agenda_by_default(api_module):
"""calendar list without dates uses +agenda helper."""
def test_api_calendar_list_uses_events_list(api_module):
"""calendar_list calls _run_gws with events list + params."""
captured = {}
def capture_run(cmd, **kwargs):
captured["cmd"] = cmd
return MagicMock(returncode=0)
return MagicMock(returncode=0, stdout="{}", stderr="")
args = api_module.argparse.Namespace(
start="", end="", max=25, calendar="primary", func=api_module.calendar_list,
)
with patch.object(subprocess, "run", side_effect=capture_run):
with pytest.raises(SystemExit):
api_module.calendar_list(args)
with patch.object(api_module.subprocess, "run", side_effect=capture_run):
api_module.calendar_list(args)
gws_args = captured["cmd"][2:] # skip python + bridge path
assert "calendar" in gws_args
assert "+agenda" in gws_args
assert "--days" in gws_args
cmd = captured["cmd"]
# _gws_binary() returns "/usr/bin/gws", so cmd[0] is that binary
assert cmd[0] == "/usr/bin/gws"
assert "calendar" in cmd
assert "events" in cmd
assert "list" in cmd
assert "--params" in cmd
params = json.loads(cmd[cmd.index("--params") + 1])
assert "timeMin" in params
assert "timeMax" in params
assert params["calendarId"] == "primary"
def test_api_calendar_list_respects_date_range(api_module):
"""calendar list with --start/--end uses raw events list API."""
"""calendar list with --start/--end passes correct time bounds."""
captured = {}
def capture_run(cmd, **kwargs):
captured["cmd"] = cmd
return MagicMock(returncode=0)
return MagicMock(returncode=0, stdout="{}", stderr="")
args = api_module.argparse.Namespace(
start="2026-04-01T00:00:00Z",
@ -162,14 +175,62 @@ def test_api_calendar_list_respects_date_range(api_module):
func=api_module.calendar_list,
)
with patch.object(subprocess, "run", side_effect=capture_run):
with pytest.raises(SystemExit):
api_module.calendar_list(args)
with patch.object(api_module.subprocess, "run", side_effect=capture_run):
api_module.calendar_list(args)
gws_args = captured["cmd"][2:]
assert "events" in gws_args
assert "list" in gws_args
params_idx = gws_args.index("--params")
params = json.loads(gws_args[params_idx + 1])
cmd = captured["cmd"]
params_idx = cmd.index("--params")
params = json.loads(cmd[params_idx + 1])
assert params["timeMin"] == "2026-04-01T00:00:00Z"
assert params["timeMax"] == "2026-04-07T23:59:59Z"
def test_api_get_credentials_refresh_persists_authorized_user_type(api_module, monkeypatch):
token_path = api_module.TOKEN_PATH
_write_token(token_path, token="ya29.old")
class FakeCredentials:
def __init__(self):
self.expired = True
self.refresh_token = "1//refresh"
self.valid = True
def refresh(self, request):
self.expired = False
def to_json(self):
return json.dumps({
"token": "ya29.refreshed",
"refresh_token": "1//refresh",
"client_id": "123.apps.googleusercontent.com",
"client_secret": "secret",
"token_uri": "https://oauth2.googleapis.com/token",
})
class FakeCredentialsModule:
@staticmethod
def from_authorized_user_file(filename, scopes):
assert filename == str(token_path)
assert scopes == api_module.SCOPES
return FakeCredentials()
google_module = types.ModuleType("google")
oauth2_module = types.ModuleType("google.oauth2")
credentials_module = types.ModuleType("google.oauth2.credentials")
credentials_module.Credentials = FakeCredentialsModule
transport_module = types.ModuleType("google.auth.transport")
requests_module = types.ModuleType("google.auth.transport.requests")
requests_module.Request = lambda: object()
monkeypatch.setitem(sys.modules, "google", google_module)
monkeypatch.setitem(sys.modules, "google.oauth2", oauth2_module)
monkeypatch.setitem(sys.modules, "google.oauth2.credentials", credentials_module)
monkeypatch.setitem(sys.modules, "google.auth.transport", transport_module)
monkeypatch.setitem(sys.modules, "google.auth.transport.requests", requests_module)
creds = api_module.get_credentials()
saved = json.loads(token_path.read_text())
assert isinstance(creds, FakeCredentials)
assert saved["token"] == "ya29.refreshed"
assert saved["type"] == "authorized_user"

View file

@ -1,8 +1,8 @@
"""Persistence tests for the Camofox browser backend.
Tests that managed persistence uses stable identity while default mode
uses random identity. The actual browser profile persistence is handled
by the Camofox server (when CAMOFOX_PROFILE_DIR is set).
uses random identity. Camofox automatically maps each userId to a
dedicated persistent Firefox profile on the server side.
"""
import json

View file

@ -0,0 +1,166 @@
"""Tests for cloud browser provider runtime fallback to local Chromium.
Covers the fallback logic in _get_session_info() when a cloud provider
is configured but fails at runtime (issue #10883).
"""
import logging
from unittest.mock import Mock, patch
import pytest
import tools.browser_tool as browser_tool
def _reset_session_state(monkeypatch):
"""Clear caches so each test starts fresh."""
monkeypatch.setattr(browser_tool, "_active_sessions", {})
monkeypatch.setattr(browser_tool, "_cached_cloud_provider", None)
monkeypatch.setattr(browser_tool, "_cloud_provider_resolved", False)
monkeypatch.setattr(browser_tool, "_start_browser_cleanup_thread", lambda: None)
monkeypatch.setattr(browser_tool, "_update_session_activity", lambda t: None)
class TestCloudProviderRuntimeFallback:
"""Tests for _get_session_info cloud → local fallback."""
def test_cloud_failure_falls_back_to_local(self, monkeypatch):
"""When cloud provider.create_session raises, fall back to local."""
_reset_session_state(monkeypatch)
provider = Mock()
provider.create_session.side_effect = RuntimeError("401 Unauthorized")
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
session = browser_tool._get_session_info("task-1")
assert session["fallback_from_cloud"] is True
assert "401 Unauthorized" in session["fallback_reason"]
assert session["fallback_provider"] == "Mock"
assert session["features"]["local"] is True
assert session["cdp_url"] is None
def test_cloud_success_no_fallback(self, monkeypatch):
"""When cloud succeeds, no fallback markers are present."""
_reset_session_state(monkeypatch)
provider = Mock()
provider.create_session.return_value = {
"session_name": "cloud-sess",
"bb_session_id": "bb_123",
"cdp_url": None,
"features": {"browser_use": True},
}
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
session = browser_tool._get_session_info("task-2")
assert session["session_name"] == "cloud-sess"
assert "fallback_from_cloud" not in session
assert "fallback_reason" not in session
def test_cloud_and_local_both_fail(self, monkeypatch):
"""When both cloud and local fail, raise RuntimeError with both contexts."""
_reset_session_state(monkeypatch)
provider = Mock()
provider.create_session.side_effect = RuntimeError("cloud boom")
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
monkeypatch.setattr(
browser_tool, "_create_local_session",
Mock(side_effect=OSError("no chromium")),
)
with pytest.raises(RuntimeError, match="cloud boom.*local.*no chromium"):
browser_tool._get_session_info("task-3")
def test_no_provider_uses_local_directly(self, monkeypatch):
"""When no cloud provider is configured, local mode is used with no fallback markers."""
_reset_session_state(monkeypatch)
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: None)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
session = browser_tool._get_session_info("task-4")
assert session["features"]["local"] is True
assert "fallback_from_cloud" not in session
def test_cdp_override_bypasses_provider(self, monkeypatch):
"""CDP override takes priority — cloud provider is never consulted."""
_reset_session_state(monkeypatch)
provider = Mock()
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "ws://host:9222/devtools/browser/abc")
session = browser_tool._get_session_info("task-5")
provider.create_session.assert_not_called()
assert session["cdp_url"] == "ws://host:9222/devtools/browser/abc"
def test_fallback_logs_warning_with_provider_name(self, monkeypatch, caplog):
"""Fallback emits a warning log with the provider class name and error."""
_reset_session_state(monkeypatch)
BrowserUseProviderFake = type("BrowserUseProvider", (), {
"create_session": Mock(side_effect=ConnectionError("timeout")),
})
provider = BrowserUseProviderFake()
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
with caplog.at_level(logging.WARNING, logger="tools.browser_tool"):
session = browser_tool._get_session_info("task-6")
assert session["fallback_from_cloud"] is True
assert any("BrowserUseProvider" in r.message and "timeout" in r.message
for r in caplog.records)
def test_cloud_failure_does_not_poison_next_task(self, monkeypatch):
"""A fallback for one task_id doesn't affect a new task_id when cloud recovers."""
_reset_session_state(monkeypatch)
call_count = 0
def create_session_flaky(task_id):
nonlocal call_count
call_count += 1
if call_count == 1:
raise RuntimeError("transient failure")
return {
"session_name": "cloud-ok",
"bb_session_id": "bb_999",
"cdp_url": None,
"features": {"browser_use": True},
}
provider = Mock()
provider.create_session.side_effect = create_session_flaky
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
# First call fails → fallback
s1 = browser_tool._get_session_info("task-a")
assert s1["fallback_from_cloud"] is True
# Second call (different task) → cloud succeeds
s2 = browser_tool._get_session_info("task-b")
assert "fallback_from_cloud" not in s2
assert s2["session_name"] == "cloud-ok"
def test_cloud_returns_invalid_session_triggers_fallback(self, monkeypatch):
"""Cloud provider returning None or empty dict triggers fallback."""
_reset_session_state(monkeypatch)
provider = Mock()
provider.create_session.return_value = None
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None)
session = browser_tool._get_session_info("task-7")
assert session["fallback_from_cloud"] is True
assert "invalid session" in session["fallback_reason"]

View file

@ -123,7 +123,7 @@ class TestSendMatrix:
session.put.assert_called_once()
call_kwargs = session.put.call_args
url = call_kwargs[0][0]
assert url.startswith("https://matrix.example.com/_matrix/client/v3/rooms/!room:example.com/send/m.room.message/")
assert url.startswith("https://matrix.example.com/_matrix/client/v3/rooms/%21room%3Aexample.com/send/m.room.message/")
assert call_kwargs[1]["headers"]["Authorization"] == "Bearer syt_tok"
payload = call_kwargs[1]["json"]
assert payload["msgtype"] == "m.text"

View file

@ -12,6 +12,7 @@ from gateway.config import Platform
from tools.send_message_tool import (
_parse_target_ref,
_send_discord,
_send_matrix_via_adapter,
_send_telegram,
_send_to_platform,
send_message_tool,
@ -576,7 +577,7 @@ class TestSendToPlatformChunking:
sent_calls = []
async def fake_send(token, chat_id, message, media_files=None, thread_id=None):
async def fake_send(token, chat_id, message, media_files=None, thread_id=None, disable_link_previews=False):
sent_calls.append(media_files or [])
return {"success": True, "platform": "telegram", "chat_id": chat_id, "message_id": str(len(sent_calls))}
@ -594,6 +595,103 @@ class TestSendToPlatformChunking:
assert all(call == [] for call in sent_calls[:-1])
assert sent_calls[-1] == media
def test_matrix_media_uses_native_adapter_helper(self):
doc_path = Path("/tmp/test-send-message-matrix.pdf")
doc_path.write_bytes(b"%PDF-1.4 test")
try:
helper = AsyncMock(return_value={"success": True, "platform": "matrix", "chat_id": "!room:example.com", "message_id": "$evt"})
with patch("tools.send_message_tool._send_matrix_via_adapter", helper):
result = asyncio.run(
_send_to_platform(
Platform.MATRIX,
SimpleNamespace(enabled=True, token="tok", extra={"homeserver": "https://matrix.example.com"}),
"!room:example.com",
"here you go",
media_files=[(str(doc_path), False)],
)
)
assert result["success"] is True
helper.assert_awaited_once()
call = helper.await_args
assert call.args[1] == "!room:example.com"
assert call.args[2] == "here you go"
assert call.kwargs["media_files"] == [(str(doc_path), False)]
finally:
doc_path.unlink(missing_ok=True)
def test_matrix_text_only_uses_lightweight_path(self):
"""Text-only Matrix sends should NOT go through the heavy adapter path."""
helper = AsyncMock()
lightweight = AsyncMock(return_value={"success": True, "platform": "matrix", "chat_id": "!room:ex.com", "message_id": "$txt"})
with patch("tools.send_message_tool._send_matrix_via_adapter", helper), \
patch("tools.send_message_tool._send_matrix", lightweight):
result = asyncio.run(
_send_to_platform(
Platform.MATRIX,
SimpleNamespace(enabled=True, token="tok", extra={"homeserver": "https://matrix.example.com"}),
"!room:ex.com",
"just text, no files",
)
)
assert result["success"] is True
helper.assert_not_awaited()
lightweight.assert_awaited_once()
def test_send_matrix_via_adapter_sends_document(self, tmp_path):
file_path = tmp_path / "report.pdf"
file_path.write_bytes(b"%PDF-1.4 test")
calls = []
class FakeAdapter:
def __init__(self, _config):
self.connected = False
async def connect(self):
self.connected = True
calls.append(("connect",))
return True
async def send(self, chat_id, message, metadata=None):
calls.append(("send", chat_id, message, metadata))
return SimpleNamespace(success=True, message_id="$text")
async def send_document(self, chat_id, file_path, metadata=None):
calls.append(("send_document", chat_id, file_path, metadata))
return SimpleNamespace(success=True, message_id="$file")
async def disconnect(self):
calls.append(("disconnect",))
fake_module = SimpleNamespace(MatrixAdapter=FakeAdapter)
with patch.dict(sys.modules, {"gateway.platforms.matrix": fake_module}):
result = asyncio.run(
_send_matrix_via_adapter(
SimpleNamespace(enabled=True, token="tok", extra={"homeserver": "https://matrix.example.com"}),
"!room:example.com",
"report attached",
media_files=[(str(file_path), False)],
)
)
assert result == {
"success": True,
"platform": "matrix",
"chat_id": "!room:example.com",
"message_id": "$file",
}
assert calls == [
("connect",),
("send", "!room:example.com", "report attached", None),
("send_document", "!room:example.com", str(file_path), None),
("disconnect",),
]
# ---------------------------------------------------------------------------
# HTML auto-detection in Telegram send
@ -658,6 +756,17 @@ class TestSendTelegramHtmlDetection:
kwargs = bot.send_message.await_args.kwargs
assert kwargs["parse_mode"] == "MarkdownV2"
def test_disable_link_previews_sets_disable_web_page_preview(self, monkeypatch):
bot = self._make_bot()
_install_telegram_mock(monkeypatch, bot)
asyncio.run(
_send_telegram("tok", "123", "https://example.com", disable_link_previews=True)
)
kwargs = bot.send_message.await_args.kwargs
assert kwargs["disable_web_page_preview"] is True
def test_html_with_code_and_pre_tags(self, monkeypatch):
bot = self._make_bot()
_install_telegram_mock(monkeypatch, bot)
@ -707,6 +816,23 @@ class TestSendTelegramHtmlDetection:
second_call = bot.send_message.await_args_list[1].kwargs
assert second_call["parse_mode"] is None
def test_transient_bad_gateway_retries_text_send(self, monkeypatch):
bot = self._make_bot()
bot.send_message = AsyncMock(
side_effect=[
Exception("502 Bad Gateway"),
SimpleNamespace(message_id=2),
]
)
_install_telegram_mock(monkeypatch, bot)
with patch("asyncio.sleep", new=AsyncMock()) as sleep_mock:
result = asyncio.run(_send_telegram("tok", "123", "hello"))
assert result["success"] is True
assert bot.send_message.await_count == 2
sleep_mock.assert_awaited_once()
# ---------------------------------------------------------------------------
# Tests for Discord thread_id support

View file

@ -873,12 +873,37 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
if provider is None:
session_info = _create_local_session(task_id)
else:
session_info = provider.create_session(task_id)
if session_info.get("cdp_url"):
# Some cloud providers (including Browser-Use v3) return an HTTP
# CDP discovery URL instead of a raw websocket endpoint.
session_info = dict(session_info)
session_info["cdp_url"] = _resolve_cdp_override(str(session_info["cdp_url"]))
try:
session_info = provider.create_session(task_id)
# Validate cloud provider returned a usable session
if not session_info or not isinstance(session_info, dict):
raise ValueError(f"Cloud provider returned invalid session: {session_info!r}")
if session_info.get("cdp_url"):
# Some cloud providers (including Browser-Use v3) return an HTTP
# CDP discovery URL instead of a raw websocket endpoint.
session_info = dict(session_info)
session_info["cdp_url"] = _resolve_cdp_override(str(session_info["cdp_url"]))
except Exception as e:
provider_name = type(provider).__name__
logger.warning(
"Cloud provider %s failed (%s); attempting fallback to local "
"Chromium for task %s",
provider_name, e, task_id,
exc_info=True,
)
try:
session_info = _create_local_session(task_id)
except Exception as local_error:
raise RuntimeError(
f"Cloud provider {provider_name} failed ({e}) and local "
f"fallback also failed ({local_error})"
) from e
# Mark session as degraded for observability
if isinstance(session_info, dict):
session_info = dict(session_info)
session_info["fallback_from_cloud"] = True
session_info["fallback_reason"] = str(e)
session_info["fallback_provider"] = provider_name
with _cleanup_lock:
# Double-check: another thread may have created a session while we

View file

@ -988,7 +988,8 @@ def execute_code(
# (terminal.env_passthrough) are passed through.
_SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
"TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
"XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA",
"HERMES_")
_SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
"PASSWD", "AUTH")
try:
@ -1015,10 +1016,13 @@ def execute_code(
_existing_pp = child_env.get("PYTHONPATH", "")
child_env["PYTHONPATH"] = _hermes_root + (os.pathsep + _existing_pp if _existing_pp else "")
# Inject user's configured timezone so datetime.now() in sandboxed
# code reflects the correct wall-clock time.
# code reflects the correct wall-clock time. Only TZ is set —
# HERMES_TIMEZONE is an internal Hermes setting and must not leak
# into child processes.
_tz_name = os.getenv("HERMES_TIMEZONE", "").strip()
if _tz_name:
child_env["TZ"] = _tz_name
child_env.pop("HERMES_TIMEZONE", None)
# Per-profile HOME isolation: redirect system tool configs into
# {HERMES_HOME}/home/ when that directory exists.

View file

@ -807,21 +807,61 @@ def delegate_task(
)
futures[future] = i
for future in as_completed(futures):
try:
entry = future.result()
except Exception as exc:
idx = futures[future]
entry = {
"task_index": idx,
"status": "error",
"summary": None,
"error": str(exc),
"api_calls": 0,
"duration_seconds": 0,
}
results.append(entry)
completed_count += 1
# Poll futures with interrupt checking. as_completed() blocks
# until ALL futures finish — if a child agent gets stuck,
# the parent blocks forever even after interrupt propagation.
# Instead, use wait() with a short timeout so we can bail
# when the parent is interrupted.
pending = set(futures.keys())
while pending:
if getattr(parent_agent, "_interrupt_requested", False) is True:
# Parent interrupted — collect whatever finished and
# abandon the rest. Children already received the
# interrupt signal; we just can't wait forever.
for f in pending:
idx = futures[f]
if f.done():
try:
entry = f.result()
except Exception as exc:
entry = {
"task_index": idx,
"status": "error",
"summary": None,
"error": str(exc),
"api_calls": 0,
"duration_seconds": 0,
}
else:
entry = {
"task_index": idx,
"status": "interrupted",
"summary": None,
"error": "Parent agent interrupted — child did not finish in time",
"api_calls": 0,
"duration_seconds": 0,
}
results.append(entry)
completed_count += 1
break
from concurrent.futures import wait as _cf_wait, FIRST_COMPLETED
done, pending = _cf_wait(pending, timeout=0.5, return_when=FIRST_COMPLETED)
for future in done:
try:
entry = future.result()
except Exception as exc:
idx = futures[future]
entry = {
"task_index": idx,
"status": "error",
"summary": None,
"error": str(exc),
"api_calls": 0,
"duration_seconds": 0,
}
results.append(entry)
completed_count += 1
# Print per-task completion line above the spinner
idx = entry["task_index"]

View file

@ -1166,6 +1166,14 @@ class MCPServerTask:
_servers: Dict[str, MCPServerTask] = {}
# Circuit breaker: consecutive error counts per server. After
# _CIRCUIT_BREAKER_THRESHOLD consecutive failures, the handler returns
# a "server unreachable" message that tells the model to stop retrying,
# preventing the 90-iteration burn loop described in #10447.
# Reset to 0 on any successful call.
_server_error_counts: Dict[str, int] = {}
_CIRCUIT_BREAKER_THRESHOLD = 3
# Dedicated event loop running in a background daemon thread.
_mcp_loop: Optional[asyncio.AbstractEventLoop] = None
_mcp_thread: Optional[threading.Thread] = None
@ -1356,9 +1364,23 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
"""
def _handler(args: dict, **kwargs) -> str:
# Circuit breaker: if this server has failed too many times
# consecutively, short-circuit with a clear message so the model
# stops retrying and uses alternative approaches (#10447).
if _server_error_counts.get(server_name, 0) >= _CIRCUIT_BREAKER_THRESHOLD:
return json.dumps({
"error": (
f"MCP server '{server_name}' is unreachable after "
f"{_CIRCUIT_BREAKER_THRESHOLD} consecutive failures. "
f"Do NOT retry this tool — use alternative approaches "
f"or ask the user to check the MCP server."
)
}, ensure_ascii=False)
with _lock:
server = _servers.get(server_name)
if not server or not server.session:
_server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1
return json.dumps({
"error": f"MCP server '{server_name}' is not connected"
}, ensure_ascii=False)
@ -1399,10 +1421,21 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float):
return json.dumps({"result": text_result}, ensure_ascii=False)
try:
return _run_on_mcp_loop(_call(), timeout=tool_timeout)
result = _run_on_mcp_loop(_call(), timeout=tool_timeout)
# Check if the MCP tool itself returned an error
try:
parsed = json.loads(result)
if "error" in parsed:
_server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1
else:
_server_error_counts[server_name] = 0 # success — reset
except (json.JSONDecodeError, TypeError):
_server_error_counts[server_name] = 0 # non-JSON = success
return result
except InterruptedError:
return _interrupted_call_result()
except Exception as exc:
_server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1
logger.error(
"MCP tool %s/%s call failed: %s",
server_name, tool_name, exc,

View file

@ -345,7 +345,7 @@ class ProcessRegistry:
pty_env = _sanitize_subprocess_env(os.environ, env_vars)
pty_env["PYTHONUNBUFFERED"] = "1"
pty_proc = _PtyProcessCls.spawn(
[user_shell, "-lic", command],
[user_shell, "-lic", f"set +m; {command}"],
cwd=session.cwd,
env=pty_env,
dimensions=(30, 120),
@ -386,7 +386,7 @@ class ProcessRegistry:
bg_env = _sanitize_subprocess_env(os.environ, env_vars)
bg_env["PYTHONUNBUFFERED"] = "1"
proc = subprocess.Popen(
[user_shell, "-lic", command],
[user_shell, "-lic", f"set +m; {command}"],
text=True,
cwd=session.cwd,
env=bg_env,

View file

@ -5,6 +5,7 @@ Sends a message to a user or channel on any connected messaging platform
human-friendly channel names to IDs. Works in both CLI and gateway contexts.
"""
import asyncio
import json
import logging
import os
@ -48,6 +49,49 @@ def _error(message: str) -> dict:
return {"error": _sanitize_error_text(message)}
def _telegram_retry_delay(exc: Exception, attempt: int) -> float | None:
retry_after = getattr(exc, "retry_after", None)
if retry_after is not None:
try:
return max(float(retry_after), 0.0)
except (TypeError, ValueError):
return 1.0
text = str(exc).lower()
if "timed out" in text or "timeout" in text:
return None
if (
"bad gateway" in text
or "502" in text
or "too many requests" in text
or "429" in text
or "service unavailable" in text
or "503" in text
or "gateway timeout" in text
or "504" in text
):
return float(2 ** attempt)
return None
async def _send_telegram_message_with_retry(bot, *, attempts: int = 3, **kwargs):
for attempt in range(attempts):
try:
return await bot.send_message(**kwargs)
except Exception as exc:
delay = _telegram_retry_delay(exc, attempt)
if delay is None or attempt >= attempts - 1:
raise
logger.warning(
"Transient Telegram send failure (attempt %d/%d), retrying in %.1fs: %s",
attempt + 1,
attempts,
delay,
_sanitize_error_text(exc),
)
await asyncio.sleep(delay)
SEND_MESSAGE_SCHEMA = {
"name": "send_message",
"description": (
@ -327,10 +371,16 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
"""
from gateway.config import Platform
from gateway.platforms.base import BasePlatformAdapter, utf16_len
from gateway.platforms.telegram import TelegramAdapter
from gateway.platforms.discord import DiscordAdapter
from gateway.platforms.slack import SlackAdapter
# Telegram adapter import is optional (requires python-telegram-bot)
try:
from gateway.platforms.telegram import TelegramAdapter
_telegram_available = True
except ImportError:
_telegram_available = False
# Feishu adapter import is optional (requires lark-oapi)
try:
from gateway.platforms.feishu import FeishuAdapter
@ -349,7 +399,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
# Platform message length limits (from adapter class attributes)
_MAX_LENGTHS = {
Platform.TELEGRAM: TelegramAdapter.MAX_MESSAGE_LENGTH,
Platform.TELEGRAM: TelegramAdapter.MAX_MESSAGE_LENGTH if _telegram_available else 4096,
Platform.DISCORD: DiscordAdapter.MAX_MESSAGE_LENGTH,
Platform.SLACK: SlackAdapter.MAX_MESSAGE_LENGTH,
}
@ -369,6 +419,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
# --- Telegram: special handling for media attachments ---
if platform == Platform.TELEGRAM:
last_result = None
disable_link_previews = bool(getattr(pconfig, "extra", {}) and pconfig.extra.get("disable_link_previews"))
for i, chunk in enumerate(chunks):
is_last = (i == len(chunks) - 1)
result = await _send_telegram(
@ -377,6 +428,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
chunk,
media_files=media_files if is_last else [],
thread_id=thread_id,
disable_link_previews=disable_link_previews,
)
if isinstance(result, dict) and result.get("error"):
return result
@ -404,11 +456,28 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
last_result = result
return last_result
# --- Matrix: use the native adapter helper when media is present ---
if platform == Platform.MATRIX and media_files:
last_result = None
for i, chunk in enumerate(chunks):
is_last = (i == len(chunks) - 1)
result = await _send_matrix_via_adapter(
pconfig,
chat_id,
chunk,
media_files=media_files if is_last else [],
thread_id=thread_id,
)
if isinstance(result, dict) and result.get("error"):
return result
last_result = result
return last_result
# --- Non-Telegram/Discord platforms ---
if media_files and not message.strip():
return {
"error": (
f"send_message MEDIA delivery is currently only supported for telegram, discord, and weixin; "
f"send_message MEDIA delivery is currently only supported for telegram, discord, matrix, and weixin; "
f"target {platform.value} had only media attachments"
)
}
@ -416,7 +485,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
if media_files:
warning = (
f"MEDIA attachments were omitted for {platform.value}; "
"native send_message media delivery is currently only supported for telegram, discord, and weixin"
"native send_message media delivery is currently only supported for telegram, discord, matrix, and weixin"
)
last_result = None
@ -461,7 +530,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
return last_result
async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None):
async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None, disable_link_previews=False):
"""Send via Telegram Bot API (one-shot, no polling needed).
Applies markdownMarkdownV2 formatting (same as the gateway adapter)
@ -497,13 +566,16 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
thread_kwargs = {}
if thread_id is not None:
thread_kwargs["message_thread_id"] = int(thread_id)
if disable_link_previews:
thread_kwargs["disable_web_page_preview"] = True
last_msg = None
warnings = []
if formatted.strip():
try:
last_msg = await bot.send_message(
last_msg = await _send_telegram_message_with_retry(
bot,
chat_id=int_chat_id, text=formatted,
parse_mode=send_parse_mode, **thread_kwargs
)
@ -523,7 +595,8 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
plain = message
else:
plain = message
last_msg = await bot.send_message(
last_msg = await _send_telegram_message_with_retry(
bot,
chat_id=int_chat_id, text=plain,
parse_mode=None, **thread_kwargs
)
@ -907,6 +980,66 @@ async def _send_matrix(token, extra, chat_id, message):
return _error(f"Matrix send failed: {e}")
async def _send_matrix_via_adapter(pconfig, chat_id, message, media_files=None, thread_id=None):
    """Deliver text and media through the Matrix adapter.

    Routing through the adapter (rather than a raw HTTP call) preserves
    native Matrix media uploads — images, video, voice notes, documents —
    exactly as the gateway performs them.
    """
    try:
        from gateway.platforms.matrix import MatrixAdapter
    except ImportError:
        return {"error": "Matrix dependencies not installed. Run: pip install 'mautrix[encryption]'"}

    attachments = media_files or []
    try:
        adapter = MatrixAdapter(pconfig)
        if not await adapter.connect():
            return _error("Matrix connect failed")

        metadata = {"thread_id": thread_id} if thread_id else None
        result = None

        if message.strip():
            result = await adapter.send(chat_id, message, metadata=metadata)
            if not result.success:
                return _error(f"Matrix send failed: {result.error}")

        for path, as_voice in attachments:
            if not os.path.exists(path):
                return _error(f"Media file not found: {path}")
            suffix = os.path.splitext(path)[1].lower()
            # Pick the adapter call that matches the attachment type.
            if suffix in _IMAGE_EXTS:
                sender = adapter.send_image_file
            elif suffix in _VIDEO_EXTS:
                sender = adapter.send_video
            elif (suffix in _VOICE_EXTS and as_voice) or suffix in _AUDIO_EXTS:
                sender = adapter.send_voice
            else:
                sender = adapter.send_document
            result = await sender(chat_id, path, metadata=metadata)
            if not result.success:
                return _error(f"Matrix media send failed: {result.error}")

        if result is None:
            return {"error": "No deliverable text or media remained after processing MEDIA tags"}
        return {
            "success": True,
            "platform": "matrix",
            "chat_id": chat_id,
            "message_id": result.message_id,
        }
    except Exception as e:
        return _error(f"Matrix send failed: {e}")
    finally:
        # Best-effort teardown; if construction itself raised, `adapter`
        # is unbound and the NameError is swallowed here too.
        try:
            await adapter.disconnect()
        except Exception:
            pass
async def _send_homeassistant(token, extra, chat_id, message):
"""Send via Home Assistant notify service."""
try:

View file

@ -1263,6 +1263,7 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
"related_skills": related_skills,
"content": content,
"path": rel_path,
"skill_dir": str(skill_dir) if skill_dir else None,
"linked_files": linked_files if linked_files else None,
"usage_hint": "To view linked files, call skill_view(name, file_path) where file_path is e.g. 'references/api.md' or 'assets/config.yaml'"
if linked_files

View file

@ -45,6 +45,7 @@ from hermes_constants import display_hermes_home
logger = logging.getLogger(__name__)
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled, resolve_openai_audio_api_key
from tools.xai_http import hermes_xai_user_agent
# ---------------------------------------------------------------------------
# Lazy imports -- providers are imported only when actually used to avoid
@ -93,6 +94,11 @@ DEFAULT_MINIMAX_VOICE_ID = "English_Graceful_Lady"
DEFAULT_MINIMAX_BASE_URL = "https://api.minimax.io/v1/t2a_v2"
DEFAULT_MISTRAL_TTS_MODEL = "voxtral-mini-tts-2603"
DEFAULT_MISTRAL_TTS_VOICE_ID = "c69964a6-ab8b-4f8a-9465-ec0925096ec8" # Paul - Neutral
DEFAULT_XAI_VOICE_ID = "eve"
DEFAULT_XAI_LANGUAGE = "en"
DEFAULT_XAI_SAMPLE_RATE = 24000
DEFAULT_XAI_BIT_RATE = 128000
DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
def _get_default_output_dir() -> str:
from hermes_constants import get_hermes_dir
@ -299,6 +305,71 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
close()
# ===========================================================================
# Provider: xAI TTS
# ===========================================================================
def _generate_xai_tts(text: str, output_path: str, tts_config: Dict[str, Any]) -> str:
    """
    Generate audio with xAI TTS and write it to *output_path*.

    xAI exposes a dedicated POST /v1/tts endpoint instead of the OpenAI
    ``audio.speech`` API shape, so it gets its own backend rather than
    reusing the OpenAI one. Raises ``ValueError`` when XAI_API_KEY is
    unset; HTTP-level failures surface via ``raise_for_status``.
    Returns the path that was written.
    """
    import requests

    api_key = os.getenv("XAI_API_KEY", "").strip()
    if not api_key:
        raise ValueError("XAI_API_KEY not set. Get one at https://console.x.ai/")

    cfg = tts_config.get("xai", {})
    voice = str(cfg.get("voice_id", DEFAULT_XAI_VOICE_ID)).strip() or DEFAULT_XAI_VOICE_ID
    lang = str(cfg.get("language", DEFAULT_XAI_LANGUAGE)).strip() or DEFAULT_XAI_LANGUAGE
    rate = int(cfg.get("sample_rate", DEFAULT_XAI_SAMPLE_RATE))
    bitrate = int(cfg.get("bit_rate", DEFAULT_XAI_BIT_RATE))
    base = str(
        cfg.get("base_url")
        or os.getenv("XAI_BASE_URL")
        or DEFAULT_XAI_BASE_URL
    ).strip().rstrip("/")

    codec = "wav" if output_path.endswith(".wav") else "mp3"
    payload: Dict[str, Any] = {"text": text, "voice_id": voice, "language": lang}

    # Keep the request matching the documented minimal POST /v1/tts shape;
    # attach output_format only when something deviates from the defaults.
    needs_format = (
        codec != "mp3"
        or rate != DEFAULT_XAI_SAMPLE_RATE
        or (codec == "mp3" and bitrate != DEFAULT_XAI_BIT_RATE)
    )
    if needs_format:
        fmt: Dict[str, Any] = {"codec": codec}
        if rate:
            fmt["sample_rate"] = rate
        if codec == "mp3" and bitrate:
            fmt["bit_rate"] = bitrate
        payload["output_format"] = fmt

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "User-Agent": hermes_xai_user_agent(),
    }
    resp = requests.post(f"{base}/tts", headers=headers, json=payload, timeout=60)
    resp.raise_for_status()

    with open(output_path, "wb") as f:
        f.write(resp.content)
    return output_path
# ===========================================================================
# Provider: MiniMax TTS
# ===========================================================================
@ -600,6 +671,10 @@ def text_to_speech_tool(
logger.info("Generating speech with MiniMax TTS...")
_generate_minimax_tts(text, file_str, tts_config)
elif provider == "xai":
logger.info("Generating speech with xAI TTS...")
_generate_xai_tts(text, file_str, tts_config)
elif provider == "mistral":
try:
_import_mistral_client()
@ -661,7 +736,7 @@ def text_to_speech_tool(
# Try Opus conversion for Telegram compatibility
    # These providers emit MP3/WAV rather than OGG/Opus — convert via ffmpeg
voice_compatible = False
if provider in ("edge", "neutts", "minimax") and not file_str.endswith(".ogg"):
if provider in ("edge", "neutts", "minimax", "xai") and not file_str.endswith(".ogg"):
opus_path = _convert_to_opus(file_str)
if opus_path:
file_str = opus_path
@ -734,6 +809,8 @@ def check_tts_requirements() -> bool:
pass
if os.getenv("MINIMAX_API_KEY"):
return True
if os.getenv("XAI_API_KEY"):
return True
try:
_import_mistral_client()
if os.getenv("MISTRAL_API_KEY"):

12
tools/xai_http.py Normal file
View file

@ -0,0 +1,12 @@
"""Shared helpers for direct xAI HTTP integrations."""
from __future__ import annotations
def hermes_xai_user_agent() -> str:
    """Return a stable Hermes-specific User-Agent for xAI HTTP calls.

    Falls back to ``unknown`` when the CLI package (and thus its version)
    is not importable, so the header is always well-formed.
    """
    version = "unknown"
    try:
        from hermes_cli import __version__ as version
    except Exception:
        pass
    return f"Hermes-Agent/{version}"

View file

@ -151,7 +151,7 @@ TOOLSETS = {
},
"tts": {
"description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, or OpenAI",
"description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, OpenAI, or xAI",
"tools": ["text_to_speech"],
"includes": []
},

View file

@ -3,6 +3,7 @@
import json
import logging
import os
import stat
import tempfile
from pathlib import Path
from typing import Any, Union
@ -31,6 +32,31 @@ def env_var_enabled(name: str, default: str = "") -> bool:
return is_truthy_value(os.getenv(name, default), default=False)
def _preserve_file_mode(path: Path) -> "int | None":
"""Capture the permission bits of *path* if it exists, else ``None``."""
try:
return stat.S_IMODE(path.stat().st_mode) if path.exists() else None
except OSError:
return None
def _restore_file_mode(path: Path, mode: "int | None") -> None:
"""Re-apply *mode* to *path* after an atomic replace.
``tempfile.mkstemp`` creates files with 0o600 (owner-only). After
``os.replace`` swaps the temp file into place the target inherits
those restrictive permissions, breaking Docker / NAS volume mounts
that rely on broader permissions set by the user. Calling this
right after ``os.replace`` restores the original permissions.
"""
if mode is None:
return
try:
os.chmod(path, mode)
except OSError:
pass
def atomic_json_write(
path: Union[str, Path],
data: Any,
@ -54,6 +80,8 @@ def atomic_json_write(
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
original_mode = _preserve_file_mode(path)
fd, tmp_path = tempfile.mkstemp(
dir=str(path.parent),
prefix=f".{path.stem}_",
@ -71,6 +99,7 @@ def atomic_json_write(
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, path)
_restore_file_mode(path, original_mode)
except BaseException:
# Intentionally catch BaseException so temp-file cleanup still runs for
# KeyboardInterrupt/SystemExit before re-raising the original signal.
@ -106,6 +135,8 @@ def atomic_yaml_write(
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
original_mode = _preserve_file_mode(path)
fd, tmp_path = tempfile.mkstemp(
dir=str(path.parent),
prefix=f".{path.stem}_",
@ -119,6 +150,7 @@ def atomic_yaml_write(
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, path)
_restore_file_mode(path, original_mode)
except BaseException:
# Match atomic_json_write: cleanup must also happen for process-level
# interruptions before we re-raise them.

Some files were not shown because too many files have changed in this diff Show more