diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
new file mode 100644
index 0000000000..3546e6bdb8
--- /dev/null
+++ b/agent/auxiliary_client.py
@@ -0,0 +1,128 @@
+"""Shared auxiliary OpenAI client for cheap/fast side tasks.
+
+Provides a single resolution chain so every consumer (context compression,
+session search, web extraction, vision analysis, browser vision) picks up
+the best available backend without duplicating fallback logic.
+
+Resolution order for text tasks:
+  1. OpenRouter  (OPENROUTER_API_KEY)
+  2. Nous Portal (~/.hermes/auth.json active provider)
+  3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
+  4. None
+
+Resolution order for vision/multimodal tasks:
+  1. OpenRouter
+  2. Nous Portal
+  3. None  (custom endpoints can't substitute for Gemini multimodal)
+"""
+
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Optional, Tuple
+
+from openai import OpenAI
+
+from hermes_constants import OPENROUTER_BASE_URL
+
+logger = logging.getLogger(__name__)
+
+# Default auxiliary models per provider
+_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
+_NOUS_MODEL = "gemini-3-flash"
+_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
+_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
+
+
+def _read_nous_auth() -> Optional[dict]:
+    """Read and validate ~/.hermes/auth.json for an active Nous provider.
+
+    Returns the provider state dict if Nous is active with tokens, otherwise
+    None (including when the file is missing, unreadable or malformed).
+    """
+    try:
+        if not _AUTH_JSON_PATH.is_file():
+            return None
+        data = json.loads(_AUTH_JSON_PATH.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as exc:  # ValueError covers bad JSON/encoding
+        logger.debug("Could not read Nous auth: %s", exc)
+        return None
+    if not isinstance(data, dict) or data.get("active_provider") != "nous":
+        return None
+    providers = data.get("providers")
+    provider = providers.get("nous") if isinstance(providers, dict) else None
+    # Must be a mapping with at least an access_token or agent_key
+    has_key = isinstance(provider, dict) and (provider.get("agent_key") or provider.get("access_token"))
+    return provider if has_key else None
+
+
+def _nous_api_key(provider: dict) -> str:
+    """Return agent_key if set, else access_token, else "" (never None)."""
+    return provider.get("agent_key") or provider.get("access_token") or ""
+
+
+def _nous_base_url() -> str:
+    """Resolve the Nous inference base URL; an unset or empty env var yields the default."""
+    return os.getenv("NOUS_INFERENCE_BASE_URL") or _NOUS_DEFAULT_BASE_URL
+
+
+# ── Public API ──────────────────────────────────────────────────────────────
+
+def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Resolve the ``(client, model_slug)`` pair used for text-only side tasks.
+
+    Backends are tried in order: OpenRouter, Nous Portal, then a custom
+    endpoint taken from OPENAI_BASE_URL / OPENAI_API_KEY.  ``(None, None)``
+    is returned when none of them is configured.
+    """
+    # OpenRouter wins whenever its API key is present.
+    openrouter_key = os.getenv("OPENROUTER_API_KEY")
+    if openrouter_key:
+        logger.debug("Auxiliary text client: OpenRouter")
+        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
+        return client, _OPENROUTER_MODEL
+
+    # Next choice: whatever _read_nous_auth() reports as usable.
+    nous_state = _read_nous_auth()
+    if nous_state:
+        logger.debug("Auxiliary text client: Nous Portal")
+        client = OpenAI(api_key=_nous_api_key(nous_state), base_url=_nous_base_url())
+        return client, _NOUS_MODEL
+
+    # Last choice: a custom endpoint, which needs both a base URL and a key.
+    endpoint_url = os.getenv("OPENAI_BASE_URL")
+    endpoint_key = os.getenv("OPENAI_API_KEY")
+    if not (endpoint_url and endpoint_key):
+        logger.debug("Auxiliary text client: none available")
+        return None, None
+
+    model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
+    logger.debug("Auxiliary text client: custom endpoint (%s)", model)
+    return OpenAI(api_key=endpoint_key, base_url=endpoint_url), model
+
+
+def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Resolve the ``(client, model_slug)`` pair for vision/multimodal side tasks.
+
+    Only OpenRouter and Nous Portal are considered; a custom endpoint is never
+    used here because it cannot substitute for Gemini multimodal.
+    """
+    # OpenRouter wins whenever its API key is present.
+    openrouter_key = os.getenv("OPENROUTER_API_KEY")
+    if openrouter_key:
+        logger.debug("Auxiliary vision client: OpenRouter")
+        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
+        return client, _OPENROUTER_MODEL
+
+    # Otherwise use whatever _read_nous_auth() reports as usable, if anything.
+    nous_state = _read_nous_auth()
+    if not nous_state:
+        logger.debug("Auxiliary vision client: none available")
+        return None, None
+
+    logger.debug("Auxiliary vision client: Nous Portal")
+    nous_key = _nous_api_key(nous_state)
+    nous_url = _nous_base_url()
+    client = OpenAI(api_key=nous_key, base_url=nous_url)
+    return client, _NOUS_MODEL
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 7a8225cbb4..8f072a37a1 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -9,13 +9,11 @@ import logging
 import os
 from typing import Any, Dict, List
 
-from openai import OpenAI
-
+from agent.auxiliary_client import get_text_auxiliary_client
 from agent.model_metadata import (
     get_model_context_length,
     estimate_messages_tokens_rough,
 )
-from hermes_constants import OPENROUTER_BASE_URL
 
 logger = logging.getLogger(__name__)
 
@@ -31,7 +29,6 @@ class ContextCompressor:
         self,
         model: str,
         threshold_percent: float = 0.85,
-        summary_model: str = "google/gemini-3-flash-preview",
         protect_first_n: int = 3,
         protect_last_n: int = 4,
         summary_target_tokens: int = 500,
@@ -39,7 +36,6 @@ class ContextCompressor:
     ):
         self.model = model
         self.threshold_percent = threshold_percent
-        self.summary_model = summary_model
         self.protect_first_n = protect_first_n
         self.protect_last_n = protect_last_n
         self.summary_target_tokens = summary_target_tokens
@@ -53,8 +49,7 @@ class ContextCompressor:
         self.last_completion_tokens = 0
         self.last_total_tokens = 0
 
-        api_key = os.getenv("OPENROUTER_API_KEY", "")
-        self.client = OpenAI(api_key=api_key, base_url=OPENROUTER_BASE_URL) if api_key else None
+        self.client, self.summary_model = get_text_auxiliary_client()
 
     def update_from_response(self, usage: Dict[str, Any]):
         """Update tracked token usage from API response."""
@@ -155,6 +150,26 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         if not self.quiet_mode:
             print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
             print(f"   📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
+
+        # Truncation fallback when no auxiliary model is available
+        if self.client is None:
+            print("⚠️  Context compression: no auxiliary model available. Falling back to message truncation.")
+            # Keep the leading system message(s) and the last protect_last_n
+            # messages; every message in between is dropped outright.
+            kept = []
+            for msg in messages:
+                if msg.get("role") == "system":
+                    kept.append(msg.copy())
+                else:
+                    break
+            tail = messages[-self.protect_last_n:]
+            kept.extend(m.copy() for m in tail)
+            self.compression_count += 1
+            if not self.quiet_mode:
+                print(f"   ✂️  Truncated: {len(messages)} → {len(kept)} messages (dropped middle turns)")
+            return kept
+
+        if not self.quiet_mode:
             print(f"   🗜️  Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
 
         summary = self._generate_summary(turns_to_summarize)
diff --git a/agent/display.py b/agent/display.py
index bed75e3062..7320cb7ea6 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -4,11 +4,16 @@ Pure display functions and classes with no AIAgent dependency.
 Used by AIAgent._execute_tool_calls for CLI feedback.
 """
 
+import json
 import os
 import random
 import threading
 import time
 
+# ANSI escape codes for coloring tool failure indicators
+_RED = "\033[31m"
+_RESET = "\033[0m"
+
 
 # =========================================================================
 # Tool preview (one-line summary of a tool call's primary argument)
@@ -242,12 +247,46 @@ KAWAII_GENERIC = [
 # Cute tool message (completion line that replaces the spinner)
 # =========================================================================
 
-def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
+def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
+    """Inspect a tool result string for signs of failure.
+
+    Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
+    like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
+    failures.  On success, returns ``(False, "")``.  Explicitly empty error
+    fields such as ``"error": null`` are not treated as failures.
+    """
+    if result is None:
+        return False, ""
+
+    if tool_name == "terminal":
+        try:
+            exit_code = json.loads(result).get("exit_code")
+        except (json.JSONDecodeError, TypeError, AttributeError):
+            return False, ""  # not a JSON object: nothing to inspect
+        failed = exit_code is not None and exit_code != 0
+        return (True, f" [exit {exit_code}]") if failed else (False, "")
+
+    # Generic heuristic for non-terminal tools: look at the head of the result
+    # with whitespace removed, ignoring error/failed keys that carry no value.
+    head = "".join(result[:500].lower().split())
+    for empty in ("null", '""', "false", "[]", "{}"):
+        head = head.replace(f'"error":{empty}', "").replace(f'"failed":{empty}', "")
+    hit = '"error"' in head or '"failed"' in head or result.startswith("Error")
+    return (True, " [error]") if hit else (False, "")
+
+
+def get_cute_tool_message(
+    tool_name: str, args: dict, duration: float, result: str | None = None,
+) -> str:
     """Generate a formatted tool completion line for CLI quiet mode.
 
     Format: ``| {emoji} {verb:9} {detail}  {duration}``
+
+    When *result* is provided the line is checked for failure indicators.
+    Failed tool calls get a red prefix and an informational suffix.
     """
     dur = f"{duration:.1f}s"
+    is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
 
     def _trunc(s, n=40):
         s = str(s)
@@ -257,105 +296,111 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
         p = str(p)
         return ("..." + p[-(n-3):]) if len(p) > n else p
 
+    def _wrap(line: str) -> str:
+        """Apply red coloring and failure suffix when the tool failed."""
+        if not is_failure:
+            return line
+        return f"{_RED}{line}{failure_suffix}{_RESET}"
+
     if tool_name == "web_search":
-        return f"┊ 🔍 search    {_trunc(args.get('query', ''), 42)}  {dur}"
+        return _wrap(f"┊ 🔍 search    {_trunc(args.get('query', ''), 42)}  {dur}")
     if tool_name == "web_extract":
         urls = args.get("urls", [])
         if urls:
             url = urls[0] if isinstance(urls, list) else str(urls)
             domain = url.replace("https://", "").replace("http://", "").split("/")[0]
             extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
-            return f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}"
-        return f"┊ 📄 fetch     pages  {dur}"
+            return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
+        return _wrap(f"┊ 📄 fetch     pages  {dur}")
     if tool_name == "web_crawl":
         url = args.get("url", "")
         domain = url.replace("https://", "").replace("http://", "").split("/")[0]
-        return f"┊ 🕸️  crawl     {_trunc(domain, 35)}  {dur}"
+        return _wrap(f"┊ 🕸️  crawl     {_trunc(domain, 35)}  {dur}")
     if tool_name == "terminal":
-        return f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}"
+        return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
     if tool_name == "process":
         action = args.get("action", "?")
         sid = args.get("session_id", "")[:12]
         labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}",
                   "wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
-        return f"┊ ⚙️  proc      {labels.get(action, f'{action} {sid}')}  {dur}"
+        return _wrap(f"┊ ⚙️  proc      {labels.get(action, f'{action} {sid}')}  {dur}")
     if tool_name == "read_file":
-        return f"┊ 📖 read      {_path(args.get('path', ''))}  {dur}"
+        return _wrap(f"┊ 📖 read      {_path(args.get('path', ''))}  {dur}")
     if tool_name == "write_file":
-        return f"┊ ✍️  write     {_path(args.get('path', ''))}  {dur}"
+        return _wrap(f"┊ ✍️  write     {_path(args.get('path', ''))}  {dur}")
     if tool_name == "patch":
-        return f"┊ 🔧 patch     {_path(args.get('path', ''))}  {dur}"
+        return _wrap(f"┊ 🔧 patch     {_path(args.get('path', ''))}  {dur}")
     if tool_name == "search_files":
         pattern = _trunc(args.get("pattern", ""), 35)
         target = args.get("target", "content")
         verb = "find" if target == "files" else "grep"
-        return f"┊ 🔎 {verb:9} {pattern}  {dur}"
+        return _wrap(f"┊ 🔎 {verb:9} {pattern}  {dur}")
     if tool_name == "browser_navigate":
         url = args.get("url", "")
         domain = url.replace("https://", "").replace("http://", "").split("/")[0]
-        return f"┊ 🌐 navigate  {_trunc(domain, 35)}  {dur}"
+        return _wrap(f"┊ 🌐 navigate  {_trunc(domain, 35)}  {dur}")
     if tool_name == "browser_snapshot":
         mode = "full" if args.get("full") else "compact"
-        return f"┊ 📸 snapshot  {mode}  {dur}"
+        return _wrap(f"┊ 📸 snapshot  {mode}  {dur}")
     if tool_name == "browser_click":
-        return f"┊ 👆 click     {args.get('ref', '?')}  {dur}"
+        return _wrap(f"┊ 👆 click     {args.get('ref', '?')}  {dur}")
     if tool_name == "browser_type":
-        return f"┊ ⌨️  type      \"{_trunc(args.get('text', ''), 30)}\"  {dur}"
+        return _wrap(f"┊ ⌨️  type      \"{_trunc(args.get('text', ''), 30)}\"  {dur}")
     if tool_name == "browser_scroll":
         d = args.get("direction", "down")
         arrow = {"down": "↓", "up": "↑", "right": "→", "left": "←"}.get(d, "↓")
-        return f"┊ {arrow}  scroll    {d}  {dur}"
+        return _wrap(f"┊ {arrow}  scroll    {d}  {dur}")
     if tool_name == "browser_back":
-        return f"┊ ◀️  back      {dur}"
+        return _wrap(f"┊ ◀️  back      {dur}")
     if tool_name == "browser_press":
-        return f"┊ ⌨️  press     {args.get('key', '?')}  {dur}"
+        return _wrap(f"┊ ⌨️  press     {args.get('key', '?')}  {dur}")
     if tool_name == "browser_close":
-        return f"┊ 🚪 close     browser  {dur}"
+        return _wrap(f"┊ 🚪 close     browser  {dur}")
     if tool_name == "browser_get_images":
-        return f"┊ 🖼️  images    extracting  {dur}"
+        return _wrap(f"┊ 🖼️  images    extracting  {dur}")
     if tool_name == "browser_vision":
-        return f"┊ 👁️  vision    analyzing page  {dur}"
+        return _wrap(f"┊ 👁️  vision    analyzing page  {dur}")
     if tool_name == "todo":
         todos_arg = args.get("todos")
         merge = args.get("merge", False)
         if todos_arg is None:
-            return f"┊ 📋 plan      reading tasks  {dur}"
+            return _wrap(f"┊ 📋 plan      reading tasks  {dur}")
         elif merge:
-            return f"┊ 📋 plan      update {len(todos_arg)} task(s)  {dur}"
+            return _wrap(f"┊ 📋 plan      update {len(todos_arg)} task(s)  {dur}")
         else:
-            return f"┊ 📋 plan      {len(todos_arg)} task(s)  {dur}"
+            return _wrap(f"┊ 📋 plan      {len(todos_arg)} task(s)  {dur}")
     if tool_name == "session_search":
-        return f"┊ 🔍 recall    \"{_trunc(args.get('query', ''), 35)}\"  {dur}"
+        return _wrap(f"┊ 🔍 recall    \"{_trunc(args.get('query', ''), 35)}\"  {dur}")
     if tool_name == "memory":
         action = args.get("action", "?")
         target = args.get("target", "")
         if action == "add":
-            return f"┊ 🧠 memory    +{target}: \"{_trunc(args.get('content', ''), 30)}\"  {dur}"
+            return _wrap(f"┊ 🧠 memory    +{target}: \"{_trunc(args.get('content', ''), 30)}\"  {dur}")
         elif action == "replace":
-            return f"┊ 🧠 memory    ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\"  {dur}"
+            return _wrap(f"┊ 🧠 memory    ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\"  {dur}")
         elif action == "remove":
-            return f"┊ 🧠 memory    -{target}: \"{_trunc(args.get('old_text', ''), 20)}\"  {dur}"
-        return f"┊ 🧠 memory    {action}  {dur}"
+            return _wrap(f"┊ 🧠 memory    -{target}: \"{_trunc(args.get('old_text', ''), 20)}\"  {dur}")
+        return _wrap(f"┊ 🧠 memory    {action}  {dur}")
     if tool_name == "skills_list":
-        return f"┊ 📚 skills    list {args.get('category', 'all')}  {dur}"
+        return _wrap(f"┊ 📚 skills    list {args.get('category', 'all')}  {dur}")
     if tool_name == "skill_view":
-        return f"┊ 📚 skill     {_trunc(args.get('name', ''), 30)}  {dur}"
+        return _wrap(f"┊ 📚 skill     {_trunc(args.get('name', ''), 30)}  {dur}")
     if tool_name == "image_generate":
-        return f"┊ 🎨 create    {_trunc(args.get('prompt', ''), 35)}  {dur}"
+        return _wrap(f"┊ 🎨 create    {_trunc(args.get('prompt', ''), 35)}  {dur}")
     if tool_name == "text_to_speech":
-        return f"┊ 🔊 speak     {_trunc(args.get('text', ''), 30)}  {dur}"
+        return _wrap(f"┊ 🔊 speak     {_trunc(args.get('text', ''), 30)}  {dur}")
     if tool_name == "vision_analyze":
-        return f"┊ 👁️  vision    {_trunc(args.get('question', ''), 30)}  {dur}"
+        return _wrap(f"┊ 👁️  vision    {_trunc(args.get('question', ''), 30)}  {dur}")
     if tool_name == "mixture_of_agents":
-        return f"┊ 🧠 reason    {_trunc(args.get('user_prompt', ''), 30)}  {dur}"
+        return _wrap(f"┊ 🧠 reason    {_trunc(args.get('user_prompt', ''), 30)}  {dur}")
     if tool_name == "send_message":
-        return f"┊ 📨 send      {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\"  {dur}"
+        return _wrap(f"┊ 📨 send      {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\"  {dur}")
     if tool_name == "schedule_cronjob":
-        return f"┊ ⏰ schedule  {_trunc(args.get('name', args.get('prompt', 'task')), 30)}  {dur}"
+        return _wrap(f"┊ ⏰ schedule  {_trunc(args.get('name', args.get('prompt', 'task')), 30)}  {dur}")
     if tool_name == "list_cronjobs":
-        return f"┊ ⏰ jobs      listing  {dur}"
+        return _wrap(f"┊ ⏰ jobs      listing  {dur}")
     if tool_name == "remove_cronjob":
-        return f"┊ ⏰ remove    job {args.get('job_id', '?')}  {dur}"
+        return _wrap(f"┊ ⏰ remove    job {args.get('job_id', '?')}  {dur}")
     if tool_name.startswith("rl_"):
         rl = {
             "rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
@@ -364,16 +409,16 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
             "rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
             "rl_list_runs": "list runs", "rl_test_inference": "test inference",
         }
-        return f"┊ 🧪 rl        {rl.get(tool_name, tool_name.replace('rl_', ''))}  {dur}"
+        return _wrap(f"┊ 🧪 rl        {rl.get(tool_name, tool_name.replace('rl_', ''))}  {dur}")
     if tool_name == "execute_code":
         code = args.get("code", "")
         first_line = code.strip().split("\n")[0] if code.strip() else ""
-        return f"┊ 🐍 exec      {_trunc(first_line, 35)}  {dur}"
+        return _wrap(f"┊ 🐍 exec      {_trunc(first_line, 35)}  {dur}")
     if tool_name == "delegate_task":
         tasks = args.get("tasks")
         if tasks and isinstance(tasks, list):
-            return f"┊ 🔀 delegate  {len(tasks)} parallel tasks  {dur}"
-        return f"┊ 🔀 delegate  {_trunc(args.get('goal', ''), 35)}  {dur}"
+            return _wrap(f"┊ 🔀 delegate  {len(tasks)} parallel tasks  {dur}")
+        return _wrap(f"┊ 🔀 delegate  {_trunc(args.get('goal', ''), 35)}  {dur}")
 
     preview = build_tool_preview(tool_name, args) or ""
-    return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)}  {dur}"
+    return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)}  {dur}")
diff --git a/cli.py b/cli.py
index 1972a18e03..91d7399996 100755
--- a/cli.py
+++ b/cli.py
@@ -339,9 +339,6 @@ def _cprint(text: str):
     """
     _pt_print(_PT_ANSI(text))
 
-# Version string
-VERSION = "v1.0.0"
-
 # ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal)
 HERMES_AGENT_LOGO = """[bold #FFD700]██╗  ██╗███████╗██████╗ ███╗   ███╗███████╗███████╗       █████╗  ██████╗ ███████╗███╗   ██╗████████╗[/]
 [bold #FFD700]██║  ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝      ██╔══██╗██╔════╝ ██╔════╝████╗  ██║╚══██╔══╝[/]
diff --git a/gateway/config.py b/gateway/config.py
index 8526c43693..16eceda672 100644
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -8,6 +8,7 @@ Handles loading and validating configuration for:
 - Delivery preferences
 """
 
+import logging
 import os
 import json
 from pathlib import Path
@@ -15,6 +16,8 @@ from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Any
 from enum import Enum
 
+logger = logging.getLogger(__name__)
+
 
 class Platform(Enum):
     """Supported messaging platforms."""
@@ -264,6 +267,40 @@ def load_gateway_config() -> GatewayConfig:
     # Override with environment variables
     _apply_env_overrides(config)
     
+    # --- Validate loaded values ---
+    policy = config.default_reset_policy
+
+    if not (0 <= policy.at_hour <= 23):
+        logger.warning(
+            "Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour
+        )
+        policy.at_hour = 4
+
+    if policy.idle_minutes is None or policy.idle_minutes <= 0:
+        logger.warning(
+            "Invalid idle_minutes=%s (must be positive). Using default 1440.",
+            policy.idle_minutes,
+        )
+        policy.idle_minutes = 1440
+
+    # Warn about empty bot tokens — platforms that loaded an empty string
+    # won't connect and the cause can be confusing without a log line.
+    _token_env_names = {
+        Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
+        Platform.DISCORD: "DISCORD_BOT_TOKEN",
+        Platform.SLACK: "SLACK_BOT_TOKEN",
+    }
+    for platform, pconfig in config.platforms.items():
+        if not pconfig.enabled:
+            continue
+        env_name = _token_env_names.get(platform)
+        if env_name and pconfig.token is not None and not pconfig.token.strip():
+            logger.warning(
+                "%s is enabled but %s is empty. "
+                "The adapter will likely fail to connect.",
+                platform.value, env_name,
+            )
+
     return config
 
 
diff --git a/gateway/delivery.py b/gateway/delivery.py
index 676c3b5ae8..0093c1fb09 100644
--- a/gateway/delivery.py
+++ b/gateway/delivery.py
@@ -8,12 +8,18 @@ Routes messages to the appropriate destination based on:
 - Local (always saved to files)
 """
 
+import logging
 from pathlib import Path
 from datetime import datetime
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Any, Union
 from enum import Enum
 
+logger = logging.getLogger(__name__)
+
+MAX_PLATFORM_OUTPUT = 4000
+TRUNCATED_VISIBLE = 3800
+
 from .config import Platform, GatewayConfig
 from .session import SessionSource
 
@@ -245,6 +251,15 @@ class DeliveryRouter:
             "timestamp": timestamp
         }
     
+    def _save_full_output(self, content: str, job_id: str) -> Path:
+        """Save full cron output as UTF-8 under ~/.hermes/cron/output and return its path."""
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        out_dir = Path.home() / ".hermes" / "cron" / "output"
+        out_dir.mkdir(parents=True, exist_ok=True)
+        # Keep only the final path component so job_id cannot escape out_dir.
+        path = out_dir / f"{Path(str(job_id)).name or 'unknown'}_{timestamp}.txt"
+        return path if path.write_text(content, encoding="utf-8") is not None else path
+
     async def _deliver_to_platform(
         self,
         target: DeliveryTarget,
@@ -260,8 +275,16 @@ class DeliveryRouter:
         if not target.chat_id:
             raise ValueError(f"No chat ID for {target.platform.value} delivery")
         
-        # Call the adapter's send method
-        # Adapters should implement: async def send(chat_id: str, content: str) -> Dict
+        # Guard: truncate oversized cron output to stay within platform limits
+        if len(content) > MAX_PLATFORM_OUTPUT:
+            job_id = (metadata or {}).get("job_id", "unknown")
+            saved_path = self._save_full_output(content, job_id)
+            logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path)
+            content = (
+                content[:TRUNCATED_VISIBLE]
+                + f"\n\n... [truncated, full output saved to {saved_path}]"
+            )
+        
         return await adapter.send(target.chat_id, content, metadata=metadata)
 
 
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 918bc31bd6..9aef4033f5 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -659,34 +659,90 @@ class BasePlatformAdapter(ABC):
     
     def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
         """
-        Split a long message into chunks.
-        
+        Split a long message into chunks, preserving code block boundaries.
+
+        When a split falls inside a triple-backtick code block, the fence is
+        closed at the end of the current chunk and reopened (with the original
+        language tag) at the start of the next chunk; code indentation is kept.
+        Multi-chunk responses receive indicators like ``(1/3)``.
+
         Args:
             content: The full message content
             max_length: Maximum length per chunk (platform-specific)
-        
+
         Returns:
             List of message chunks
         """
         if len(content) <= max_length:
             return [content]
-        
-        chunks = []
-        while content:
-            if len(content) <= max_length:
-                chunks.append(content)
+
+        INDICATOR_RESERVE = 10   # room for " (XX/XX)"
+        FENCE_CLOSE = "\n```"
+
+        chunks: List[str] = []
+        remaining = content
+        # When the previous chunk ended mid-code-block, this holds the
+        # language tag (possibly "") so we can reopen the fence.
+        carry_lang: Optional[str] = None
+
+        while remaining:
+            # If we're continuing a code block from the previous chunk,
+            # prepend a new opening fence with the same language tag.
+            prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
+
+            # How much body text we can fit after accounting for the prefix,
+            # a potential closing fence, and the chunk indicator.
+            headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
+            if headroom < 1:
+                headroom = max(1, max_length // 2)
+
+            # Everything remaining fits in one final chunk
+            if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
+                chunks.append(prefix + remaining)
                 break
-            
-            # Try to split at a newline
-            split_idx = content.rfind("\n", 0, max_length)
-            if split_idx == -1:
-                # No newline, split at space
-                split_idx = content.rfind(" ", 0, max_length)
-            if split_idx == -1:
-                # No space either, hard split
-                split_idx = max_length
-            
-            chunks.append(content[:split_idx])
-            content = content[split_idx:].lstrip()
-        
+
+            # Find a natural split point (prefer newlines, then spaces)
+            region = remaining[:headroom]
+            split_at = region.rfind("\n")
+            if split_at < headroom // 2:
+                split_at = region.rfind(" ")
+            if split_at < 1:
+                split_at = headroom
+
+            chunk_body = remaining[:split_at]
+            remaining = remaining[split_at:]
+            full_chunk = prefix + chunk_body
+
+            # Scan chunk_body only: the fence reopened by *prefix* is already
+            # accounted for by carry_lang and must not toggle the state.
+            in_code = carry_lang is not None
+            lang = carry_lang or ""
+            for line in chunk_body.split("\n"):
+                stripped = line.strip()
+                if stripped.startswith("```"):
+                    if in_code:
+                        in_code = False
+                        lang = ""
+                    else:
+                        in_code = True
+                        tag = stripped[3:].strip()
+                        lang = tag.split()[0] if tag else ""
+
+            if in_code:
+                # Close the orphaned fence; keep the next code line's indent
+                full_chunk += FENCE_CLOSE
+                carry_lang = lang
+                remaining = remaining.lstrip("\n")
+            else:
+                carry_lang = None
+                remaining = remaining.lstrip()
+            chunks.append(full_chunk)
+
+        # Append chunk indicators when the response spans multiple messages
+        if len(chunks) > 1:
+            total = len(chunks)
+            chunks = [
+                f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
+            ]
+
         return chunks
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index e0d277b7b4..01bbad0db0 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -8,6 +8,7 @@ Uses python-telegram-bot library for:
 """
 
 import asyncio
+import re
 from typing import Dict, List, Optional, Any
 
 try:
@@ -49,6 +50,16 @@ def check_telegram_requirements() -> bool:
     return TELEGRAM_AVAILABLE
 
 
+# Every character in this class is reserved by Telegram MarkdownV2 and has
+# to be backslash-escaped outside of code spans and fenced code blocks.
+_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
+
+
+def _escape_mdv2(text: str) -> str:
+    """Return *text* with a backslash placed before each MarkdownV2 special character."""
+    return _MDV2_ESCAPE_RE.sub(lambda match: "\\" + match.group(1), text)
+
+
 class TelegramAdapter(BasePlatformAdapter):
     """
     Telegram bot adapter.
@@ -167,7 +178,7 @@ class TelegramAdapter(BasePlatformAdapter):
                     msg = await self._bot.send_message(
                         chat_id=int(chat_id),
                         text=chunk,
-                        parse_mode=ParseMode.MARKDOWN,
+                        parse_mode=ParseMode.MARKDOWN_V2,
                         reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
                         message_thread_id=int(thread_id) if thread_id else None,
                     )
@@ -297,14 +308,102 @@ class TelegramAdapter(BasePlatformAdapter):
     
     def format_message(self, content: str) -> str:
         """
-        Format message for Telegram.
-        
-        Telegram uses a subset of markdown. We'll use the simpler
-        Markdown mode (not MarkdownV2) for compatibility.
+        Convert standard markdown to Telegram MarkdownV2 format.
+
+        Protected regions (fenced code blocks, inline code) are extracted
+        first so their contents are never treated as markup; inside them only
+        backtick and backslash are escaped, which is what MarkdownV2 requires
+        within code entities.  Standard markdown constructs (headers, bold,
+        italic, links) are translated to MarkdownV2 syntax, and all remaining
+        special characters are escaped.
+
+        Args:
+            content: Message text written in standard markdown.
+
+        Returns:
+            The text rewritten for ParseMode.MARKDOWN_V2.  Falsy input (empty
+            string or None) is returned unchanged.
         """
-        # Basic escaping for Telegram Markdown
-        # In Markdown mode (not V2), only certain characters need escaping
-        return content
+        if not content:
+            return content
+
+        placeholders: dict = {}
+        counter = [0]
+
+        def _ph(value: str) -> str:
+            """Stash *value* behind a placeholder token that survives escaping."""
+            key = f"\x00PH{counter[0]}\x00"
+            counter[0] += 1
+            placeholders[key] = value
+            return key
+
+        def _escape_code(body: str) -> str:
+            """Escape backslash and backtick, the only specials inside code."""
+            return body.replace('\\', '\\\\').replace('`', '\\`')
+
+        text = content
+
+        # 1) Protect fenced code blocks (``` ... ```).  The fences stay
+        #    verbatim; only the interior (language tag + code) is escaped.
+        text = re.sub(
+            r'```((?:[^\n]*\n)?[\s\S]*?)```',
+            lambda m: _ph(f'```{_escape_code(m.group(1))}```'),
+            text,
+        )
+
+        # 2) Protect inline code (`...`).  The match cannot contain a backtick,
+        #    so in practice only backslashes get escaped here.
+        text = re.sub(
+            r'`([^`]+)`',
+            lambda m: _ph(f'`{_escape_code(m.group(1))}`'),
+            text,
+        )
+
+        # 3) Convert markdown links – escape the display text; inside the URL
+        #    only ')' and '\' need escaping per the MarkdownV2 spec.
+        def _convert_link(m):
+            display = _escape_mdv2(m.group(1))
+            url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
+            return _ph(f'[{display}]({url})')
+
+        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
+
+        # 4) Convert markdown headers (## Title) → bold *Title*
+        def _convert_header(m):
+            inner = m.group(1).strip()
+            # Strip redundant bold markers that may appear inside a header
+            inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
+            return _ph(f'*{_escape_mdv2(inner)}*')
+
+        text = re.sub(
+            r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
+        )
+
+        # 5) Convert bold: **text** → *text* (MarkdownV2 bold)
+        text = re.sub(
+            r'\*\*(.+?)\*\*',
+            lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
+            text,
+        )
+
+        # 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic).
+        #    The span must stay on one line and must not start or end with
+        #    whitespace, so "* item" bullets and "2 * 3 * 4" are left alone.
+        text = re.sub(
+            r'\*(?!\s)([^*\n]+?)(?<!\s)\*',
+            lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
+            text,
+        )
+
+        # 7) Escape remaining special characters in plain text
+        text = _escape_mdv2(text)
+
+        # 8) Restore placeholders in reverse insertion order so that
+        #    nested references (a placeholder inside another) resolve correctly.
+        for key in reversed(list(placeholders.keys())):
+            text = text.replace(key, placeholders[key])
+
+        return text
     
     async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
         """Handle incoming text messages."""
diff --git a/gateway/run.py b/gateway/run.py
index b299085d7c..11bb11ca29 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -20,6 +20,7 @@ import re
 import sys
 import signal
 import threading
+from logging.handlers import RotatingFileHandler
 from pathlib import Path
 from datetime import datetime
 from typing import Dict, Optional, Any, List
@@ -402,9 +403,27 @@ class GatewayRunner:
         # Build the context prompt to inject
         context_prompt = build_session_context_prompt(context)
         
+        # If the previous session expired and was auto-reset, prepend a notice
+        # so the agent knows this is a fresh conversation (not an intentional /reset).
+        if getattr(session_entry, 'was_auto_reset', False):
+            context_prompt = (
+                "[System note: The user's previous session expired due to inactivity. "
+                "This is a fresh conversation with no prior context.]\n\n"
+                + context_prompt
+            )
+            session_entry.was_auto_reset = False
+        
         # Load conversation history from transcript
         history = self.session_store.load_transcript(session_entry.session_id)
         
+        # First-message onboarding for brand-new messaging platform users
+        if not history:
+            context_prompt += (
+                "\n\n[System note: This is the user's very first message in this session. "
+                "Briefly introduce yourself and mention that /help shows available commands. "
+                "Keep the introduction concise -- one or two sentences max.]"
+            )
+        
         # -----------------------------------------------------------------
         # Auto-analyze images sent by the user
         #
@@ -1342,15 +1361,32 @@ def _start_cron_ticker(stop_event: threading.Event, interval: int = 60):
     
     Runs inside the gateway process so cronjobs fire automatically without
     needing a separate `hermes cron daemon` or system cron entry.
+
+    Every 60th tick (~once per hour) the image/audio cache is pruned so
+    stale temp files don't accumulate.
     """
     from cron.scheduler import tick as cron_tick
+    from gateway.platforms.base import cleanup_image_cache
+
+    IMAGE_CACHE_EVERY = 60  # ticks — once per hour at default 60s interval
 
     logger.info("Cron ticker started (interval=%ds)", interval)
+    tick_count = 0
     while not stop_event.is_set():
         try:
             cron_tick(verbose=False)
         except Exception as e:
             logger.debug("Cron tick error: %s", e)
+
+        tick_count += 1
+        if tick_count % IMAGE_CACHE_EVERY == 0:
+            try:
+                removed = cleanup_image_cache(max_age_hours=24)
+                if removed:
+                    logger.info("Image cache cleanup: removed %d stale file(s)", removed)
+            except Exception as e:
+                logger.debug("Image cache cleanup error: %s", e)
+
         stop_event.wait(timeout=interval)
     logger.info("Cron ticker stopped")
 
@@ -1363,6 +1399,18 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
     Returns True if the gateway ran successfully, False if it failed to start.
     A False return causes a non-zero exit code so systemd can auto-restart.
     """
+    # Configure rotating file log so gateway output is persisted for debugging
+    log_dir = Path.home() / '.hermes' / 'logs'
+    log_dir.mkdir(parents=True, exist_ok=True)
+    file_handler = RotatingFileHandler(
+        log_dir / 'gateway.log',
+        maxBytes=5 * 1024 * 1024,
+        backupCount=3,
+    )
+    file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
+    logging.getLogger().addHandler(file_handler)
+    logging.getLogger().setLevel(logging.INFO)
+
     runner = GatewayRunner(config)
     
     # Set up signal handlers
diff --git a/gateway/session.py b/gateway/session.py
index c66c638b46..b6603ecfaf 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -219,6 +219,10 @@ class SessionEntry:
     output_tokens: int = 0
     total_tokens: int = 0
     
+    # Set when a session was created because the previous one expired;
+    # consumed once by the message handler to inject a notice into context
+    was_auto_reset: bool = False
+    
     def to_dict(self) -> Dict[str, Any]:
         result = {
             "session_key": self.session_key,
@@ -388,11 +392,14 @@ class SessionStore:
                 return entry
             else:
                 # Session is being reset -- end the old one in SQLite
+                was_auto_reset = True
                 if self._db:
                     try:
                         self._db.end_session(entry.session_id, "session_reset")
                     except Exception as e:
                         logger.debug("Session DB operation failed: %s", e)
+        else:
+            was_auto_reset = False
         
         # Create new session
         session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
@@ -406,6 +413,7 @@ class SessionStore:
             display_name=source.chat_name,
             platform=source.platform,
             chat_type=source.chat_type,
+            was_auto_reset=was_auto_reset,
         )
         
         self._entries[session_key] = entry
diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py
index d8c95978c4..7e647afc35 100644
--- a/hermes_cli/__init__.py
+++ b/hermes_cli/__init__.py
@@ -11,4 +11,4 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """
 
-__version__ = "0.1.0"
+__version__ = "v1.0.0"
diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index 2597e880d7..974dfaa15e 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -33,7 +33,7 @@ def cprint(text: str):
 # ASCII Art & Branding
 # =========================================================================
 
-VERSION = "v1.0.0"
+from hermes_cli import __version__ as VERSION
 
 HERMES_AGENT_LOGO = """[bold #FFD700]██╗  ██╗███████╗██████╗ ███╗   ███╗███████╗███████╗       █████╗  ██████╗ ███████╗███╗   ██╗████████╗[/]
 [bold #FFD700]██║  ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝      ██╔══██╗██╔════╝ ██╔════╝████╗  ██║╚══██╔══╝[/]
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 09176ba154..6a103a372f 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -44,6 +44,8 @@ def run_doctor(args):
     should_fix = getattr(args, 'fix', False)
     
     issues = []
+    manual_issues = []  # issues that can't be auto-fixed
+    fixed_count = 0
     
     print()
     print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
@@ -135,8 +137,15 @@ def run_doctor(args):
             check_ok(".env file exists (in project directory)")
         else:
             check_fail("~/.hermes/.env file missing")
-            check_info("Run 'hermes setup' to create one")
-            issues.append("Run 'hermes setup' to create .env")
+            if should_fix:
+                env_path.parent.mkdir(parents=True, exist_ok=True)
+                env_path.touch()
+                check_ok("Created empty ~/.hermes/.env")
+                check_info("Run 'hermes setup' to configure API keys")
+                fixed_count += 1
+            else:
+                check_info("Run 'hermes setup' to create one")
+                issues.append("Run 'hermes setup' to create .env")
     
     # Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback)
     config_path = HERMES_HOME / 'config.yaml'
@@ -147,7 +156,17 @@ def run_doctor(args):
         if fallback_config.exists():
             check_ok("cli-config.yaml exists (in project directory)")
         else:
-            check_warn("config.yaml not found", "(using defaults)")
+            example_config = PROJECT_ROOT / 'cli-config.yaml.example'
+            if should_fix and example_config.exists():
+                config_path.parent.mkdir(parents=True, exist_ok=True)
+                shutil.copy2(str(example_config), str(config_path))
+                check_ok("Created ~/.hermes/config.yaml from cli-config.yaml.example")
+                fixed_count += 1
+            elif should_fix:
+                check_warn("config.yaml not found and no example to copy from")
+                manual_issues.append("Create ~/.hermes/config.yaml manually")
+            else:
+                check_warn("config.yaml not found", "(using defaults)")
     
     # =========================================================================
     # Check: Directory structure
@@ -159,7 +178,26 @@ def run_doctor(args):
     if hermes_home.exists():
         check_ok("~/.hermes directory exists")
     else:
-        check_warn("~/.hermes not found", "(will be created on first use)")
+        if should_fix:
+            hermes_home.mkdir(parents=True, exist_ok=True)
+            check_ok("Created ~/.hermes directory")
+            fixed_count += 1
+        else:
+            check_warn("~/.hermes not found", "(will be created on first use)")
+    
+    # Check expected subdirectories
+    expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"]
+    for subdir_name in expected_subdirs:
+        subdir_path = hermes_home / subdir_name
+        if subdir_path.exists():
+            check_ok(f"~/.hermes/{subdir_name}/ exists")
+        else:
+            if should_fix:
+                subdir_path.mkdir(parents=True, exist_ok=True)
+                check_ok(f"Created ~/.hermes/{subdir_name}/")
+                fixed_count += 1
+            else:
+                check_warn(f"~/.hermes/{subdir_name}/ not found", "(will be created on first use)")
     
     # Check for SOUL.md persona file
     soul_path = hermes_home / "SOUL.md"
@@ -175,14 +213,25 @@ def run_doctor(args):
         check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)")
         if should_fix:
             soul_path.parent.mkdir(parents=True, exist_ok=True)
-            soul_path.write_text("# Hermes Agent Persona\n\n<!-- Edit this file to customize how Hermes communicates. -->\n", encoding="utf-8")
-            check_ok("Created ~/.hermes/SOUL.md")
+            soul_path.write_text(
+                "# Hermes Agent Persona\n\n"
+                "<!-- Edit this file to customize how Hermes communicates. -->\n\n"
+                "You are Hermes, a helpful AI assistant.\n",
+                encoding="utf-8",
+            )
+            check_ok("Created ~/.hermes/SOUL.md with basic template")
+            fixed_count += 1
     
     logs_dir = PROJECT_ROOT / "logs"
     if logs_dir.exists():
-        check_ok("logs/ directory exists")
+        check_ok("logs/ directory exists (project root)")
     else:
-        check_warn("logs/ not found", "(will be created on first use)")
+        if should_fix:
+            logs_dir.mkdir(parents=True, exist_ok=True)
+            check_ok("Created logs/ directory")
+            fixed_count += 1
+        else:
+            check_warn("logs/ not found", "(will be created on first use)")
     
     # Check memory directory
     memories_dir = hermes_home / "memories"
@@ -205,6 +254,7 @@ def run_doctor(args):
         if should_fix:
             memories_dir.mkdir(parents=True, exist_ok=True)
             check_ok("Created ~/.hermes/memories/")
+            fixed_count += 1
     
     # Check SQLite session store
     state_db_path = hermes_home / "state.db"
@@ -299,6 +349,7 @@ def run_doctor(args):
     
     openrouter_key = os.getenv("OPENROUTER_API_KEY")
     if openrouter_key:
+        print("  Checking OpenRouter API...", end="", flush=True)
         try:
             import httpx
             response = httpx.get(
@@ -307,20 +358,21 @@ def run_doctor(args):
                 timeout=10
             )
             if response.status_code == 200:
-                check_ok("OpenRouter API")
+                print(f"\r  {color('✓', Colors.GREEN)} OpenRouter API                          ")
             elif response.status_code == 401:
-                check_fail("OpenRouter API", "(invalid API key)")
+                print(f"\r  {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)}                ")
                 issues.append("Check OPENROUTER_API_KEY in .env")
             else:
-                check_fail("OpenRouter API", f"(HTTP {response.status_code})")
+                print(f"\r  {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)}                ")
         except Exception as e:
-            check_fail("OpenRouter API", f"({e})")
+            print(f"\r  {color('✗', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)}                ")
             issues.append("Check network connectivity")
     else:
         check_warn("OpenRouter API", "(not configured)")
     
     anthropic_key = os.getenv("ANTHROPIC_API_KEY")
     if anthropic_key:
+        print("  Checking Anthropic API...", end="", flush=True)
         try:
             import httpx
             response = httpx.get(
@@ -332,14 +384,14 @@ def run_doctor(args):
                 timeout=10
             )
             if response.status_code == 200:
-                check_ok("Anthropic API")
+                print(f"\r  {color('✓', Colors.GREEN)} Anthropic API                           ")
             elif response.status_code == 401:
-                check_fail("Anthropic API", "(invalid API key)")
+                print(f"\r  {color('✗', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)}                 ")
             else:
-                # Note: Anthropic may not have /models endpoint
-                check_warn("Anthropic API", "(couldn't verify)")
+                msg = "(couldn't verify)"
+                print(f"\r  {color('⚠', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)}                 ")
         except Exception as e:
-            check_warn("Anthropic API", f"({e})")
+            print(f"\r  {color('⚠', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)}                 ")
     
     # =========================================================================
     # Check: Submodules
@@ -440,17 +492,28 @@ def run_doctor(args):
     # Summary
     # =========================================================================
     print()
-    if issues:
-        print(color("─" * 60, Colors.YELLOW))
-        print(color(f"  Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
+    remaining_issues = issues + manual_issues
+    if should_fix and fixed_count > 0:
+        print(color("─" * 60, Colors.GREEN))
+        print(color(f"  Fixed {fixed_count} issue(s).", Colors.GREEN, Colors.BOLD), end="")
+        if remaining_issues:
+            print(color(f" {len(remaining_issues)} issue(s) require manual intervention.", Colors.YELLOW, Colors.BOLD))
+        else:
+            print()
         print()
-        for i, issue in enumerate(issues, 1):
+        if remaining_issues:
+            for i, issue in enumerate(remaining_issues, 1):
+                print(f"  {i}. {issue}")
+            print()
+    elif remaining_issues:
+        print(color("─" * 60, Colors.YELLOW))
+        print(color(f"  Found {len(remaining_issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
+        print()
+        for i, issue in enumerate(remaining_issues, 1):
             print(f"  {i}. {issue}")
         print()
-        
-        if should_fix:
-            print(color("  Attempting auto-fix is not yet implemented.", Colors.DIM))
-            print(color("  Please resolve issues manually.", Colors.DIM))
+        if not should_fix:
+            print(color("  Tip: run 'hermes doctor --fix' to auto-fix what's possible.", Colors.DIM))
     else:
         print(color("─" * 60, Colors.GREEN))
         print(color("  All checks passed! 🎉", Colors.GREEN, Colors.BOLD))
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 3406994b36..4264730c69 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -47,8 +47,66 @@ from hermes_constants import OPENROUTER_BASE_URL
 logger = logging.getLogger(__name__)
 
 
+def _has_any_provider_configured() -> bool:
+    """Return True if a provider API key (env var or .env entry) or a token for the active auth.json provider exists."""
+    from hermes_cli.config import get_env_path, get_hermes_home
+
+    # Fast path: any of the three API-key variables already present in the process environment
+    if os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY"):
+        return True
+
+    # Otherwise scan the .env file itself for a non-empty value of the same three keys
+    env_file = get_env_path()
+    if env_file.exists():
+        try:
+            for line in env_file.read_text().splitlines():
+                line = line.strip()
+                if line.startswith("#") or "=" not in line:  # comment, or not a KEY=VALUE line
+                    continue
+                key, _, val = line.partition("=")
+                val = val.strip().strip("'\"")  # drop surrounding whitespace and quote characters
+                if key.strip() in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY") and val:
+                    return True
+        except Exception:  # best-effort: a failure here just falls through to the auth.json check
+            pass
+
+    # Finally look in auth.json: the active provider counts if it stores an access or refresh token
+    auth_file = get_hermes_home() / "auth.json"
+    if auth_file.exists():
+        try:
+            import json
+            auth = json.loads(auth_file.read_text())
+            active = auth.get("active_provider")
+            if active:
+                state = auth.get("providers", {}).get(active, {})
+                if state.get("access_token") or state.get("refresh_token"):
+                    return True
+        except Exception:  # best-effort: a failure here is treated as "no stored credentials"
+            pass
+
+    return False
+
+
 def cmd_chat(args):
     """Run interactive chat CLI."""
+    # First-run guard: check if any provider is configured before launching
+    if not _has_any_provider_configured():
+        print()
+        print("It looks like Hermes isn't configured yet -- no API keys or providers found.")
+        print()
+        print("  Run:  hermes setup")
+        print()
+        try:
+            reply = input("Run setup now? [Y/n] ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            reply = "n"
+        if reply in ("", "y", "yes"):
+            cmd_setup(args)
+            return
+        print()
+        print("You can run 'hermes setup' at any time to configure.")
+        sys.exit(1)
+
     # Import and run the CLI
     from cli import main as cli_main
     
@@ -219,20 +277,10 @@ def _model_flow_openrouter(config, current_model=""):
         print("API key saved.")
         print()
 
-    OPENROUTER_MODELS = [
-        "anthropic/claude-opus-4.6",
-        "anthropic/claude-sonnet-4.5",
-        "anthropic/claude-opus-4.5",
-        "openai/gpt-5.2",
-        "openai/gpt-5.2-codex",
-        "google/gemini-3-pro-preview",
-        "google/gemini-3-flash-preview",
-        "z-ai/glm-4.7",
-        "moonshotai/kimi-k2.5",
-        "minimax/minimax-m2.1",
-    ]
+    from hermes_cli.models import model_ids
+    openrouter_models = model_ids()
 
-    selected = _prompt_model_selection(OPENROUTER_MODELS, current_model=current_model)
+    selected = _prompt_model_selection(openrouter_models, current_model=current_model)
     if selected:
         # Clear any custom endpoint and set provider to openrouter
         if get_env_value("OPENAI_BASE_URL"):
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
new file mode 100644
index 0000000000..789c51e867
--- /dev/null
+++ b/hermes_cli/models.py
@@ -0,0 +1,33 @@
+"""
+Canonical list of OpenRouter models offered in CLI and setup wizards.
+
+Add, remove, or reorder entries here — both `hermes setup` and
+`hermes` provider-selection will pick up the change automatically.
+"""
+
+# Ordered (model_id, menu description) pairs; an empty description yields a bare-id menu label.
+OPENROUTER_MODELS: list[tuple[str, str]] = [
+    ("anthropic/claude-opus-4.6",       "recommended"),
+    ("anthropic/claude-sonnet-4.5",     ""),
+    ("anthropic/claude-opus-4.5",       ""),
+    ("openai/gpt-5.2",                  ""),
+    ("openai/gpt-5.2-codex",            ""),
+    ("google/gemini-3-pro-preview",     ""),
+    ("google/gemini-3-flash-preview",   ""),
+    ("z-ai/glm-4.7",                    ""),
+    ("moonshotai/kimi-k2.5",            ""),
+    ("minimax/minimax-m2.1",            ""),
+]
+
+
+def model_ids() -> list[str]:
+    """List the model ids from OPENROUTER_MODELS, in order, without descriptions."""
+    return [entry[0] for entry in OPENROUTER_MODELS]
+
+
+def menu_labels() -> list[str]:
+    """Build menu labels: 'id (description)' when described, otherwise the bare id."""
+    return [
+        f"{model} ({note})" if note else model
+        for model, note in OPENROUTER_MODELS
+    ]
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 1dd670858d..a50975b354 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -611,46 +611,27 @@ def run_setup_wizard(args):
                     save_env_value("LLM_MODEL", custom)
             # else: keep current
         else:
-            # Static list for OpenRouter / fallback
-            model_choices = [
-                "anthropic/claude-opus-4.6 (recommended)",
-                "anthropic/claude-sonnet-4.5",
-                "anthropic/claude-opus-4.5",
-                "openai/gpt-5.2",
-                "openai/gpt-5.2-codex",
-                "google/gemini-3-pro-preview",
-                "google/gemini-3-flash-preview",
-                "z-ai/glm-4.7",
-                "moonshotai/kimi-k2.5",
-                "minimax/minimax-m2.1",
+            # Static list for OpenRouter / fallback (from canonical list)
+            from hermes_cli.models import model_ids, menu_labels
+
+            ids = model_ids()
+            model_choices = menu_labels() + [
                 "Custom model",
-                f"Keep current ({current_model})"
+                f"Keep current ({current_model})",
             ]
 
-            model_idx = prompt_choice("Select default model:", model_choices, 11)
+            keep_idx = len(model_choices) - 1
+            model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
 
-            model_map = {
-                0: "anthropic/claude-opus-4.6",
-                1: "anthropic/claude-sonnet-4.5",
-                2: "anthropic/claude-opus-4.5",
-                3: "openai/gpt-5.2",
-                4: "openai/gpt-5.2-codex",
-                5: "google/gemini-3-pro-preview",
-                6: "google/gemini-3-flash-preview",
-                7: "z-ai/glm-4.7",
-                8: "moonshotai/kimi-k2.5",
-                9: "minimax/minimax-m2.1",
-            }
-
-            if model_idx in model_map:
-                config['model'] = model_map[model_idx]
-                save_env_value("LLM_MODEL", model_map[model_idx])
-            elif model_idx == 10:  # Custom
+            if model_idx < len(ids):
+                config['model'] = ids[model_idx]
+                save_env_value("LLM_MODEL", ids[model_idx])
+            elif model_idx == len(ids):  # Custom
                 custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
                 if custom:
                     config['model'] = custom
                     save_env_value("LLM_MODEL", custom)
-            # else: Keep current (model_idx == 11)
+            # else: Keep current
     
     # =========================================================================
     # Step 4: Terminal Backend
diff --git a/run_agent.py b/run_agent.py
index 882d10b294..3aa1df6865 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -69,840 +69,12 @@ from agent.prompt_builder import build_skills_system_prompt, build_context_files
 from agent.display import (
     KawaiiSpinner, build_tool_preview as _build_tool_preview,
     get_cute_tool_message as _get_cute_tool_message_impl,
-    KAWAII_SEARCH, KAWAII_READ, KAWAII_TERMINAL, KAWAII_BROWSER,
-    KAWAII_CREATE, KAWAII_SKILL, KAWAII_THINK, KAWAII_GENERIC,
 )
 from agent.trajectory import (
     convert_scratchpad_to_think, has_incomplete_scratchpad,
     save_trajectory as _save_trajectory_to_file,
 )
 
-# Model metadata functions (fetch_model_metadata, get_model_context_length,
-# estimate_tokens_rough, estimate_messages_tokens_rough) are now in
-# agent/model_metadata.py -- imported above.
-
-
-class ContextCompressor:
-    """
-    Compresses conversation context when approaching model's context limit.
-    
-    Uses similar logic to trajectory_compressor but operates in real-time:
-    1. Protects first few turns (system, initial user, first assistant response)
-    2. Protects last N turns (recent context is most relevant)
-    3. Summarizes middle turns when threshold is reached
-    
-    Token tracking uses actual counts from API responses (usage.prompt_tokens)
-    rather than estimates for accuracy.
-    """
-    
-    def __init__(
-        self,
-        model: str,
-        threshold_percent: float = 0.85,
-        summary_model: str = "google/gemini-3-flash-preview",
-        protect_first_n: int = 3,
-        protect_last_n: int = 4,
-        summary_target_tokens: int = 500,
-        quiet_mode: bool = False,
-    ):
-        """
-        Initialize the context compressor.
-        
-        Args:
-            model: The main model being used (to determine context limit)
-            threshold_percent: Trigger compression at this % of context (default 85%)
-            summary_model: Model to use for generating summaries (cheap/fast)
-            protect_first_n: Number of initial turns to always keep
-            protect_last_n: Number of recent turns to always keep
-            summary_target_tokens: Target token count for summaries
-            quiet_mode: Suppress compression notifications
-        """
-        self.model = model
-        self.threshold_percent = threshold_percent
-        self.summary_model = summary_model
-        self.protect_first_n = protect_first_n
-        self.protect_last_n = protect_last_n
-        self.summary_target_tokens = summary_target_tokens
-        self.quiet_mode = quiet_mode
-        
-        self.context_length = get_model_context_length(model)
-        self.threshold_tokens = int(self.context_length * threshold_percent)
-        self.compression_count = 0
-        
-        # Track actual token usage from API responses
-        self.last_prompt_tokens = 0
-        self.last_completion_tokens = 0
-        self.last_total_tokens = 0
-        
-        # Initialize OpenRouter client for summarization
-        api_key = os.getenv("OPENROUTER_API_KEY", "")
-        self.client = OpenAI(
-            api_key=api_key,
-            base_url=OPENROUTER_BASE_URL
-        ) if api_key else None
-    
-    def update_from_response(self, usage: Dict[str, Any]):
-        """
-        Update tracked token usage from API response.
-        
-        Args:
-            usage: The usage dict from response (contains prompt_tokens, completion_tokens, total_tokens)
-        """
-        self.last_prompt_tokens = usage.get("prompt_tokens", 0)
-        self.last_completion_tokens = usage.get("completion_tokens", 0)
-        self.last_total_tokens = usage.get("total_tokens", 0)
-    
-    def should_compress(self, prompt_tokens: int = None) -> bool:
-        """
-        Check if context exceeds the compression threshold.
-        
-        Uses actual token count from API response for accuracy.
-        
-        Args:
-            prompt_tokens: Actual prompt tokens from last API response.
-                          If None, uses last tracked value.
-            
-        Returns:
-            True if compression should be triggered
-        """
-        tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
-        return tokens >= self.threshold_tokens
-    
-    def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
-        """
-        Quick pre-flight check using rough estimate (before API call).
-        
-        Use this to avoid making an API call that would fail due to context overflow.
-        For post-response compression decisions, use should_compress() with actual tokens.
-        
-        Args:
-            messages: Current conversation messages
-            
-        Returns:
-            True if compression is likely needed
-        """
-        rough_estimate = estimate_messages_tokens_rough(messages)
-        return rough_estimate >= self.threshold_tokens
-    
-    def get_status(self) -> Dict[str, Any]:
-        """
-        Get current compression status for display/logging.
-        
-        Returns:
-            Dict with token usage and threshold info
-        """
-        return {
-            "last_prompt_tokens": self.last_prompt_tokens,
-            "threshold_tokens": self.threshold_tokens,
-            "context_length": self.context_length,
-            "usage_percent": (self.last_prompt_tokens / self.context_length * 100) if self.context_length else 0,
-            "compression_count": self.compression_count,
-        }
-    
-    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> str:
-        """
-        Generate a concise summary of conversation turns using a fast model.
-        
-        Args:
-            turns_to_summarize: List of message dicts to summarize
-            
-        Returns:
-            Summary string
-        """
-        if not self.client:
-            # Fallback if no API key
-            return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed to save space. The assistant performed various actions and received responses."
-        
-        # Format turns for summarization
-        parts = []
-        for i, msg in enumerate(turns_to_summarize):
-            role = msg.get("role", "unknown")
-            content = msg.get("content", "")
-            
-            # Truncate very long content
-            if len(content) > 2000:
-                content = content[:1000] + "\n...[truncated]...\n" + content[-500:]
-            
-            # Include tool call info if present
-            tool_calls = msg.get("tool_calls", [])
-            if tool_calls:
-                tool_names = [tc.get("function", {}).get("name", "?") for tc in tool_calls if isinstance(tc, dict)]
-                content += f"\n[Tool calls: {', '.join(tool_names)}]"
-            
-            parts.append(f"[{role.upper()}]: {content}")
-        
-        content_to_summarize = "\n\n".join(parts)
-        
-        prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
-
-Write from a neutral perspective describing:
-1. What actions were taken (tool calls, searches, file operations)
-2. Key information or results obtained
-3. Important decisions or findings
-4. Relevant data, file names, or outputs
-
-Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
-
----
-TURNS TO SUMMARIZE:
-{content_to_summarize}
----
-
-Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
-
-        try:
-            response = self.client.chat.completions.create(
-                model=self.summary_model,
-                messages=[{"role": "user", "content": prompt}],
-                temperature=0.3,
-                max_tokens=self.summary_target_tokens * 2,
-                timeout=30.0,
-            )
-            
-            summary = response.choices[0].message.content.strip()
-            if not summary.startswith("[CONTEXT SUMMARY]:"):
-                summary = "[CONTEXT SUMMARY]: " + summary
-            
-            return summary
-            
-        except Exception as e:
-            logging.warning(f"Failed to generate context summary: {e}")
-            return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed. The assistant performed tool calls and received responses."
-    
-    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
-        """
-        Compress conversation messages by summarizing middle turns.
-        
-        Algorithm:
-        1. Keep first N turns (system prompt, initial context)
-        2. Keep last N turns (recent/relevant context)
-        3. Summarize everything in between
-        4. Insert summary as a user message
-        
-        Args:
-            messages: Current conversation messages
-            current_tokens: Actual token count from API (for logging). If None, uses estimate.
-            
-        Returns:
-            Compressed message list
-        """
-        n_messages = len(messages)
-        
-        # Not enough messages to compress
-        if n_messages <= self.protect_first_n + self.protect_last_n + 1:
-            if not self.quiet_mode:
-                print(f"⚠️  Cannot compress: only {n_messages} messages (need > {self.protect_first_n + self.protect_last_n + 1})")
-            return messages
-        
-        # Determine compression boundaries
-        compress_start = self.protect_first_n
-        compress_end = n_messages - self.protect_last_n
-        
-        # Nothing to compress
-        if compress_start >= compress_end:
-            return messages
-        
-        # Extract turns to summarize
-        turns_to_summarize = messages[compress_start:compress_end]
-        
-        # Use actual token count if provided, otherwise estimate
-        display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
-        
-        if not self.quiet_mode:
-            print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
-            print(f"   📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
-            print(f"   🗜️  Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
-        
-        # Generate summary
-        summary = self._generate_summary(turns_to_summarize)
-        
-        # Build compressed messages
-        compressed = []
-        
-        # Keep protected head turns
-        for i in range(compress_start):
-            msg = messages[i].copy()
-            # Add notice to system message on first compression
-            if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
-                msg["content"] = msg.get("content", "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
-            compressed.append(msg)
-        
-        # Add summary as user message
-        compressed.append({
-            "role": "user",
-            "content": summary
-        })
-        
-        # Keep protected tail turns
-        for i in range(compress_end, n_messages):
-            compressed.append(messages[i].copy())
-        
-        self.compression_count += 1
-        
-        if not self.quiet_mode:
-            # Estimate new size (actual will be known after next API call)
-            new_estimate = estimate_messages_tokens_rough(compressed)
-            saved_estimate = display_tokens - new_estimate
-            print(f"   ✅ Compressed: {n_messages} → {len(compressed)} messages (~{saved_estimate:,} tokens saved)")
-            print(f"   💡 Compression #{self.compression_count} complete")
-        
-        return compressed
-
-
-# =============================================================================
-# Anthropic Prompt Caching (system_and_3 strategy)
-# =============================================================================
-# Reduces input token costs by ~75% on multi-turn conversations by caching
-# the conversation prefix. Uses 4 cache_control breakpoints (Anthropic max):
-#   1. System prompt (stable across all turns)
-#   2-4. Last 3 non-system messages (rolling window)
-#
-# Cached tokens are read at 0.1x input price. Cache writes cost 1.25x (5m TTL)
-# or 2x (1h TTL). Only applied to Claude models via OpenRouter.
-
-def _apply_cache_marker(msg: dict, cache_marker: dict) -> None:
-    """
-    Add cache_control to a single message, handling all format variations.
-
-    - tool messages: cache_control at message level (Anthropic API quirk)
-    - string content: converted to multipart content array
-    - list content: marker added to last item
-    - None content (assistant with tool_calls): message level
-    """
-    role = msg.get("role", "")
-    content = msg.get("content")
-
-    if role == "tool":
-        msg["cache_control"] = cache_marker
-        return
-
-    if content is None:
-        msg["cache_control"] = cache_marker
-        return
-
-    if isinstance(content, str):
-        msg["content"] = [{"type": "text", "text": content, "cache_control": cache_marker}]
-        return
-
-    if isinstance(content, list) and content:
-        last = content[-1]
-        if isinstance(last, dict):
-            last["cache_control"] = cache_marker
-
-
-def apply_anthropic_cache_control(
-    api_messages: List[Dict[str, Any]],
-    cache_ttl: str = "5m",
-) -> List[Dict[str, Any]]:
-    """
-    Apply system_and_3 caching strategy to messages for Anthropic models.
-
-    Places up to 4 cache_control breakpoints:
-      1. System prompt (index 0, stable across all turns)
-      2-4. Last 3 non-system messages (rolling cache frontier)
-
-    Each breakpoint tells Anthropic "cache everything from the start up to here."
-    Multiple breakpoints create a ladder of cached prefixes at different depths,
-    which provides robust cache hits even when the most recent cache entry hasn't
-    propagated yet.
-
-    Args:
-        api_messages: Fully assembled message list (system prompt first).
-        cache_ttl: "5m" (default, 1.25x write cost) or "1h" (2x write cost).
-
-    Returns:
-        Deep copy of messages with cache_control breakpoints injected.
-    """
-    messages = copy.deepcopy(api_messages)
-    if not messages:
-        return messages
-
-    marker = {"type": "ephemeral"}
-    if cache_ttl == "1h":
-        marker["ttl"] = "1h"
-
-    breakpoints_used = 0
-
-    # Breakpoint 1: System prompt (always stable, gives a guaranteed minimum hit)
-    if messages[0].get("role") == "system":
-        _apply_cache_marker(messages[0], marker)
-        breakpoints_used += 1
-
-    # Breakpoints 2-4: Last 3 non-system messages (rolling window)
-    remaining = 4 - breakpoints_used
-    non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
-    for idx in non_sys[-remaining:]:
-        _apply_cache_marker(messages[idx], marker)
-
-    return messages
-
-
-# =============================================================================
-# Default System Prompt Components
-# =============================================================================
-
-# Skills guidance - embeds a compact skill index in the system prompt so
-# the model can match skills at a glance without extra tool calls.
-def build_skills_system_prompt() -> str:
-    """
-    Build a dynamic skills system prompt by scanning both bundled and user skill directories.
-    
-    Returns a prompt section that lists all skill categories (with descriptions
-    from DESCRIPTION.md) and their skill names inline, so the model can
-    immediately see if a relevant skill exists and load it with a single
-    skill_view(name) call -- no discovery tool calls needed.
-    
-    Returns:
-        str: The skills system prompt section, or empty string if no skills found.
-    """
-    import os
-    from pathlib import Path
-    
-    hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
-    skills_dir = hermes_home / "skills"
-    
-    if not skills_dir.exists():
-        return ""
-    
-    # Scan for SKILL.md files grouped by category
-    skills_by_category = {}
-    for skill_file in skills_dir.rglob("SKILL.md"):
-        rel_path = skill_file.relative_to(skills_dir)
-        parts = rel_path.parts
-        if len(parts) >= 2:
-            category = parts[0]
-            skill_name = parts[-2]
-        else:
-            category = "general"
-            skill_name = skill_file.parent.name
-        skills_by_category.setdefault(category, []).append(skill_name)
-    
-    if not skills_by_category:
-        return ""
-    
-    # Load category descriptions from DESCRIPTION.md files
-    category_descriptions = {}
-    for category in skills_by_category:
-        desc_file = skills_dir / category / "DESCRIPTION.md"
-        if desc_file.exists():
-            try:
-                content = desc_file.read_text(encoding="utf-8")
-                match = re.search(r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---", content, re.MULTILINE | re.DOTALL)
-                if match:
-                    category_descriptions[category] = match.group(1).strip()
-            except Exception as e:
-                logger.debug("Could not read skill description %s: %s", desc_file, e)
-    
-    index_lines = []
-    for category in sorted(skills_by_category.keys()):
-        desc = category_descriptions.get(category, "")
-        names = ", ".join(sorted(set(skills_by_category[category])))
-        if desc:
-            index_lines.append(f"  {category}: {desc}")
-        else:
-            index_lines.append(f"  {category}:")
-        index_lines.append(f"    skills: {names}")
-    
-    return (
-        "## Skills (mandatory)\n"
-        "Before replying, scan the skills below. If one clearly matches your task, "
-        "load it with skill_view(name) and follow its instructions. "
-        "If a skill has issues, fix it with skill_manage(action='patch').\n"
-        "\n"
-        "<available_skills>\n"
-        + "\n".join(index_lines) + "\n"
-        "</available_skills>\n"
-        "\n"
-        "If none match, proceed normally without loading a skill."
-    )
-
-
-# =============================================================================
-# Context File Injection (SOUL.md, AGENTS.md, .cursorrules)
-# =============================================================================
-
-# Maximum characters per context file before truncation
-CONTEXT_FILE_MAX_CHARS = 20_000
-# Truncation strategy: keep 70% from the head, 20% from the tail
-CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
-CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
-
-
-def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
-    """
-    Truncate content if it exceeds max_chars using a head/tail strategy.
-    
-    Keeps 70% from the start and 20% from the end, with a truncation
-    marker in the middle so the model knows content was cut.
-    """
-    if len(content) <= max_chars:
-        return content
-    
-    head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
-    tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
-    head = content[:head_chars]
-    tail = content[-tail_chars:]
-    
-    marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
-    return head + marker + tail
-
-
-def build_context_files_prompt(cwd: str = None) -> str:
-    """
-    Discover and load context files (SOUL.md, AGENTS.md, .cursorrules)
-    for injection into the system prompt.
-    
-    Discovery rules:
-    - AGENTS.md: Recursively search from cwd (only if top-level exists).
-                 Each file becomes a ## section with its relative path.
-    - .cursorrules: Check cwd for .cursorrules file and .cursor/rules/*.mdc
-    - SOUL.md: Check cwd first, then ~/.hermes/SOUL.md as global fallback
-    
-    Args:
-        cwd: Working directory to search from. Defaults to os.getcwd().
-    
-    Returns:
-        str: The context files prompt section, or empty string if none found.
-    """
-    import os
-    import glob as glob_mod
-    from pathlib import Path
-    
-    if cwd is None:
-        cwd = os.getcwd()
-    
-    cwd_path = Path(cwd).resolve()
-    sections = []
-    
-    # ----- AGENTS.md (hierarchical, recursive) -----
-    top_level_agents = None
-    for name in ["AGENTS.md", "agents.md"]:
-        candidate = cwd_path / name
-        if candidate.exists():
-            top_level_agents = candidate
-            break
-    
-    if top_level_agents:
-        # Recursively find all AGENTS.md files (case-insensitive)
-        agents_files = []
-        for root, dirs, files in os.walk(cwd_path):
-            # Skip hidden directories and common non-project dirs
-            dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ('node_modules', '__pycache__', 'venv', '.venv')]
-            for f in files:
-                if f.lower() == "agents.md":
-                    agents_files.append(Path(root) / f)
-        
-        # Sort by path depth (top-level first, then deeper)
-        agents_files.sort(key=lambda p: len(p.parts))
-        
-        total_agents_content = ""
-        for agents_path in agents_files:
-            try:
-                content = agents_path.read_text(encoding="utf-8").strip()
-                if content:
-                    rel_path = agents_path.relative_to(cwd_path)
-                    total_agents_content += f"## {rel_path}\n\n{content}\n\n"
-            except Exception as e:
-                logger.debug("Could not read %s: %s", agents_path, e)
-        
-        if total_agents_content:
-            total_agents_content = _truncate_content(total_agents_content, "AGENTS.md")
-            sections.append(total_agents_content)
-    
-    # ----- .cursorrules -----
-    cursorrules_content = ""
-    
-    # Check for .cursorrules file
-    cursorrules_file = cwd_path / ".cursorrules"
-    if cursorrules_file.exists():
-        try:
-            content = cursorrules_file.read_text(encoding="utf-8").strip()
-            if content:
-                cursorrules_content += f"## .cursorrules\n\n{content}\n\n"
-        except Exception as e:
-            logger.debug("Could not read .cursorrules: %s", e)
-    
-    # Check for .cursor/rules/*.mdc files
-    cursor_rules_dir = cwd_path / ".cursor" / "rules"
-    if cursor_rules_dir.exists() and cursor_rules_dir.is_dir():
-        mdc_files = sorted(cursor_rules_dir.glob("*.mdc"))
-        for mdc_file in mdc_files:
-            try:
-                content = mdc_file.read_text(encoding="utf-8").strip()
-                if content:
-                    cursorrules_content += f"## .cursor/rules/{mdc_file.name}\n\n{content}\n\n"
-            except Exception as e:
-                logger.debug("Could not read %s: %s", mdc_file, e)
-    
-    if cursorrules_content:
-        cursorrules_content = _truncate_content(cursorrules_content, ".cursorrules")
-        sections.append(cursorrules_content)
-    
-    # ----- SOUL.md (cwd first, then ~/.hermes/ fallback) -----
-    soul_content = ""
-    soul_path = None
-    
-    for name in ["SOUL.md", "soul.md"]:
-        candidate = cwd_path / name
-        if candidate.exists():
-            soul_path = candidate
-            break
-    
-    if not soul_path:
-        # Global fallback
-        global_soul = Path.home() / ".hermes" / "SOUL.md"
-        if global_soul.exists():
-            soul_path = global_soul
-    
-    if soul_path:
-        try:
-            content = soul_path.read_text(encoding="utf-8").strip()
-            if content:
-                content = _truncate_content(content, "SOUL.md")
-                soul_content = f"## SOUL.md\n\nIf SOUL.md is present, embody its persona and tone. Avoid stiff, generic replies; follow its guidance unless higher-priority instructions override it.\n\n{content}"
-                sections.append(soul_content)
-        except Exception as e:
-            logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)
-    
-    # ----- Assemble -----
-    if not sections:
-        return ""
-    
-    return "# Project Context\n\nThe following project context files have been loaded and should be followed:\n\n" + "\n".join(sections)
-
-
-def _build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str:
-    """
-    Build a short preview of a tool call's primary argument for display.
-    
-    Returns a truncated string showing the most informative argument,
-    or None if no meaningful preview is available.
-    
-    Args:
-        tool_name: Name of the tool being called
-        args: The tool call arguments dict
-        max_len: Maximum preview length before truncation
-    
-    Returns:
-        str or None: Short preview string, or None
-    """
-    # Map tool names to their primary argument key(s)
-    primary_args = {
-        "terminal": "command",
-        "web_search": "query",
-        "web_extract": "urls",
-        "read_file": "path",
-        "write_file": "path",
-        "patch": "path",
-        "search_files": "pattern",
-        "browser_navigate": "url",
-        "browser_click": "ref",
-        "browser_type": "text",
-        "image_generate": "prompt",
-        "text_to_speech": "text",
-        "vision_analyze": "question",
-        "mixture_of_agents": "user_prompt",
-        "skill_view": "name",
-        "skills_list": "category",
-        "schedule_cronjob": "name",
-    }
-    
-    # Special handling for tools with composite previews
-    if tool_name == "process":
-        action = args.get("action", "")
-        session_id = args.get("session_id", "")
-        data = args.get("data", "")
-        timeout = args.get("timeout")
-        parts = [action]
-        if session_id:
-            parts.append(session_id[:16])
-        if data:
-            parts.append(f'"{data[:20]}"')
-        if timeout and action == "wait":
-            parts.append(f"{timeout}s")
-        return " ".join(parts) if parts else None
-    
-    if tool_name == "todo":
-        todos_arg = args.get("todos")
-        merge = args.get("merge", False)
-        if todos_arg is None:
-            return "reading task list"
-        elif merge:
-            return f"updating {len(todos_arg)} task(s)"
-        else:
-            return f"planning {len(todos_arg)} task(s)"
-    
-    if tool_name == "session_search":
-        query = args.get("query", "")
-        return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\""
-
-    if tool_name == "memory":
-        action = args.get("action", "")
-        target = args.get("target", "")
-        if action == "add":
-            content = args.get("content", "")
-            return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
-        elif action == "replace":
-            return f"~{target}: \"{args.get('old_text', '')[:20]}\""
-        elif action == "remove":
-            return f"-{target}: \"{args.get('old_text', '')[:20]}\""
-        return action
-    
-    if tool_name == "send_message":
-        target = args.get("target", "?")
-        msg = args.get("message", "")
-        if len(msg) > 20:
-            msg = msg[:17] + "..."
-        return f"to {target}: \"{msg}\""
-    
-    if tool_name.startswith("rl_"):
-        rl_previews = {
-            "rl_list_environments": "listing envs",
-            "rl_select_environment": args.get("name", ""),
-            "rl_get_current_config": "reading config",
-            "rl_edit_config": f"{args.get('field', '')}={args.get('value', '')}",
-            "rl_start_training": "starting",
-            "rl_check_status": args.get("run_id", "")[:16],
-            "rl_stop_training": f"stopping {args.get('run_id', '')[:16]}",
-            "rl_get_results": args.get("run_id", "")[:16],
-            "rl_list_runs": "listing runs",
-            "rl_test_inference": f"{args.get('num_steps', 3)} steps",
-        }
-        return rl_previews.get(tool_name)
-
-    key = primary_args.get(tool_name)
-    if not key:
-        # Try common arg names as fallback
-        for fallback_key in ("query", "text", "command", "path", "name", "prompt"):
-            if fallback_key in args:
-                key = fallback_key
-                break
-    
-    if not key or key not in args:
-        return None
-    
-    value = args[key]
-    
-    # Handle list values (e.g., urls)
-    if isinstance(value, list):
-        value = value[0] if value else ""
-    
-    preview = str(value).strip()
-    if not preview:
-        return None
-    
-    # Truncate
-    if len(preview) > max_len:
-        preview = preview[:max_len - 3] + "..."
-    
-    return preview
-
-
-class KawaiiSpinner:
-    """
-    Animated spinner with kawaii faces for CLI feedback during tool execution.
-    Runs in a background thread and can be stopped when the operation completes.
-    
-    Uses stdout with carriage return to animate in place.
-    """
-    
-    # Different spinner animation sets
-    SPINNERS = {
-        'dots': ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'],
-        'bounce': ['⠁', '⠂', '⠄', '⡀', '⢀', '⠠', '⠐', '⠈'],
-        'grow': ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█', '▇', '▆', '▅', '▄', '▃', '▂'],
-        'arrows': ['←', '↖', '↑', '↗', '→', '↘', '↓', '↙'],
-        'star': ['✶', '✷', '✸', '✹', '✺', '✹', '✸', '✷'],
-        'moon': ['🌑', '🌒', '🌓', '🌔', '🌕', '🌖', '🌗', '🌘'],
-        'pulse': ['◜', '◠', '◝', '◞', '◡', '◟'],
-        'brain': ['🧠', '💭', '💡', '✨', '💫', '🌟', '💡', '💭'],
-        'sparkle': ['⁺', '˚', '*', '✧', '✦', '✧', '*', '˚'],
-    }
-    
-    # General waiting faces
-    KAWAII_WAITING = [
-        "(｡◕‿◕｡)", "(◕‿◕✿)", "٩(◕‿◕｡)۶", "(✿◠‿◠)", "( ˘▽˘)っ",
-        "♪(´ε` )", "(◕ᴗ◕✿)", "ヾ(＾∇＾)", "(≧◡≦)", "(★ω★)",
-    ]
-    
-    # Thinking-specific faces and messages
-    KAWAII_THINKING = [
-        "(｡•́︿•̀｡)", "(◔_◔)", "(¬‿¬)", "( •_•)>⌐■-■", "(⌐■_■)",
-        "(´･_･`)", "◉_◉", "(°ロ°)", "( ˘⌣˘)♡", "ヽ(>∀<☆)☆",
-        "٩(๑❛ᴗ❛๑)۶", "(⊙_⊙)", "(¬_¬)", "( ͡° ͜ʖ ͡°)", "ಠ_ಠ",
-    ]
-    
-    THINKING_VERBS = [
-        "pondering", "contemplating", "musing", "cogitating", "ruminating",
-        "deliberating", "mulling", "reflecting", "processing", "reasoning",
-        "analyzing", "computing", "synthesizing", "formulating", "brainstorming",
-    ]
-    
-    def __init__(self, message: str = "", spinner_type: str = 'dots'):
-        self.message = message
-        self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
-        self.running = False
-        self.thread = None
-        self.frame_idx = 0
-        self.start_time = None
-        self.last_line_len = 0
-        
-    def _animate(self):
-        """Animation loop that runs in background thread."""
-        while self.running:
-            # Check for pause signal (e.g., during sudo password prompt)
-            if os.getenv("HERMES_SPINNER_PAUSE"):
-                time.sleep(0.1)
-                continue
-            
-            frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
-            elapsed = time.time() - self.start_time
-            
-            # Build the spinner line
-            line = f"  {frame} {self.message} ({elapsed:.1f}s)"
-            
-            # Clear previous line and write new one
-            clear = '\r' + ' ' * self.last_line_len + '\r'
-            print(clear + line, end='', flush=True)
-            self.last_line_len = len(line)
-            
-            self.frame_idx += 1
-            time.sleep(0.12)  # ~8 FPS animation
-    
-    def start(self):
-        """Start the spinner animation."""
-        if self.running:
-            return
-        self.running = True
-        self.start_time = time.time()
-        self.thread = threading.Thread(target=self._animate, daemon=True)
-        self.thread.start()
-    
-    def update_text(self, new_message: str):
-        """Update the spinner message text while it's running."""
-        self.message = new_message
-
-    def stop(self, final_message: str = None):
-        """Stop the spinner and optionally print a final message."""
-        self.running = False
-        if self.thread:
-            self.thread.join(timeout=0.5)
-        
-        # Clear the spinner line
-        print('\r' + ' ' * (self.last_line_len + 5) + '\r', end='', flush=True)
-        
-        # Print final message if provided
-        if final_message:
-            print(f"  {final_message}", flush=True)
-    
-    def __enter__(self):
-        self.start()
-        return self
-    
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.stop()
-        return False
-
 
 class AIAgent:
     """
@@ -1238,256 +410,6 @@ class AIAgent:
             else:
                 print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
     
-    # Pools of kawaii faces for random selection
-    KAWAII_SEARCH = [
-        "♪(´ε` )", "(｡◕‿◕｡)", "ヾ(＾∇＾)", "(◕ᴗ◕✿)", "( ˘▽˘)っ",
-        "٩(◕‿◕｡)۶", "(✿◠‿◠)", "♪～(´ε｀ )", "(ノ´ヮ`)ノ*:・゚✧", "＼(◎o◎)／",
-    ]
-    KAWAII_READ = [
-        "φ(゜▽゜*)♪", "( ˘▽˘)っ", "(⌐■_■)", "٩(｡•́‿•̀｡)۶", "(◕‿◕✿)",
-        "ヾ(＠⌒ー⌒＠)ノ", "(✧ω✧)", "♪(๑ᴖ◡ᴖ๑)♪", "(≧◡≦)", "( ´ ▽ ` )ノ",
-    ]
-    KAWAII_TERMINAL = [
-        "ヽ(>∀<☆)ノ", "(ノ°∀°)ノ", "٩(^ᴗ^)۶", "ヾ(⌐■_■)ノ♪", "(•̀ᴗ•́)و",
-        "┗(＾0＾)┓", "(｀・ω・´)", "＼(￣▽￣)／", "(ง •̀_•́)ง", "ヽ(´▽`)/",
-    ]
-    KAWAII_BROWSER = [
-        "(ノ°∀°)ノ", "(☞゚ヮ゚)☞", "( ͡° ͜ʖ ͡°)", "┌( ಠ_ಠ)┘", "(⊙_⊙)？",
-        "ヾ(•ω•`)o", "(￣ω￣)", "( ˇωˇ )", "(ᵔᴥᵔ)", "＼(◎o◎)／",
-    ]
-    KAWAII_CREATE = [
-        "✧*。٩(ˊᗜˋ*)و✧", "(ﾉ◕ヮ◕)ﾉ*:・ﾟ✧", "ヽ(>∀<☆)ノ", "٩(♡ε♡)۶", "(◕‿◕)♡",
-        "✿◕ ‿ ◕✿", "(*≧▽≦)", "ヾ(＾-＾)ノ", "(☆▽☆)", "°˖✧◝(⁰▿⁰)◜✧˖°",
-    ]
-    KAWAII_SKILL = [
-        "ヾ(＠⌒ー⌒＠)ノ", "(๑˃ᴗ˂)ﻭ", "٩(◕‿◕｡)۶", "(✿╹◡╹)", "ヽ(・∀・)ノ",
-        "(ノ´ヮ`)ノ*:・ﾟ✧", "♪(๑ᴖ◡ᴖ๑)♪", "(◠‿◠)", "٩(ˊᗜˋ*)و", "(＾▽＾)",
-        "ヾ(＾∇＾)", "(★ω★)/", "٩(｡•́‿•̀｡)۶", "(◕ᴗ◕✿)", "＼(◎o◎)／",
-        "(✧ω✧)", "ヽ(>∀<☆)ノ", "( ˘▽˘)っ", "(≧◡≦) ♡", "ヾ(￣▽￣)",
-    ]
-    KAWAII_THINK = [
-        "(っ°Д°;)っ", "(；′⌒`)", "(・_・ヾ", "( ´_ゝ`)", "(￣ヘ￣)",
-        "(。-`ω´-)", "( ˘︹˘ )", "(¬_¬)", "ヽ(ー_ー )ノ", "(；一_一)",
-    ]
-    KAWAII_GENERIC = [
-        "♪(´ε` )", "(◕‿◕✿)", "ヾ(＾∇＾)", "٩(◕‿◕｡)۶", "(✿◠‿◠)",
-        "(ノ´ヮ`)ノ*:・ﾟ✧", "ヽ(>∀<☆)ノ", "(☆▽☆)", "( ˘▽˘)っ", "(≧◡≦)",
-    ]
-    
-    def _get_cute_tool_message(self, tool_name: str, args: dict, duration: float) -> str:
-        """
-        Generate a clean, aligned tool activity line for CLI quiet mode.
-
-        Format: ┊ {emoji} {verb:9} {detail}  {duration}
-
-        Kawaii faces live in the animated spinner (while the tool runs).
-        This completion message replaces the spinner with a permanent log line.
-        """
-        dur = f"{duration:.1f}s"
-
-        def _trunc(s, n=40):
-            s = str(s)
-            return (s[:n-3] + "...") if len(s) > n else s
-
-        def _path(p, n=35):
-            p = str(p)
-            return ("..." + p[-(n-3):]) if len(p) > n else p
-
-        # ── Web ──
-        if tool_name == "web_search":
-            q = _trunc(args.get("query", ""), 42)
-            return f"┊ 🔍 search    {q}  {dur}"
-
-        if tool_name == "web_extract":
-            urls = args.get("urls", [])
-            if urls:
-                url = urls[0] if isinstance(urls, list) else str(urls)
-                domain = url.replace("https://", "").replace("http://", "").split("/")[0]
-                extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
-                return f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}"
-            return f"┊ 📄 fetch     pages  {dur}"
-
-        if tool_name == "web_crawl":
-            url = args.get("url", "")
-            domain = url.replace("https://", "").replace("http://", "").split("/")[0]
-            return f"┊ 🕸️  crawl     {_trunc(domain, 35)}  {dur}"
-
-        # ── Terminal & Process ──
-        if tool_name == "terminal":
-            cmd = _trunc(args.get("command", ""), 42)
-            return f"┊ 💻 $         {cmd}  {dur}"
-
-        if tool_name == "process":
-            action = args.get("action", "?")
-            sid = args.get("session_id", "")[:12]
-            labels = {
-                "list": "ls processes", "poll": f"poll {sid}",
-                "log": f"log {sid}", "wait": f"wait {sid}",
-                "kill": f"kill {sid}", "write": f"write {sid}",
-                "submit": f"submit {sid}",
-            }
-            detail = labels.get(action, f"{action} {sid}")
-            return f"┊ ⚙️  proc      {detail}  {dur}"
-
-        # ── Files ──
-        if tool_name == "read_file":
-            return f"┊ 📖 read      {_path(args.get('path', ''))}  {dur}"
-
-        if tool_name == "write_file":
-            return f"┊ ✍️  write     {_path(args.get('path', ''))}  {dur}"
-
-        if tool_name == "patch":
-            return f"┊ 🔧 patch     {_path(args.get('path', ''))}  {dur}"
-
-        if tool_name == "search_files":
-            pattern = _trunc(args.get("pattern", ""), 35)
-            target = args.get("target", "content")
-            verb = "find" if target == "files" else "grep"
-            return f"┊ 🔎 {verb:9} {pattern}  {dur}"
-
-        # ── Browser ──
-        if tool_name == "browser_navigate":
-            url = args.get("url", "")
-            domain = url.replace("https://", "").replace("http://", "").split("/")[0]
-            return f"┊ 🌐 navigate  {_trunc(domain, 35)}  {dur}"
-
-        if tool_name == "browser_snapshot":
-            mode = "full" if args.get("full") else "compact"
-            return f"┊ 📸 snapshot  {mode}  {dur}"
-
-        if tool_name == "browser_click":
-            return f"┊ 👆 click     {args.get('ref', '?')}  {dur}"
-
-        if tool_name == "browser_type":
-            text = _trunc(args.get("text", ""), 30)
-            return f"┊ ⌨️  type      \"{text}\"  {dur}"
-
-        if tool_name == "browser_scroll":
-            d = args.get("direction", "down")
-            arrow = {"down": "↓", "up": "↑", "right": "→", "left": "←"}.get(d, "↓")
-            return f"┊ {arrow}  scroll    {d}  {dur}"
-
-        if tool_name == "browser_back":
-            return f"┊ ◀️  back      {dur}"
-
-        if tool_name == "browser_press":
-            return f"┊ ⌨️  press     {args.get('key', '?')}  {dur}"
-
-        if tool_name == "browser_close":
-            return f"┊ 🚪 close     browser  {dur}"
-
-        if tool_name == "browser_get_images":
-            return f"┊ 🖼️  images    extracting  {dur}"
-
-        if tool_name == "browser_vision":
-            return f"┊ 👁️  vision    analyzing page  {dur}"
-
-        # ── Planning ──
-        if tool_name == "todo":
-            todos_arg = args.get("todos")
-            merge = args.get("merge", False)
-            if todos_arg is None:
-                return f"┊ 📋 plan      reading tasks  {dur}"
-            elif merge:
-                return f"┊ 📋 plan      update {len(todos_arg)} task(s)  {dur}"
-            else:
-                return f"┊ 📋 plan      {len(todos_arg)} task(s)  {dur}"
-
-        # ── Session Search ──
-        if tool_name == "session_search":
-            query = _trunc(args.get("query", ""), 35)
-            return f"┊ 🔍 recall    \"{query}\"  {dur}"
-
-        # ── Memory ──
-        if tool_name == "memory":
-            action = args.get("action", "?")
-            target = args.get("target", "")
-            if action == "add":
-                preview = _trunc(args.get("content", ""), 30)
-                return f"┊ 🧠 memory    +{target}: \"{preview}\"  {dur}"
-            elif action == "replace":
-                snippet = _trunc(args.get("old_text", ""), 20)
-                return f"┊ 🧠 memory    ~{target}: \"{snippet}\"  {dur}"
-            elif action == "remove":
-                snippet = _trunc(args.get("old_text", ""), 20)
-                return f"┊ 🧠 memory    -{target}: \"{snippet}\"  {dur}"
-            elif action == "search_sessions":
-                query = _trunc(args.get("content", ""), 30)
-                return f"┊ 🧠 recall    \"{query}\"  {dur}"
-            else:
-                return f"┊ 🧠 memory    {action}  {dur}"
-
-        # ── Skills ──
-        if tool_name == "skills_list":
-            return f"┊ 📚 skills    list {args.get('category', 'all')}  {dur}"
-
-        if tool_name == "skill_view":
-            return f"┊ 📚 skill     {_trunc(args.get('name', ''), 30)}  {dur}"
-
-        # ── Generation & Media ──
-        if tool_name == "image_generate":
-            return f"┊ 🎨 create    {_trunc(args.get('prompt', ''), 35)}  {dur}"
-
-        if tool_name == "text_to_speech":
-            return f"┊ 🔊 speak     {_trunc(args.get('text', ''), 30)}  {dur}"
-
-        if tool_name == "vision_analyze":
-            return f"┊ 👁️  vision    {_trunc(args.get('question', ''), 30)}  {dur}"
-
-        if tool_name == "mixture_of_agents":
-            return f"┊ 🧠 reason    {_trunc(args.get('user_prompt', ''), 30)}  {dur}"
-
-        # ── Messaging & Scheduling ──
-        if tool_name == "send_message":
-            target = args.get("target", "?")
-            msg = _trunc(args.get("message", ""), 25)
-            return f"┊ 📨 send      {target}: \"{msg}\"  {dur}"
-
-        if tool_name == "schedule_cronjob":
-            name = _trunc(args.get("name", args.get("prompt", "task")), 30)
-            return f"┊ ⏰ schedule  {name}  {dur}"
-
-        if tool_name == "list_cronjobs":
-            return f"┊ ⏰ jobs      listing  {dur}"
-
-        if tool_name == "remove_cronjob":
-            return f"┊ ⏰ remove    job {args.get('job_id', '?')}  {dur}"
-
-        # ── RL Training ──
-        if tool_name.startswith("rl_"):
-            rl = {
-                "rl_list_environments": "list envs",
-                "rl_select_environment": f"select {args.get('name', '')}",
-                "rl_get_current_config": "get config",
-                "rl_edit_config": f"set {args.get('field', '?')}",
-                "rl_start_training": "start training",
-                "rl_check_status": f"status {args.get('run_id', '?')[:12]}",
-                "rl_stop_training": f"stop {args.get('run_id', '?')[:12]}",
-                "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
-                "rl_list_runs": "list runs",
-                "rl_test_inference": "test inference",
-            }
-            detail = rl.get(tool_name, tool_name.replace("rl_", ""))
-            return f"┊ 🧪 rl        {detail}  {dur}"
-
-        # ── Code Execution Sandbox ──
-        if tool_name == "execute_code":
-            code = args.get("code", "")
-            first_line = code.strip().split("\n")[0] if code.strip() else ""
-            return f"┊ 🐍 exec      {_trunc(first_line, 35)}  {dur}"
-
-        # ── Subagent Delegation ──
-        if tool_name == "delegate_task":
-            tasks = args.get("tasks")
-            if tasks and isinstance(tasks, list):
-                return f"┊ 🔀 delegate  {len(tasks)} parallel tasks  {dur}"
-            goal = _trunc(args.get("goal", ""), 35)
-            return f"┊ 🔀 delegate  {goal}  {dur}"
-
-        # ── Fallback ──
-        preview = _build_tool_preview(tool_name, args) or ""
-        return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)}  {dur}"
-    
     def _has_content_after_think_block(self, content: str) -> bool:
         """
         Check if content has actual text after any <think></think> blocks.
@@ -2330,7 +1252,7 @@ class AIAgent:
                 )
                 tool_duration = time.time() - tool_start_time
                 if self.quiet_mode:
-                    print(f"  {self._get_cute_tool_message('todo', function_args, tool_duration)}")
+                    print(f"  {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
             elif function_name == "session_search" and self._session_db:
                 from tools.session_search_tool import session_search as _session_search
                 function_result = _session_search(
@@ -2341,7 +1263,7 @@ class AIAgent:
                 )
                 tool_duration = time.time() - tool_start_time
                 if self.quiet_mode:
-                    print(f"  {self._get_cute_tool_message('session_search', function_args, tool_duration)}")
+                    print(f"  {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
             elif function_name == "memory":
                 from tools.memory_tool import memory_tool as _memory_tool
                 function_result = _memory_tool(
@@ -2353,7 +1275,7 @@ class AIAgent:
                 )
                 tool_duration = time.time() - tool_start_time
                 if self.quiet_mode:
-                    print(f"  {self._get_cute_tool_message('memory', function_args, tool_duration)}")
+                    print(f"  {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
             elif function_name == "clarify":
                 from tools.clarify_tool import clarify_tool as _clarify_tool
                 function_result = _clarify_tool(
@@ -2363,7 +1285,7 @@ class AIAgent:
                 )
                 tool_duration = time.time() - tool_start_time
                 if self.quiet_mode:
-                    print(f"  {self._get_cute_tool_message('clarify', function_args, tool_duration)}")
+                    print(f"  {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
             elif function_name == "delegate_task":
                 from tools.delegate_tool import delegate_task as _delegate_task
                 tasks_arg = function_args.get("tasks")
@@ -2378,6 +1300,7 @@ class AIAgent:
                     spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots')
                     spinner.start()
                 self._delegate_spinner = spinner
+                _delegate_result = None
                 try:
                     function_result = _delegate_task(
                         goal=function_args.get("goal"),
@@ -2388,10 +1311,11 @@ class AIAgent:
                         max_iterations=function_args.get("max_iterations"),
                         parent_agent=self,
                     )
+                    _delegate_result = function_result
                 finally:
                     self._delegate_spinner = None
                     tool_duration = time.time() - tool_start_time
-                    cute_msg = self._get_cute_tool_message('delegate_task', function_args, tool_duration)
+                    cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
                     if spinner:
                         spinner.stop(cute_msg)
                     elif self.quiet_mode:
@@ -2420,11 +1344,13 @@ class AIAgent:
                     preview = preview[:27] + "..."
                 spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
                 spinner.start()
+                _spinner_result = None
                 try:
                     function_result = handle_function_call(function_name, function_args, effective_task_id)
+                    _spinner_result = function_result
                 finally:
                     tool_duration = time.time() - tool_start_time
-                    cute_msg = self._get_cute_tool_message(function_name, function_args, tool_duration)
+                    cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
                     spinner.stop(cute_msg)
             else:
                 function_result = handle_function_call(function_name, function_args, effective_task_id)
@@ -3187,11 +2113,40 @@ class AIAgent:
                 if self.verbose_logging:
                     logging.exception("Detailed error information:")
                 
-                # Add error to conversation and try to continue
-                messages.append({
-                    "role": "assistant",
-                    "content": f"I encountered an error: {error_msg}. Let me try a different approach."
-                })
+                # If an assistant message with tool_calls was already appended,
+                # the API expects a role="tool" result for every tool_call_id.
+                # Fill in error results for any that weren't answered yet.
+                pending_handled = False
+                for idx in range(len(messages) - 1, -1, -1):
+                    msg = messages[idx]
+                    if not isinstance(msg, dict):
+                        break
+                    if msg.get("role") == "tool":
+                        continue
+                    if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                        answered_ids = {
+                            m["tool_call_id"]
+                            for m in messages[idx + 1:]
+                            if isinstance(m, dict) and m.get("role") == "tool"
+                        }
+                        for tc in msg["tool_calls"]:
+                            if tc["id"] not in answered_ids:
+                                messages.append({
+                                    "role": "tool",
+                                    "tool_call_id": tc["id"],
+                                    "content": f"Error executing tool: {error_msg}",
+                                })
+                        pending_handled = True
+                    break
+                
+                if not pending_handled:
+                    # Error happened before tool processing (e.g. response parsing).
+                    # Use a user-role message so the model can see what went wrong
+                    # without confusing the API with a fabricated assistant turn.
+                    messages.append({
+                        "role": "user",
+                        "content": f"[System error during processing: {error_msg}]",
+                    })
                 
                 # If we're near the limit, break to avoid infinite loops
                 if api_call_count >= self.max_iterations - 1:
diff --git a/tools/approval.py b/tools/approval.py
index 2db8424cb7..18f9b67431 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str,
                                        approval_callback=approval_callback)
 
     if choice == "deny":
-        return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."}
+        return {
+            "approved": False,
+            "message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.",
+            "pattern_key": pattern_key,
+            "description": description,
+        }
 
     if choice == "session":
         approve_session(session_key, pattern_key)
diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index 4467b890d3..b76b886bc8 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -51,25 +51,16 @@ import signal
 import subprocess
 import shutil
 import sys
-import asyncio
 import tempfile
 import threading
 import time
 import requests
 from typing import Dict, Any, Optional, List
 from pathlib import Path
-from hermes_constants import OPENROUTER_CHAT_URL
+from agent.auxiliary_client import get_vision_auxiliary_client
 
 logger = logging.getLogger(__name__)
 
-# Try to import httpx for async LLM calls
-try:
-    import httpx
-    HTTPX_AVAILABLE = True
-except ImportError:
-    HTTPX_AVAILABLE = False
-
-
 # ============================================================================
 # Configuration
 # ============================================================================
@@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300
 # Max tokens for snapshot content before summarization
 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
 
-# Model for task-aware extraction
-EXTRACTION_MODEL = "google/gemini-3-flash-preview"
+# Resolve vision auxiliary client for extraction/vision tasks
+_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
 
 # Track active sessions per task
 # Now stores tuple of (session_name, browserbase_session_id, cdp_url)
@@ -782,87 +773,49 @@ def _run_browser_command(
         return {"success": False, "error": str(e)}
 
 
-async def _extract_relevant_content(
+def _extract_relevant_content(
     snapshot_text: str,
     user_task: Optional[str] = None
 ) -> str:
+    """Use LLM to extract relevant content from a snapshot based on the user's task.
+
+    Falls back to simple truncation when no auxiliary vision model is configured.
     """
-    Use LLM to extract relevant content from a snapshot based on the user's task.
-    
-    This provides task-aware summarization that preserves meaningful text content
-    (paragraphs, prices, descriptions) relevant to what the user is trying to accomplish.
-    
-    Args:
-        snapshot_text: The full snapshot text
-        user_task: The user's current task/goal (optional)
-        
-    Returns:
-        Summarized/extracted content
-    """
-    if not HTTPX_AVAILABLE:
-        # Fall back to simple truncation
+    if _aux_vision_client is None or EXTRACTION_MODEL is None:
         return _truncate_snapshot(snapshot_text)
-    
-    # Get API key
-    api_key = os.environ.get("OPENROUTER_API_KEY")
-    if not api_key:
-        return _truncate_snapshot(snapshot_text)
-    
-    # Build extraction prompt
+
     if user_task:
-        extraction_prompt = f"""You are a content extractor for a browser automation agent.
-
-The user's task is: {user_task}
-
-Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on:
-1. Interactive elements (buttons, links, inputs) that might be needed
-2. Text content relevant to the task (prices, descriptions, headings, important info)
-3. Navigation structure if relevant
-
-Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.
-
-Page Snapshot:
-{snapshot_text}
-
-Provide a concise summary that preserves actionable information and relevant content."""
+        extraction_prompt = (
+            f"You are a content extractor for a browser automation agent.\n\n"
+            f"The user's task is: {user_task}\n\n"
+            f"Given the following page snapshot (accessibility tree representation), "
+            f"extract and summarize the most relevant information for completing this task. Focus on:\n"
+            f"1. Interactive elements (buttons, links, inputs) that might be needed\n"
+            f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n"
+            f"3. Navigation structure if relevant\n\n"
+            f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n"
+            f"Page Snapshot:\n{snapshot_text}\n\n"
+            f"Provide a concise summary that preserves actionable information and relevant content."
+        )
     else:
-        extraction_prompt = f"""Summarize this page snapshot, preserving:
-1. All interactive elements with their ref IDs (like [ref=e5])
-2. Key text content and headings
-3. Important information visible on the page
-
-Page Snapshot:
-{snapshot_text}
-
-Provide a concise summary focused on interactive elements and key content."""
+        extraction_prompt = (
+            f"Summarize this page snapshot, preserving:\n"
+            f"1. All interactive elements with their ref IDs (like [ref=e5])\n"
+            f"2. Key text content and headings\n"
+            f"3. Important information visible on the page\n\n"
+            f"Page Snapshot:\n{snapshot_text}\n\n"
+            f"Provide a concise summary focused on interactive elements and key content."
+        )
 
     try:
-        async with httpx.AsyncClient(timeout=30.0) as client:
-            response = await client.post(
-                OPENROUTER_CHAT_URL,
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json"
-                },
-                json={
-                    "model": EXTRACTION_MODEL,
-                    "messages": [
-                        {"role": "user", "content": extraction_prompt}
-                    ],
-                    "max_tokens": 4000,
-                    "temperature": 0.1
-                }
-            )
-            
-            if response.status_code == 200:
-                result = response.json()
-                return result["choices"][0]["message"]["content"]
-            else:
-                # Fall back to truncation on API error
-                return _truncate_snapshot(snapshot_text)
-                
+        response = _aux_vision_client.chat.completions.create(
+            model=EXTRACTION_MODEL,
+            messages=[{"role": "user", "content": extraction_prompt}],
+            max_tokens=4000, temperature=0.1,
+        )
+        # content is Optional in the API response; keep the documented str fallback.
+        return response.choices[0].message.content or _truncate_snapshot(snapshot_text)
     except Exception:
-        # Fall back to truncation on any error
         return _truncate_snapshot(snapshot_text)
 
 
@@ -991,16 +944,7 @@ def browser_snapshot(
         
         # Check if snapshot needs summarization
         if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task:
-            # Run async extraction
-            try:
-                loop = asyncio.get_event_loop()
-            except RuntimeError:
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-            
-            snapshot_text = loop.run_until_complete(
-                _extract_relevant_content(snapshot_text, user_task)
-            )
+            snapshot_text = _extract_relevant_content(snapshot_text, user_task)
         elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
             snapshot_text = _truncate_snapshot(snapshot_text)
         
@@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
     
     effective_task_id = task_id or "default"
     
-    # Check for OpenRouter API key
-    api_key = os.environ.get("OPENROUTER_API_KEY")
-    if not api_key:
+    # Check auxiliary vision client
+    if _aux_vision_client is None or EXTRACTION_MODEL is None:
         return json.dumps({
             "success": False,
-            "error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key."
+            "error": "Browser vision unavailable: no auxiliary vision model configured. "
+                     "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
         }, ensure_ascii=False)
     
     # Create a temporary file for the screenshot
@@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
         image_base64 = base64.b64encode(image_data).decode("ascii")
         data_url = f"data:image/png;base64,{image_base64}"
         
-        # Prepare the vision prompt
-        vision_prompt = f"""You are analyzing a screenshot of a web browser.
+        vision_prompt = (
+            f"You are analyzing a screenshot of a web browser.\n\n"
+            f"User's question: {question}\n\n"
+            f"Provide a detailed and helpful answer based on what you see in the screenshot. "
+            f"If there are interactive elements, describe them. If there are verification challenges "
+            f"or CAPTCHAs, describe what type they are and what action might be needed. "
+            f"Focus on answering the user's specific question."
+        )
 
-User's question: {question}
-
-Provide a detailed and helpful answer based on what you see in the screenshot. 
-If there are interactive elements, describe them. If there are verification challenges 
-or CAPTCHAs, describe what type they are and what action might be needed.
-Focus on answering the user's specific question."""
-
-        # Call OpenRouter/Gemini for vision analysis
-        if HTTPX_AVAILABLE:
-            import asyncio
-            
-            async def analyze_screenshot():
-                async with httpx.AsyncClient(timeout=60.0) as client:
-                    response = await client.post(
-                        OPENROUTER_CHAT_URL,
-                        headers={
-                            "Authorization": f"Bearer {api_key}",
-                            "Content-Type": "application/json"
-                        },
-                        json={
-                            "model": "google/gemini-3-flash-preview",
-                            "messages": [
-                                {
-                                    "role": "user",
-                                    "content": [
-                                        {"type": "text", "text": vision_prompt},
-                                        {
-                                            "type": "image_url",
-                                            "image_url": {"url": data_url}
-                                        }
-                                    ]
-                                }
-                            ],
-                            "max_tokens": 2000,
-                            "temperature": 0.1
-                        }
-                    )
-                    
-                    if response.status_code != 200:
-                        return {
-                            "success": False,
-                            "error": f"Vision API error: {response.status_code} - {response.text[:200]}"
-                        }
-                    
-                    result_data = response.json()
-                    analysis = result_data["choices"][0]["message"]["content"]
-                    return {
-                        "success": True,
-                        "analysis": analysis
-                    }
-            
-            # Run the async function
-            try:
-                loop = asyncio.get_event_loop()
-            except RuntimeError:
-                loop = asyncio.new_event_loop()
-                asyncio.set_event_loop(loop)
-            
-            vision_result = loop.run_until_complete(analyze_screenshot())
-            return json.dumps(vision_result, ensure_ascii=False)
-        
-        else:
-            # Fallback: use synchronous requests
-            response = requests.post(
-                OPENROUTER_CHAT_URL,
-                headers={
-                    "Authorization": f"Bearer {api_key}",
-                    "Content-Type": "application/json"
-                },
-                json={
-                    "model": "google/gemini-3-flash-preview",
-                    "messages": [
-                        {
-                            "role": "user",
-                            "content": [
-                                {"type": "text", "text": vision_prompt},
-                                {
-                                    "type": "image_url",
-                                    "image_url": {"url": data_url}
-                                }
-                            ]
-                        }
+        # Use the sync auxiliary vision client directly
+        response = _aux_vision_client.chat.completions.create(
+            model=EXTRACTION_MODEL,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": vision_prompt},
+                        {"type": "image_url", "image_url": {"url": data_url}},
                     ],
-                    "max_tokens": 2000,
-                    "temperature": 0.1
-                },
-                timeout=60
-            )
-            
-            if response.status_code != 200:
-                return json.dumps({
-                    "success": False,
-                    "error": f"Vision API error: {response.status_code} - {response.text[:200]}"
-                }, ensure_ascii=False)
-            
-            result_data = response.json()
-            analysis = result_data["choices"][0]["message"]["content"]
-            return json.dumps({
-                "success": True,
-                "analysis": analysis
-            }, ensure_ascii=False)
+                }
+            ],
+            max_tokens=2000,
+            temperature=0.1,
+        )
+        
+        analysis = response.choices[0].message.content
+        return json.dumps({
+            "success": True,
+            "analysis": analysis,
+        }, ensure_ascii=False)
     
     except Exception as e:
         return json.dumps({
diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py
index 07c39989e8..16508e9762 100644
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@@ -22,9 +22,19 @@ import os
 import logging
 from typing import Dict, Any, List, Optional
 
-from tools.openrouter_client import get_async_client as _get_client
+from openai import AsyncOpenAI, OpenAI
 
-SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
+from agent.auxiliary_client import get_text_auxiliary_client
+
+# Resolve the auxiliary client once at import time to learn the model slug, then
+# mirror its api_key/base_url into an AsyncOpenAI client for async summarization.
+_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
+_async_aux_client: Optional[AsyncOpenAI] = None
+if _aux_client is not None:
+    _async_aux_client = AsyncOpenAI(
+        api_key=_aux_client.api_key,
+        base_url=str(_aux_client.base_url),
+    )
 MAX_SESSION_CHARS = 100_000
 MAX_SUMMARY_TOKENS = 2000
 
@@ -126,11 +136,15 @@ async def _summarize_session(
         f"Summarize this conversation with focus on: {query}"
     )
 
+    if _async_aux_client is None or _SUMMARIZER_MODEL is None:
+        logging.warning("No auxiliary model available for session summarization")
+        return None
+
     max_retries = 3
     for attempt in range(max_retries):
         try:
-            response = await _get_client().chat.completions.create(
-                model=SUMMARIZER_MODEL,
+            response = await _async_aux_client.chat.completions.create(
+                model=_SUMMARIZER_MODEL,
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_prompt},
@@ -252,8 +266,8 @@ def session_search(
 
 
 def check_session_search_requirements() -> bool:
-    """Requires SQLite state database and OpenRouter API key."""
-    if not os.getenv("OPENROUTER_API_KEY"):
+    """Requires SQLite state database and an auxiliary text model."""
+    if _async_aux_client is None:
         return False
     try:
         from hermes_state import DEFAULT_DB_PATH
@@ -316,5 +330,4 @@ registry.register(
         limit=args.get("limit", 3),
         db=kw.get("db")),
     check_fn=check_session_search_requirements,
-    requires_env=["OPENROUTER_API_KEY"],
 )
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 6b95d185db..09d1ff31dc 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
 
 # Global state for environment lifecycle management
 _active_environments: Dict[str, Any] = {}
-_task_workdirs: Dict[str, str] = {}  # Maps task_id to working directory
 _last_activity: Dict[str, float] = {}
 _env_lock = threading.Lock()
 _creation_locks: Dict[str, threading.Lock] = {}  # Per-task locks for sandbox creation
@@ -530,7 +529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
             if current_time - last_time > lifetime_seconds:
                 env = _active_environments.pop(task_id, None)
                 _last_activity.pop(task_id, None)
-                _task_workdirs.pop(task_id, None)
                 if env is not None:
                     envs_to_stop.append((task_id, env))
 
@@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]:
     info = {
         "count": len(_active_environments),
         "task_ids": list(_active_environments.keys()),
-        "workdirs": dict(_task_workdirs),
+        "workdirs": {},
     }
     
     # Calculate total disk usage
@@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]:
 
 def cleanup_all_environments():
     """Clean up ALL active environments. Use with caution."""
-    global _active_environments, _last_activity, _task_workdirs
+    global _active_environments, _last_activity
     
     task_ids = list(_active_environments.keys())
     cleaned = 0
@@ -661,7 +659,7 @@ def cleanup_all_environments():
 
 def cleanup_vm(task_id: str):
     """Manually clean up a specific environment by task_id."""
-    global _active_environments, _last_activity, _task_workdirs
+    global _active_environments, _last_activity
 
     # Remove from tracking dicts while holding the lock, but defer the
     # actual (potentially slow) env.cleanup() call to outside the lock
@@ -669,7 +667,6 @@ def cleanup_vm(task_id: str):
     env = None
     with _env_lock:
         env = _active_environments.pop(task_id, None)
-        _task_workdirs.pop(task_id, None)
         _last_activity.pop(task_id, None)
 
     # Clean up per-task creation lock
@@ -782,17 +779,6 @@ def terminal_tool(
         default_timeout = config["timeout"]
         effective_timeout = timeout or default_timeout
 
-        # For local environment in batch mode, create a unique subdirectory per task
-        # This prevents parallel tasks from overwriting each other's files
-        # In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
-        if env_type == "local" and not os.getenv("HERMES_QUIET"):
-            with _env_lock:
-                if effective_task_id not in _task_workdirs:
-                    task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
-                    task_workdir.mkdir(parents=True, exist_ok=True)
-                    _task_workdirs[effective_task_id] = str(task_workdir)
-                cwd = _task_workdirs[effective_task_id]
-
         # Start cleanup thread
         _start_cleanup_thread()
 
@@ -874,11 +860,16 @@ def terminal_tool(
                         "description": approval.get("description", "dangerous command"),
                         "pattern_key": approval.get("pattern_key", ""),
                     }, ensure_ascii=False)
-                # Command was blocked - return informative message
+                # Command was blocked - include the pattern category so the caller knows why
+                desc = approval.get("description", "potentially dangerous operation")
+                fallback_msg = (
+                    f"Command denied: matches '{desc}' pattern. "
+                    "Use the approval prompt to allow it, or rephrase the command."
+                )
                 return json.dumps({
                     "output": "",
                     "exit_code": -1,
-                    "error": approval.get("message", "Command denied - potentially dangerous operation"),
+                    "error": approval.get("message", fallback_msg),
                     "status": "blocked"
                 }, ensure_ascii=False)
 
@@ -996,11 +987,17 @@ def terminal_tool(
             # Add helpful message for sudo failures in messaging context
             output = _handle_sudo_failure(output, env_type)
             
-            # Truncate output if too long
+            # Truncate output if too long, keeping both head and tail
             MAX_OUTPUT_CHARS = 50000
             if len(output) > MAX_OUTPUT_CHARS:
-                truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..."
-                output = truncated_notice + output[-MAX_OUTPUT_CHARS:]
+                head_chars = int(MAX_OUTPUT_CHARS * 0.4)  # 40% head (error messages often appear early)
+                tail_chars = MAX_OUTPUT_CHARS - head_chars  # 60% tail (most recent/relevant output)
+                omitted = len(output) - head_chars - tail_chars
+                truncated_notice = (
+                    f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
+                    f"out of {len(output)} total] ...\n\n"
+                )
+                output = output[:head_chars] + truncated_notice + output[-tail_chars:]
 
             return json.dumps({
                 "output": output.strip() if output else "",
diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index 7750485d5d..90c0d430c2 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -36,13 +36,20 @@ import base64
 from pathlib import Path
 from typing import Dict, Any, Optional
 import httpx
-from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
+from openai import AsyncOpenAI
+from agent.auxiliary_client import get_vision_auxiliary_client
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
 
-# Configuration for vision processing
-DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
+# Resolve the vision auxiliary client at module level; mirror it as an async client.
+_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
+_aux_async_client: Optional[AsyncOpenAI] = None
+if _aux_sync_client is not None:
+    _aux_async_client = AsyncOpenAI(
+        api_key=_aux_sync_client.api_key,
+        base_url=str(_aux_sync_client.base_url),
+    )
 
 _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
 
@@ -230,9 +237,13 @@ async def vision_analyze_tool(
         logger.info("Analyzing image: %s", image_url[:60])
         logger.info("User prompt: %s", user_prompt[:100])
         
-        # Check API key availability
-        if not os.getenv("OPENROUTER_API_KEY"):
-            raise ValueError("OPENROUTER_API_KEY environment variable not set")
+        # Check auxiliary vision client availability
+        if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
+            return json.dumps({
+                "success": False,
+                "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
+                            "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
+            }, indent=2, ensure_ascii=False)
         
         # Determine if this is a local file path or a remote URL
         local_path = Path(image_url)
@@ -291,18 +302,12 @@ async def vision_analyze_tool(
         
         logger.info("Processing image with %s...", model)
         
-        # Call the vision API with reasoning enabled
-        response = await _get_openrouter_client().chat.completions.create(
+        # Call the vision API
+        response = await _aux_async_client.chat.completions.create(
             model=model,
             messages=messages,
-            temperature=0.1,  # Low temperature for consistent analysis
-            max_tokens=2000,  # Generous limit for detailed analysis
-            extra_body={
-                "reasoning": {
-                    "enabled": True,
-                    "effort": "xhigh"
-                }
-            }
+            temperature=0.1,
+            max_tokens=2000,
         )
         
         # Extract the analysis
@@ -353,13 +358,8 @@ async def vision_analyze_tool(
 
 
 def check_vision_requirements() -> bool:
-    """
-    Check if all requirements for vision tools are met.
-    
-    Returns:
-        bool: True if requirements are met, False otherwise
-    """
-    return check_openrouter_api_key()
+    """Check if an auxiliary vision model is available."""
+    return _aux_async_client is not None
 
 
 def get_debug_session_info() -> Dict[str, Any]:
@@ -379,16 +379,15 @@ if __name__ == "__main__":
     print("👁️ Vision Tools Module")
     print("=" * 40)
     
-    # Check if API key is available
-    api_available = check_openrouter_api_key()
+    # Check if vision model is available
+    api_available = check_vision_requirements()
     
     if not api_available:
-        print("❌ OPENROUTER_API_KEY environment variable not set")
-        print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
-        print("Get API key at: https://openrouter.ai/")
+        print("❌ No auxiliary vision model available")
+        print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
         exit(1)
     else:
-        print("✅ OpenRouter API key found")
+        print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
     
     print("🛠️ Vision tools ready for use!")
     print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
@@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw):
     image_url = args.get("image_url", "")
     question = args.get("question", "")
     full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
-    return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview")
+    model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
+    return vision_analyze_tool(image_url, full_prompt, model)
 
 
 registry.register(
@@ -464,6 +464,5 @@ registry.register(
     schema=VISION_ANALYZE_SCHEMA,
     handler=_handle_vision_analyze,
     check_fn=check_vision_requirements,
-    requires_env=["OPENROUTER_API_KEY"],
     is_async=True,
 )
diff --git a/tools/web_tools.py b/tools/web_tools.py
index 5809a26faf..868abb9420 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -47,7 +47,8 @@ import re
 import asyncio
 from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
-from tools.openrouter_client import get_async_client as _get_openrouter_client
+from openai import AsyncOpenAI
+from agent.auxiliary_client import get_text_auxiliary_client
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
@@ -64,9 +65,17 @@ def _get_firecrawl_client():
         _firecrawl_client = Firecrawl(api_key=api_key)
     return _firecrawl_client
 
-DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
 
+# Resolve the auxiliary text client at module level; mirror it as an async client.
+_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
+_aux_async_client: Optional[AsyncOpenAI] = None
+if _aux_sync_client is not None:
+    _aux_async_client = AsyncOpenAI(
+        api_key=_aux_sync_client.api_key,
+        base_url=str(_aux_sync_client.base_url),
+    )
+
 _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
 
 
@@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized,
 
     for attempt in range(max_retries):
         try:
-            response = await _get_openrouter_client().chat.completions.create(
+            if _aux_async_client is None:
+                logger.warning("No auxiliary model available for web content processing")
+                return None
+            response = await _aux_async_client.chat.completions.create(
                 model=model,
                 messages=[
                     {"role": "system", "content": system_prompt},
@@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized,
                 ],
                 temperature=0.1,
                 max_tokens=max_tokens,
-                extra_body={
-                    "reasoning": {
-                        "enabled": True,
-                        "effort": "xhigh"
-                    }
-                }
             )
             return response.choices[0].message.content.strip()
         except Exception as api_error:
@@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that:
 Create a single, unified markdown summary."""
 
     try:
-        response = await _get_openrouter_client().chat.completions.create(
+        if _aux_async_client is None:
+            logger.warning("No auxiliary model for synthesis, concatenating summaries")
+            fallback = "\n\n".join(summaries)
+            if len(fallback) > max_output_size:
+                fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
+            return fallback
+
+        response = await _aux_async_client.chat.completions.create(
             model=model,
             messages=[
                 {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@@ -350,12 +363,6 @@ Create a single, unified markdown summary."""
             ],
             temperature=0.1,
             max_tokens=4000,
-            extra_body={
-                "reasoning": {
-                    "enabled": True,
-                    "effort": "xhigh"
-                }
-            }
         )
         final_summary = response.choices[0].message.content.strip()
         
@@ -677,8 +684,8 @@ async def web_extract_tool(
         debug_call_data["pages_extracted"] = pages_extracted
         debug_call_data["original_response_size"] = len(json.dumps(response))
         
-        # Process each result with LLM if enabled
-        if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
+        # Process each result with LLM if enabled and auxiliary client is available
+        if use_llm_processing and _aux_async_client is not None:
             logger.info("Processing extracted content with LLM (parallel)...")
             debug_call_data["processing_applied"].append("llm_processing")
             
@@ -744,8 +751,8 @@ async def web_extract_tool(
                 else:
                     logger.warning("%s (no content to process)", url)
         else:
-            if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
-                logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
+            if use_llm_processing and _aux_async_client is None:
+                logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
                 debug_call_data["processing_applied"].append("llm_processing_unavailable")
             
             # Print summary of extracted pages for debugging (original behavior)
@@ -973,8 +980,8 @@ async def web_crawl_tool(
         debug_call_data["pages_crawled"] = pages_crawled
         debug_call_data["original_response_size"] = len(json.dumps(response))
         
-        # Process each result with LLM if enabled
-        if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
+        # Process each result with LLM if enabled and auxiliary client is available
+        if use_llm_processing and _aux_async_client is not None:
             logger.info("Processing crawled content with LLM (parallel)...")
             debug_call_data["processing_applied"].append("llm_processing")
             
@@ -1040,8 +1047,8 @@ async def web_crawl_tool(
                 else:
                     logger.warning("%s (no content to process)", page_url)
         else:
-            if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
-                logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
+            if use_llm_processing and _aux_async_client is None:
+                logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
                 debug_call_data["processing_applied"].append("llm_processing_unavailable")
             
             # Print summary of crawled pages for debugging (original behavior)
@@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool:
     return bool(os.getenv("FIRECRAWL_API_KEY"))
 
 
-def check_nous_api_key() -> bool:
-    """
-    Check if the Nous Research API key is available in environment variables.
-    
-    Returns:
-        bool: True if API key is set, False otherwise
-    """
-    return bool(os.getenv("OPENROUTER_API_KEY"))
+def check_auxiliary_model() -> bool:
+    """Check if an auxiliary text model is available for LLM content processing."""
+    return _aux_async_client is not None
 
 
 def get_debug_session_info() -> Dict[str, Any]:
@@ -1120,7 +1122,7 @@ if __name__ == "__main__":
     
     # Check if API keys are available
     firecrawl_available = check_firecrawl_api_key()
-    nous_available = check_nous_api_key()
+    nous_available = check_auxiliary_model()
     
     if not firecrawl_available:
         print("❌ FIRECRAWL_API_KEY environment variable not set")
@@ -1130,12 +1132,11 @@ if __name__ == "__main__":
         print("✅ Firecrawl API key found")
     
     if not nous_available:
-        print("❌ OPENROUTER_API_KEY environment variable not set")
-        print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")  
-        print("Get API key at: https://inference-api.nousresearch.com/")
-        print("⚠️  Without Nous API key, LLM content processing will be disabled")
+        print("❌ No auxiliary model available for LLM content processing")
+        print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
+        print("⚠️  Without an auxiliary model, LLM content processing will be disabled")
     else:
-        print("✅ Nous Research API key found")
+        print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
     
     if not firecrawl_available:
         exit(1)
@@ -1143,7 +1144,7 @@ if __name__ == "__main__":
     print("🛠️  Web tools ready for use!")
     
     if nous_available:
-        print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter")
+        print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}")
         print(f"   Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars")
     
     # Show debug mode status