Hermes Agent UX Improvements

This commit is contained in:
teknium1 2026-02-22 02:16:11 -08:00
parent b1f55e3ee5
commit ededaaa874
23 changed files with 945 additions and 1545 deletions

128
agent/auxiliary_client.py Normal file
View file

@ -0,0 +1,128 @@
"""Shared auxiliary OpenAI client for cheap/fast side tasks.
Provides a single resolution chain so every consumer (context compression,
session search, web extraction, vision analysis, browser vision) picks up
the best available backend without duplicating fallback logic.
Resolution order for text tasks:
1. OpenRouter (OPENROUTER_API_KEY)
2. Nous Portal (~/.hermes/auth.json active provider)
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
4. None
Resolution order for vision/multimodal tasks:
1. OpenRouter
2. Nous Portal
3. None (custom endpoints can't substitute for Gemini multimodal)
"""
import json
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
from openai import OpenAI
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_NOUS_MODEL = "gemini-3-flash"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
def _read_nous_auth() -> Optional[dict]:
    """Load ~/.hermes/auth.json and return the Nous provider state.

    Returns the provider dict only when Nous is the active provider and at
    least one credential (agent_key or access_token) is present; otherwise
    returns None. Any read/parse problem is treated as "not configured"
    and logged at debug level.
    """
    try:
        if not _AUTH_JSON_PATH.is_file():
            return None
        state = json.loads(_AUTH_JSON_PATH.read_text())
        if state.get("active_provider") != "nous":
            return None
        nous_state = state.get("providers", {}).get("nous", {})
        # A usable provider must carry at least one credential.
        has_credential = bool(
            nous_state.get("agent_key") or nous_state.get("access_token")
        )
        return nous_state if has_credential else None
    except Exception as exc:
        logger.debug("Could not read Nous auth: %s", exc)
        return None
def _nous_api_key(provider: dict) -> str:
"""Extract the best API key from a Nous provider state dict."""
return provider.get("agent_key") or provider.get("access_token", "")
def _nous_base_url() -> str:
"""Resolve the Nous inference base URL from env or default."""
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
# ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for text-only auxiliary tasks.

    Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
    """
    # 1. OpenRouter wins whenever a key is configured.
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        logger.debug("Auxiliary text client: OpenRouter")
        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return client, _OPENROUTER_MODEL

    # 2. Nous Portal credentials from ~/.hermes/auth.json.
    portal = _read_nous_auth()
    if portal:
        logger.debug("Auxiliary text client: Nous Portal")
        client = OpenAI(api_key=_nous_api_key(portal), base_url=_nous_base_url())
        return client, _NOUS_MODEL

    # 3. Custom endpoint: requires BOTH base URL and API key to be set.
    custom_base_url = os.getenv("OPENAI_BASE_URL")
    custom_api_key = os.getenv("OPENAI_API_KEY")
    if custom_base_url and custom_api_key:
        model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
        logger.debug("Auxiliary text client: custom endpoint (%s)", model)
        return OpenAI(api_key=custom_api_key, base_url=custom_base_url), model

    # 4. No backend available.
    logger.debug("Auxiliary text client: none available")
    return None, None
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for vision/multimodal auxiliary tasks.

    Only OpenRouter and the Nous Portal qualify -- custom endpoints cannot
    substitute for Gemini multimodal. Returns (None, None) when neither
    backend is configured.
    """
    # 1. OpenRouter wins whenever a key is configured.
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        logger.debug("Auxiliary vision client: OpenRouter")
        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return client, _OPENROUTER_MODEL

    # 2. Fall back to Nous Portal credentials.
    portal = _read_nous_auth()
    if portal:
        logger.debug("Auxiliary vision client: Nous Portal")
        client = OpenAI(api_key=_nous_api_key(portal), base_url=_nous_base_url())
        return client, _NOUS_MODEL

    # 3. Nothing multimodal-capable is available.
    logger.debug("Auxiliary vision client: none available")
    return None, None

View file

@ -9,13 +9,11 @@ import logging
import os
from typing import Any, Dict, List
from openai import OpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
@ -31,7 +29,6 @@ class ContextCompressor:
self,
model: str,
threshold_percent: float = 0.85,
summary_model: str = "google/gemini-3-flash-preview",
protect_first_n: int = 3,
protect_last_n: int = 4,
summary_target_tokens: int = 500,
@ -39,7 +36,6 @@ class ContextCompressor:
):
self.model = model
self.threshold_percent = threshold_percent
self.summary_model = summary_model
self.protect_first_n = protect_first_n
self.protect_last_n = protect_last_n
self.summary_target_tokens = summary_target_tokens
@ -53,8 +49,7 @@ class ContextCompressor:
self.last_completion_tokens = 0
self.last_total_tokens = 0
api_key = os.getenv("OPENROUTER_API_KEY", "")
self.client = OpenAI(api_key=api_key, base_url=OPENROUTER_BASE_URL) if api_key else None
self.client, self.summary_model = get_text_auxiliary_client()
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@ -155,6 +150,26 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
if not self.quiet_mode:
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
# Truncation fallback when no auxiliary model is available
if self.client is None:
print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.")
# Keep system message(s) at the front and the protected tail;
# simply drop the oldest non-system messages until under threshold.
kept = []
for msg in messages:
if msg.get("role") == "system":
kept.append(msg.copy())
else:
break
tail = messages[-self.protect_last_n:]
kept.extend(m.copy() for m in tail)
self.compression_count += 1
if not self.quiet_mode:
print(f" ✂️ Truncated: {len(messages)}{len(kept)} messages (dropped middle turns)")
return kept
if not self.quiet_mode:
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
summary = self._generate_summary(turns_to_summarize)

View file

@ -4,11 +4,16 @@ Pure display functions and classes with no AIAgent dependency.
Used by AIAgent._execute_tool_calls for CLI feedback.
"""
import json
import os
import random
import threading
import time
# ANSI escape codes for coloring tool failure indicators
_RED = "\033[31m"
_RESET = "\033[0m"
# =========================================================================
# Tool preview (one-line summary of a tool call's primary argument)
@ -242,12 +247,46 @@ KAWAII_GENERIC = [
# Cute tool message (completion line that replaces the spinner)
# =========================================================================
def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
"""Inspect a tool result string for signs of failure.
Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
failures. On success, returns ``(False, "")``.
"""
if result is None:
return False, ""
if tool_name == "terminal":
try:
data = json.loads(result)
exit_code = data.get("exit_code")
if exit_code is not None and exit_code != 0:
return True, f" [exit {exit_code}]"
except (json.JSONDecodeError, TypeError, AttributeError):
pass
return False, ""
# Generic heuristic for non-terminal tools
lower = result[:500].lower()
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
return True, " [error]"
return False, ""
def get_cute_tool_message(
tool_name: str, args: dict, duration: float, result: str | None = None,
) -> str:
"""Generate a formatted tool completion line for CLI quiet mode.
Format: ``| {emoji} {verb:9} {detail} {duration}``
When *result* is provided the line is checked for failure indicators.
Failed tool calls get a red prefix and an informational suffix.
"""
dur = f"{duration:.1f}s"
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
def _trunc(s, n=40):
s = str(s)
@ -257,105 +296,111 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
p = str(p)
return ("..." + p[-(n-3):]) if len(p) > n else p
def _wrap(line: str) -> str:
"""Apply red coloring and failure suffix when the tool failed."""
if not is_failure:
return line
return f"{_RED}{line}{failure_suffix}{_RESET}"
if tool_name == "web_search":
return f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}"
return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
if tool_name == "web_extract":
urls = args.get("urls", [])
if urls:
url = urls[0] if isinstance(urls, list) else str(urls)
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
return f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}"
return f"┊ 📄 fetch pages {dur}"
return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
return _wrap(f"┊ 📄 fetch pages {dur}")
if tool_name == "web_crawl":
url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}"
return _wrap(f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}")
if tool_name == "terminal":
return f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}"
return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "")[:12]
labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}",
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
return f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}"
return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
if tool_name == "read_file":
return f"┊ 📖 read {_path(args.get('path', ''))} {dur}"
return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
if tool_name == "write_file":
return f"┊ ✍️ write {_path(args.get('path', ''))} {dur}"
return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
if tool_name == "patch":
return f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}"
return _wrap(f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}")
if tool_name == "search_files":
pattern = _trunc(args.get("pattern", ""), 35)
target = args.get("target", "content")
verb = "find" if target == "files" else "grep"
return f"┊ 🔎 {verb:9} {pattern} {dur}"
return _wrap(f"┊ 🔎 {verb:9} {pattern} {dur}")
if tool_name == "browser_navigate":
url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}"
return _wrap(f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}")
if tool_name == "browser_snapshot":
mode = "full" if args.get("full") else "compact"
return f"┊ 📸 snapshot {mode} {dur}"
return _wrap(f"┊ 📸 snapshot {mode} {dur}")
if tool_name == "browser_click":
return f"┊ 👆 click {args.get('ref', '?')} {dur}"
return _wrap(f"┊ 👆 click {args.get('ref', '?')} {dur}")
if tool_name == "browser_type":
return f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}"
return _wrap(f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}")
if tool_name == "browser_scroll":
d = args.get("direction", "down")
arrow = {"down": "", "up": "", "right": "", "left": ""}.get(d, "")
return f"{arrow} scroll {d} {dur}"
return _wrap(f"{arrow} scroll {d} {dur}")
if tool_name == "browser_back":
return f"┊ ◀️ back {dur}"
return _wrap(f"┊ ◀️ back {dur}")
if tool_name == "browser_press":
return f"┊ ⌨️ press {args.get('key', '?')} {dur}"
return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
if tool_name == "browser_close":
return f"┊ 🚪 close browser {dur}"
return _wrap(f"┊ 🚪 close browser {dur}")
if tool_name == "browser_get_images":
return f"┊ 🖼️ images extracting {dur}"
return _wrap(f"┊ 🖼️ images extracting {dur}")
if tool_name == "browser_vision":
return f"┊ 👁️ vision analyzing page {dur}"
return _wrap(f"┊ 👁️ vision analyzing page {dur}")
if tool_name == "todo":
todos_arg = args.get("todos")
merge = args.get("merge", False)
if todos_arg is None:
return f"┊ 📋 plan reading tasks {dur}"
return _wrap(f"┊ 📋 plan reading tasks {dur}")
elif merge:
return f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}"
return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}")
else:
return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}"
return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}")
if tool_name == "session_search":
return f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}"
return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}")
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "")
if action == "add":
return f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}"
return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
elif action == "replace":
return f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}"
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
elif action == "remove":
return f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}"
return f"┊ 🧠 memory {action} {dur}"
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory {action} {dur}")
if tool_name == "skills_list":
return f"┊ 📚 skills list {args.get('category', 'all')} {dur}"
return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
if tool_name == "skill_view":
return f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}"
return _wrap(f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}")
if tool_name == "image_generate":
return f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}"
return _wrap(f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}")
if tool_name == "text_to_speech":
return f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}"
return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
if tool_name == "vision_analyze":
return f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}"
return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
if tool_name == "mixture_of_agents":
return f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}"
return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
if tool_name == "send_message":
return f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}"
return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
if tool_name == "schedule_cronjob":
return f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}"
return _wrap(f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}")
if tool_name == "list_cronjobs":
return f"┊ ⏰ jobs listing {dur}"
return _wrap(f"┊ ⏰ jobs listing {dur}")
if tool_name == "remove_cronjob":
return f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}"
return _wrap(f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}")
if tool_name.startswith("rl_"):
rl = {
"rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
@ -364,16 +409,16 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
"rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
"rl_list_runs": "list runs", "rl_test_inference": "test inference",
}
return f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}"
return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}")
if tool_name == "execute_code":
code = args.get("code", "")
first_line = code.strip().split("\n")[0] if code.strip() else ""
return f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}"
return _wrap(f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}")
if tool_name == "delegate_task":
tasks = args.get("tasks")
if tasks and isinstance(tasks, list):
return f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}"
return f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}"
return _wrap(f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}")
return _wrap(f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}")
preview = build_tool_preview(tool_name, args) or ""
return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}"
return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}")

3
cli.py
View file

@ -339,9 +339,6 @@ def _cprint(text: str):
"""
_pt_print(_PT_ANSI(text))
# Version string
VERSION = "v1.0.0"
# ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal)
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]

View file

@ -8,6 +8,7 @@ Handles loading and validating configuration for:
- Delivery preferences
"""
import logging
import os
import json
from pathlib import Path
@ -15,6 +16,8 @@ from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
from enum import Enum
logger = logging.getLogger(__name__)
class Platform(Enum):
"""Supported messaging platforms."""
@ -264,6 +267,40 @@ def load_gateway_config() -> GatewayConfig:
# Override with environment variables
_apply_env_overrides(config)
# --- Validate loaded values ---
policy = config.default_reset_policy
if not (0 <= policy.at_hour <= 23):
logger.warning(
"Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour
)
policy.at_hour = 4
if policy.idle_minutes is None or policy.idle_minutes <= 0:
logger.warning(
"Invalid idle_minutes=%s (must be positive). Using default 1440.",
policy.idle_minutes,
)
policy.idle_minutes = 1440
# Warn about empty bot tokens — platforms that loaded an empty string
# won't connect and the cause can be confusing without a log line.
_token_env_names = {
Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
Platform.DISCORD: "DISCORD_BOT_TOKEN",
Platform.SLACK: "SLACK_BOT_TOKEN",
}
for platform, pconfig in config.platforms.items():
if not pconfig.enabled:
continue
env_name = _token_env_names.get(platform)
if env_name and pconfig.token is not None and not pconfig.token.strip():
logger.warning(
"%s is enabled but %s is empty. "
"The adapter will likely fail to connect.",
platform.value, env_name,
)
return config

View file

@ -8,12 +8,18 @@ Routes messages to the appropriate destination based on:
- Local (always saved to files)
"""
import logging
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass
from typing import Dict, List, Optional, Any, Union
from enum import Enum
logger = logging.getLogger(__name__)
MAX_PLATFORM_OUTPUT = 4000
TRUNCATED_VISIBLE = 3800
from .config import Platform, GatewayConfig
from .session import SessionSource
@ -245,6 +251,15 @@ class DeliveryRouter:
"timestamp": timestamp
}
def _save_full_output(self, content: str, job_id: str) -> Path:
"""Save full cron output to disk and return the file path."""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir = Path.home() / ".hermes" / "cron" / "output"
out_dir.mkdir(parents=True, exist_ok=True)
path = out_dir / f"{job_id}_{timestamp}.txt"
path.write_text(content)
return path
async def _deliver_to_platform(
self,
target: DeliveryTarget,
@ -260,8 +275,16 @@ class DeliveryRouter:
if not target.chat_id:
raise ValueError(f"No chat ID for {target.platform.value} delivery")
# Call the adapter's send method
# Adapters should implement: async def send(chat_id: str, content: str) -> Dict
# Guard: truncate oversized cron output to stay within platform limits
if len(content) > MAX_PLATFORM_OUTPUT:
job_id = (metadata or {}).get("job_id", "unknown")
saved_path = self._save_full_output(content, job_id)
logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path)
content = (
content[:TRUNCATED_VISIBLE]
+ f"\n\n... [truncated, full output saved to {saved_path}]"
)
return await adapter.send(target.chat_id, content, metadata=metadata)

View file

@ -659,34 +659,90 @@ class BasePlatformAdapter(ABC):
def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
"""
Split a long message into chunks.
Split a long message into chunks, preserving code block boundaries.
When a split falls inside a triple-backtick code block, the fence is
closed at the end of the current chunk and reopened (with the original
language tag) at the start of the next chunk. Multi-chunk responses
receive indicators like ``(1/3)``.
Args:
content: The full message content
max_length: Maximum length per chunk (platform-specific)
Returns:
List of message chunks
"""
if len(content) <= max_length:
return [content]
chunks = []
while content:
if len(content) <= max_length:
chunks.append(content)
INDICATOR_RESERVE = 10 # room for " (XX/XX)"
FENCE_CLOSE = "\n```"
chunks: List[str] = []
remaining = content
# When the previous chunk ended mid-code-block, this holds the
# language tag (possibly "") so we can reopen the fence.
carry_lang: Optional[str] = None
while remaining:
# If we're continuing a code block from the previous chunk,
# prepend a new opening fence with the same language tag.
prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
# How much body text we can fit after accounting for the prefix,
# a potential closing fence, and the chunk indicator.
headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
if headroom < 1:
headroom = max_length // 2
# Everything remaining fits in one final chunk
if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
chunks.append(prefix + remaining)
break
# Try to split at a newline
split_idx = content.rfind("\n", 0, max_length)
if split_idx == -1:
# No newline, split at space
split_idx = content.rfind(" ", 0, max_length)
if split_idx == -1:
# No space either, hard split
split_idx = max_length
chunks.append(content[:split_idx])
content = content[split_idx:].lstrip()
# Find a natural split point (prefer newlines, then spaces)
region = remaining[:headroom]
split_at = region.rfind("\n")
if split_at < headroom // 2:
split_at = region.rfind(" ")
if split_at < 1:
split_at = headroom
chunk_body = remaining[:split_at]
remaining = remaining[split_at:].lstrip()
full_chunk = prefix + chunk_body
# Walk the chunk line-by-line to determine whether we end
# inside an open code block.
in_code = carry_lang is not None
lang = carry_lang or ""
for line in full_chunk.split("\n"):
stripped = line.strip()
if stripped.startswith("```"):
if in_code:
in_code = False
lang = ""
else:
in_code = True
tag = stripped[3:].strip()
lang = tag.split()[0] if tag else ""
if in_code:
# Close the orphaned fence so the chunk is valid on its own
full_chunk += FENCE_CLOSE
carry_lang = lang
else:
carry_lang = None
chunks.append(full_chunk)
# Append chunk indicators when the response spans multiple messages
if len(chunks) > 1:
total = len(chunks)
chunks = [
f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
]
return chunks

View file

@ -8,6 +8,7 @@ Uses python-telegram-bot library for:
"""
import asyncio
import re
from typing import Dict, List, Optional, Any
try:
@ -49,6 +50,16 @@ def check_telegram_requirements() -> bool:
return TELEGRAM_AVAILABLE
# Matches every character that MarkdownV2 requires to be backslash-escaped
# when it appears outside a code span or fenced code block.
_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
def _escape_mdv2(text: str) -> str:
"""Escape Telegram MarkdownV2 special characters with a preceding backslash."""
return _MDV2_ESCAPE_RE.sub(r'\\\1', text)
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@ -167,7 +178,7 @@ class TelegramAdapter(BasePlatformAdapter):
msg = await self._bot.send_message(
chat_id=int(chat_id),
text=chunk,
parse_mode=ParseMode.MARKDOWN,
parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
message_thread_id=int(thread_id) if thread_id else None,
)
@ -297,14 +308,81 @@ class TelegramAdapter(BasePlatformAdapter):
def format_message(self, content: str) -> str:
"""
Format message for Telegram.
Telegram uses a subset of markdown. We'll use the simpler
Markdown mode (not MarkdownV2) for compatibility.
Convert standard markdown to Telegram MarkdownV2 format.
Protected regions (code blocks, inline code) are extracted first so
their contents are never modified. Standard markdown constructs
(headers, bold, italic, links) are translated to MarkdownV2 syntax,
and all remaining special characters are escaped.
"""
# Basic escaping for Telegram Markdown
# In Markdown mode (not V2), only certain characters need escaping
return content
if not content:
return content
placeholders: dict = {}
counter = [0]
def _ph(value: str) -> str:
"""Stash *value* behind a placeholder token that survives escaping."""
key = f"\x00PH{counter[0]}\x00"
counter[0] += 1
placeholders[key] = value
return key
text = content
# 1) Protect fenced code blocks (``` ... ```)
text = re.sub(
r'(```(?:[^\n]*\n)?[\s\S]*?```)',
lambda m: _ph(m.group(0)),
text,
)
# 2) Protect inline code (`...`)
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
# 3) Convert markdown links escape the display text; inside the URL
# only ')' and '\' need escaping per the MarkdownV2 spec.
def _convert_link(m):
display = _escape_mdv2(m.group(1))
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
inner = m.group(1).strip()
# Strip redundant bold markers that may appear inside a header
inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
return _ph(f'*{_escape_mdv2(inner)}*')
text = re.sub(
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
)
# 5) Convert bold: **text** → *text* (MarkdownV2 bold)
text = re.sub(
r'\*\*(.+?)\*\*',
lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
text,
)
# 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
text = re.sub(
r'\*([^*]+)\*',
lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
text,
)
# 7) Escape remaining special characters in plain text
text = _escape_mdv2(text)
# 8) Restore placeholders in reverse insertion order so that
# nested references (a placeholder inside another) resolve correctly.
for key in reversed(list(placeholders.keys())):
text = text.replace(key, placeholders[key])
return text
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming text messages."""

View file

@ -20,6 +20,7 @@ import re
import sys
import signal
import threading
from logging.handlers import RotatingFileHandler
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List
@ -402,9 +403,27 @@ class GatewayRunner:
# Build the context prompt to inject
context_prompt = build_session_context_prompt(context)
# If the previous session expired and was auto-reset, prepend a notice
# so the agent knows this is a fresh conversation (not an intentional /reset).
if getattr(session_entry, 'was_auto_reset', False):
context_prompt = (
"[System note: The user's previous session expired due to inactivity. "
"This is a fresh conversation with no prior context.]\n\n"
+ context_prompt
)
session_entry.was_auto_reset = False
# Load conversation history from transcript
history = self.session_store.load_transcript(session_entry.session_id)
# First-message onboarding for brand-new messaging platform users
if not history:
context_prompt += (
"\n\n[System note: This is the user's very first message in this session. "
"Briefly introduce yourself and mention that /help shows available commands. "
"Keep the introduction concise -- one or two sentences max.]"
)
# -----------------------------------------------------------------
# Auto-analyze images sent by the user
#
@ -1342,15 +1361,32 @@ def _start_cron_ticker(stop_event: threading.Event, interval: int = 60):
Runs inside the gateway process so cronjobs fire automatically without
needing a separate `hermes cron daemon` or system cron entry.
Every 60th tick (~once per hour) the image/audio cache is pruned so
stale temp files don't accumulate.
"""
from cron.scheduler import tick as cron_tick
from gateway.platforms.base import cleanup_image_cache
IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
logger.info("Cron ticker started (interval=%ds)", interval)
tick_count = 0
while not stop_event.is_set():
try:
cron_tick(verbose=False)
except Exception as e:
logger.debug("Cron tick error: %s", e)
tick_count += 1
if tick_count % IMAGE_CACHE_EVERY == 0:
try:
removed = cleanup_image_cache(max_age_hours=24)
if removed:
logger.info("Image cache cleanup: removed %d stale file(s)", removed)
except Exception as e:
logger.debug("Image cache cleanup error: %s", e)
stop_event.wait(timeout=interval)
logger.info("Cron ticker stopped")
@ -1363,6 +1399,18 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
Returns True if the gateway ran successfully, False if it failed to start.
A False return causes a non-zero exit code so systemd can auto-restart.
"""
# Configure rotating file log so gateway output is persisted for debugging
log_dir = Path.home() / '.hermes' / 'logs'
log_dir.mkdir(parents=True, exist_ok=True)
file_handler = RotatingFileHandler(
log_dir / 'gateway.log',
maxBytes=5 * 1024 * 1024,
backupCount=3,
)
file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
logging.getLogger().addHandler(file_handler)
logging.getLogger().setLevel(logging.INFO)
runner = GatewayRunner(config)
# Set up signal handlers

View file

@ -219,6 +219,10 @@ class SessionEntry:
output_tokens: int = 0
total_tokens: int = 0
# Set when a session was created because the previous one expired;
# consumed once by the message handler to inject a notice into context
was_auto_reset: bool = False
def to_dict(self) -> Dict[str, Any]:
result = {
"session_key": self.session_key,
@ -388,11 +392,14 @@ class SessionStore:
return entry
else:
# Session is being reset -- end the old one in SQLite
was_auto_reset = True
if self._db:
try:
self._db.end_session(entry.session_id, "session_reset")
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
else:
was_auto_reset = False
# Create new session
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
@ -406,6 +413,7 @@ class SessionStore:
display_name=source.chat_name,
platform=source.platform,
chat_type=source.chat_type,
was_auto_reset=was_auto_reset,
)
self._entries[session_key] = entry

View file

@ -11,4 +11,4 @@ Provides subcommands for:
- hermes cron - Manage cron jobs
"""
__version__ = "0.1.0"
__version__ = "v1.0.0"

View file

@ -33,7 +33,7 @@ def cprint(text: str):
# ASCII Art & Branding
# =========================================================================
VERSION = "v1.0.0"
from hermes_cli import __version__ as VERSION
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]

View file

@ -44,6 +44,8 @@ def run_doctor(args):
should_fix = getattr(args, 'fix', False)
issues = []
manual_issues = [] # issues that can't be auto-fixed
fixed_count = 0
print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
@ -135,8 +137,15 @@ def run_doctor(args):
check_ok(".env file exists (in project directory)")
else:
check_fail("~/.hermes/.env file missing")
check_info("Run 'hermes setup' to create one")
issues.append("Run 'hermes setup' to create .env")
if should_fix:
env_path.parent.mkdir(parents=True, exist_ok=True)
env_path.touch()
check_ok("Created empty ~/.hermes/.env")
check_info("Run 'hermes setup' to configure API keys")
fixed_count += 1
else:
check_info("Run 'hermes setup' to create one")
issues.append("Run 'hermes setup' to create .env")
# Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback)
config_path = HERMES_HOME / 'config.yaml'
@ -147,7 +156,17 @@ def run_doctor(args):
if fallback_config.exists():
check_ok("cli-config.yaml exists (in project directory)")
else:
check_warn("config.yaml not found", "(using defaults)")
example_config = PROJECT_ROOT / 'cli-config.yaml.example'
if should_fix and example_config.exists():
config_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(example_config), str(config_path))
check_ok("Created ~/.hermes/config.yaml from cli-config.yaml.example")
fixed_count += 1
elif should_fix:
check_warn("config.yaml not found and no example to copy from")
manual_issues.append("Create ~/.hermes/config.yaml manually")
else:
check_warn("config.yaml not found", "(using defaults)")
# =========================================================================
# Check: Directory structure
@ -159,7 +178,26 @@ def run_doctor(args):
if hermes_home.exists():
check_ok("~/.hermes directory exists")
else:
check_warn("~/.hermes not found", "(will be created on first use)")
if should_fix:
hermes_home.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes directory")
fixed_count += 1
else:
check_warn("~/.hermes not found", "(will be created on first use)")
# Check expected subdirectories
expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"]
for subdir_name in expected_subdirs:
subdir_path = hermes_home / subdir_name
if subdir_path.exists():
check_ok(f"~/.hermes/{subdir_name}/ exists")
else:
if should_fix:
subdir_path.mkdir(parents=True, exist_ok=True)
check_ok(f"Created ~/.hermes/{subdir_name}/")
fixed_count += 1
else:
check_warn(f"~/.hermes/{subdir_name}/ not found", "(will be created on first use)")
# Check for SOUL.md persona file
soul_path = hermes_home / "SOUL.md"
@ -175,14 +213,25 @@ def run_doctor(args):
check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)")
if should_fix:
soul_path.parent.mkdir(parents=True, exist_ok=True)
soul_path.write_text("# Hermes Agent Persona\n\n<!-- Edit this file to customize how Hermes communicates. -->\n", encoding="utf-8")
check_ok("Created ~/.hermes/SOUL.md")
soul_path.write_text(
"# Hermes Agent Persona\n\n"
"<!-- Edit this file to customize how Hermes communicates. -->\n\n"
"You are Hermes, a helpful AI assistant.\n",
encoding="utf-8",
)
check_ok("Created ~/.hermes/SOUL.md with basic template")
fixed_count += 1
logs_dir = PROJECT_ROOT / "logs"
if logs_dir.exists():
check_ok("logs/ directory exists")
check_ok("logs/ directory exists (project root)")
else:
check_warn("logs/ not found", "(will be created on first use)")
if should_fix:
logs_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created logs/ directory")
fixed_count += 1
else:
check_warn("logs/ not found", "(will be created on first use)")
# Check memory directory
memories_dir = hermes_home / "memories"
@ -205,6 +254,7 @@ def run_doctor(args):
if should_fix:
memories_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes/memories/")
fixed_count += 1
# Check SQLite session store
state_db_path = hermes_home / "state.db"
@ -299,6 +349,7 @@ def run_doctor(args):
openrouter_key = os.getenv("OPENROUTER_API_KEY")
if openrouter_key:
print(" Checking OpenRouter API...", end="", flush=True)
try:
import httpx
response = httpx.get(
@ -307,20 +358,21 @@ def run_doctor(args):
timeout=10
)
if response.status_code == 200:
check_ok("OpenRouter API")
print(f"\r {color('', Colors.GREEN)} OpenRouter API ")
elif response.status_code == 401:
check_fail("OpenRouter API", "(invalid API key)")
print(f"\r {color('', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ")
issues.append("Check OPENROUTER_API_KEY in .env")
else:
check_fail("OpenRouter API", f"(HTTP {response.status_code})")
print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ")
except Exception as e:
check_fail("OpenRouter API", f"({e})")
print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ")
issues.append("Check network connectivity")
else:
check_warn("OpenRouter API", "(not configured)")
anthropic_key = os.getenv("ANTHROPIC_API_KEY")
if anthropic_key:
print(" Checking Anthropic API...", end="", flush=True)
try:
import httpx
response = httpx.get(
@ -332,14 +384,14 @@ def run_doctor(args):
timeout=10
)
if response.status_code == 200:
check_ok("Anthropic API")
print(f"\r {color('', Colors.GREEN)} Anthropic API ")
elif response.status_code == 401:
check_fail("Anthropic API", "(invalid API key)")
print(f"\r {color('', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ")
else:
# Note: Anthropic may not have /models endpoint
check_warn("Anthropic API", "(couldn't verify)")
msg = "(couldn't verify)"
print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ")
except Exception as e:
check_warn("Anthropic API", f"({e})")
print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ")
# =========================================================================
# Check: Submodules
@ -440,17 +492,28 @@ def run_doctor(args):
# Summary
# =========================================================================
print()
if issues:
print(color("" * 60, Colors.YELLOW))
print(color(f" Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
remaining_issues = issues + manual_issues
if should_fix and fixed_count > 0:
print(color("" * 60, Colors.GREEN))
print(color(f" Fixed {fixed_count} issue(s).", Colors.GREEN, Colors.BOLD), end="")
if remaining_issues:
print(color(f" {len(remaining_issues)} issue(s) require manual intervention.", Colors.YELLOW, Colors.BOLD))
else:
print()
print()
for i, issue in enumerate(issues, 1):
if remaining_issues:
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}")
print()
elif remaining_issues:
print(color("" * 60, Colors.YELLOW))
print(color(f" Found {len(remaining_issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
print()
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}")
print()
if should_fix:
print(color(" Attempting auto-fix is not yet implemented.", Colors.DIM))
print(color(" Please resolve issues manually.", Colors.DIM))
if not should_fix:
print(color(" Tip: run 'hermes doctor --fix' to auto-fix what's possible.", Colors.DIM))
else:
print(color("" * 60, Colors.GREEN))
print(color(" All checks passed! 🎉", Colors.GREEN, Colors.BOLD))

View file

@ -47,8 +47,66 @@ from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
def _has_any_provider_configured() -> bool:
    """Return True when at least one inference provider looks usable.

    Probes, in order: live environment variables, the persisted ``.env``
    file (in case it has not been loaded into the environment yet), and
    Nous Portal OAuth credentials stored in ``auth.json``.
    """
    from hermes_cli.config import get_env_path, get_hermes_home

    key_names = ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY")

    # 1. Environment variables (may be set by .env or the shell).
    if any(os.getenv(name) for name in key_names):
        return True

    # 2. Scan the .env file directly for any of the known keys.
    env_file = get_env_path()
    if env_file.exists():
        try:
            for raw in env_file.read_text().splitlines():
                raw = raw.strip()
                if raw.startswith("#") or "=" not in raw:
                    continue
                name, _, value = raw.partition("=")
                value = value.strip().strip("'\"")
                if name.strip() in key_names and value:
                    return True
        except Exception:
            # Best-effort: an unreadable .env simply means "not configured here".
            pass

    # 3. Nous Portal OAuth credentials (auth.json with an active provider).
    auth_file = get_hermes_home() / "auth.json"
    if auth_file.exists():
        try:
            import json

            auth = json.loads(auth_file.read_text())
            active = auth.get("active_provider")
            if active:
                state = auth.get("providers", {}).get(active, {})
                if state.get("access_token") or state.get("refresh_token"):
                    return True
        except Exception:
            pass

    return False
def cmd_chat(args):
"""Run interactive chat CLI."""
# First-run guard: check if any provider is configured before launching
if not _has_any_provider_configured():
print()
print("It looks like Hermes isn't configured yet -- no API keys or providers found.")
print()
print(" Run: hermes setup")
print()
try:
reply = input("Run setup now? [Y/n] ").strip().lower()
except (EOFError, KeyboardInterrupt):
reply = "n"
if reply in ("", "y", "yes"):
cmd_setup(args)
return
print()
print("You can run 'hermes setup' at any time to configure.")
sys.exit(1)
# Import and run the CLI
from cli import main as cli_main
@ -219,20 +277,10 @@ def _model_flow_openrouter(config, current_model=""):
print("API key saved.")
print()
OPENROUTER_MODELS = [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-opus-4.5",
"openai/gpt-5.2",
"openai/gpt-5.2-codex",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
"z-ai/glm-4.7",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.1",
]
from hermes_cli.models import model_ids
openrouter_models = model_ids()
selected = _prompt_model_selection(OPENROUTER_MODELS, current_model=current_model)
selected = _prompt_model_selection(openrouter_models, current_model=current_model)
if selected:
# Clear any custom endpoint and set provider to openrouter
if get_env_value("OPENAI_BASE_URL"):

33
hermes_cli/models.py Normal file
View file

@ -0,0 +1,33 @@
"""
Canonical list of OpenRouter models offered in CLI and setup wizards.
Add, remove, or reorder entries here — both `hermes setup` and
`hermes` provider-selection will pick up the change automatically.
"""
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6", "recommended"),
    ("anthropic/claude-sonnet-4.5", ""),
    ("anthropic/claude-opus-4.5", ""),
    ("openai/gpt-5.2", ""),
    ("openai/gpt-5.2-codex", ""),
    ("google/gemini-3-pro-preview", ""),
    ("google/gemini-3-flash-preview", ""),
    ("z-ai/glm-4.7", ""),
    ("moonshotai/kimi-k2.5", ""),
    ("minimax/minimax-m2.1", ""),
]


def model_ids() -> list[str]:
    """Return just the model-id strings (convenience helper)."""
    return [mid for mid, _ in OPENROUTER_MODELS]


def menu_labels() -> list[str]:
    """Return display labels like 'anthropic/claude-opus-4.6 (recommended)'.

    Entries with an empty description render as the bare model id.
    """
    # Comprehension instead of a manual append loop (same output, idiomatic).
    return [f"{mid} ({desc})" if desc else mid for mid, desc in OPENROUTER_MODELS]

View file

@ -611,46 +611,27 @@ def run_setup_wizard(args):
save_env_value("LLM_MODEL", custom)
# else: keep current
else:
# Static list for OpenRouter / fallback
model_choices = [
"anthropic/claude-opus-4.6 (recommended)",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-opus-4.5",
"openai/gpt-5.2",
"openai/gpt-5.2-codex",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
"z-ai/glm-4.7",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.1",
# Static list for OpenRouter / fallback (from canonical list)
from hermes_cli.models import model_ids, menu_labels
ids = model_ids()
model_choices = menu_labels() + [
"Custom model",
f"Keep current ({current_model})"
f"Keep current ({current_model})",
]
model_idx = prompt_choice("Select default model:", model_choices, 11)
keep_idx = len(model_choices) - 1
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
model_map = {
0: "anthropic/claude-opus-4.6",
1: "anthropic/claude-sonnet-4.5",
2: "anthropic/claude-opus-4.5",
3: "openai/gpt-5.2",
4: "openai/gpt-5.2-codex",
5: "google/gemini-3-pro-preview",
6: "google/gemini-3-flash-preview",
7: "z-ai/glm-4.7",
8: "moonshotai/kimi-k2.5",
9: "minimax/minimax-m2.1",
}
if model_idx in model_map:
config['model'] = model_map[model_idx]
save_env_value("LLM_MODEL", model_map[model_idx])
elif model_idx == 10: # Custom
if model_idx < len(ids):
config['model'] = ids[model_idx]
save_env_value("LLM_MODEL", ids[model_idx])
elif model_idx == len(ids): # Custom
custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
if custom:
config['model'] = custom
save_env_value("LLM_MODEL", custom)
# else: Keep current (model_idx == 11)
# else: Keep current
# =========================================================================
# Step 4: Terminal Backend

File diff suppressed because it is too large Load diff

View file

@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str,
approval_callback=approval_callback)
if choice == "deny":
return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."}
return {
"approved": False,
"message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.",
"pattern_key": pattern_key,
"description": description,
}
if choice == "session":
approve_session(session_key, pattern_key)

View file

@ -51,25 +51,16 @@ import signal
import subprocess
import shutil
import sys
import asyncio
import tempfile
import threading
import time
import requests
from typing import Dict, Any, Optional, List
from pathlib import Path
from hermes_constants import OPENROUTER_CHAT_URL
from agent.auxiliary_client import get_vision_auxiliary_client
logger = logging.getLogger(__name__)
# Try to import httpx for async LLM calls
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
# ============================================================================
# Configuration
# ============================================================================
@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300
# Max tokens for snapshot content before summarization
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
# Model for task-aware extraction
EXTRACTION_MODEL = "google/gemini-3-flash-preview"
# Resolve vision auxiliary client for extraction/vision tasks
_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
# Track active sessions per task
# Now stores tuple of (session_name, browserbase_session_id, cdp_url)
@ -782,87 +773,49 @@ def _run_browser_command(
return {"success": False, "error": str(e)}
async def _extract_relevant_content(
def _extract_relevant_content(
snapshot_text: str,
user_task: Optional[str] = None
) -> str:
"""Use LLM to extract relevant content from a snapshot based on the user's task.
Falls back to simple truncation when no auxiliary vision model is configured.
"""
Use LLM to extract relevant content from a snapshot based on the user's task.
This provides task-aware summarization that preserves meaningful text content
(paragraphs, prices, descriptions) relevant to what the user is trying to accomplish.
Args:
snapshot_text: The full snapshot text
user_task: The user's current task/goal (optional)
Returns:
Summarized/extracted content
"""
if not HTTPX_AVAILABLE:
# Fall back to simple truncation
if _aux_vision_client is None or EXTRACTION_MODEL is None:
return _truncate_snapshot(snapshot_text)
# Get API key
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
return _truncate_snapshot(snapshot_text)
# Build extraction prompt
if user_task:
extraction_prompt = f"""You are a content extractor for a browser automation agent.
The user's task is: {user_task}
Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on:
1. Interactive elements (buttons, links, inputs) that might be needed
2. Text content relevant to the task (prices, descriptions, headings, important info)
3. Navigation structure if relevant
Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.
Page Snapshot:
{snapshot_text}
Provide a concise summary that preserves actionable information and relevant content."""
extraction_prompt = (
f"You are a content extractor for a browser automation agent.\n\n"
f"The user's task is: {user_task}\n\n"
f"Given the following page snapshot (accessibility tree representation), "
f"extract and summarize the most relevant information for completing this task. Focus on:\n"
f"1. Interactive elements (buttons, links, inputs) that might be needed\n"
f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n"
f"3. Navigation structure if relevant\n\n"
f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n"
f"Page Snapshot:\n{snapshot_text}\n\n"
f"Provide a concise summary that preserves actionable information and relevant content."
)
else:
extraction_prompt = f"""Summarize this page snapshot, preserving:
1. All interactive elements with their ref IDs (like [ref=e5])
2. Key text content and headings
3. Important information visible on the page
Page Snapshot:
{snapshot_text}
Provide a concise summary focused on interactive elements and key content."""
extraction_prompt = (
f"Summarize this page snapshot, preserving:\n"
f"1. All interactive elements with their ref IDs (like [ref=e5])\n"
f"2. Key text content and headings\n"
f"3. Important information visible on the page\n\n"
f"Page Snapshot:\n{snapshot_text}\n\n"
f"Provide a concise summary focused on interactive elements and key content."
)
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": EXTRACTION_MODEL,
"messages": [
{"role": "user", "content": extraction_prompt}
],
"max_tokens": 4000,
"temperature": 0.1
}
)
if response.status_code == 200:
result = response.json()
return result["choices"][0]["message"]["content"]
else:
# Fall back to truncation on API error
return _truncate_snapshot(snapshot_text)
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[{"role": "user", "content": extraction_prompt}],
max_tokens=4000,
temperature=0.1,
)
return response.choices[0].message.content
except Exception:
# Fall back to truncation on any error
return _truncate_snapshot(snapshot_text)
@ -991,16 +944,7 @@ def browser_snapshot(
# Check if snapshot needs summarization
if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task:
# Run async extraction
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
snapshot_text = loop.run_until_complete(
_extract_relevant_content(snapshot_text, user_task)
)
snapshot_text = _extract_relevant_content(snapshot_text, user_task)
elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
snapshot_text = _truncate_snapshot(snapshot_text)
@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
effective_task_id = task_id or "default"
# Check for OpenRouter API key
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
# Check auxiliary vision client
if _aux_vision_client is None or EXTRACTION_MODEL is None:
return json.dumps({
"success": False,
"error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key."
"error": "Browser vision unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
}, ensure_ascii=False)
# Create a temporary file for the screenshot
@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
image_base64 = base64.b64encode(image_data).decode("ascii")
data_url = f"data:image/png;base64,{image_base64}"
# Prepare the vision prompt
vision_prompt = f"""You are analyzing a screenshot of a web browser.
vision_prompt = (
f"You are analyzing a screenshot of a web browser.\n\n"
f"User's question: {question}\n\n"
f"Provide a detailed and helpful answer based on what you see in the screenshot. "
f"If there are interactive elements, describe them. If there are verification challenges "
f"or CAPTCHAs, describe what type they are and what action might be needed. "
f"Focus on answering the user's specific question."
)
User's question: {question}
Provide a detailed and helpful answer based on what you see in the screenshot.
If there are interactive elements, describe them. If there are verification challenges
or CAPTCHAs, describe what type they are and what action might be needed.
Focus on answering the user's specific question."""
# Call OpenRouter/Gemini for vision analysis
if HTTPX_AVAILABLE:
import asyncio
async def analyze_screenshot():
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
],
"max_tokens": 2000,
"temperature": 0.1
}
)
if response.status_code != 200:
return {
"success": False,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}"
}
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return {
"success": True,
"analysis": analysis
}
# Run the async function
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
vision_result = loop.run_until_complete(analyze_screenshot())
return json.dumps(vision_result, ensure_ascii=False)
else:
# Fallback: use synchronous requests
response = requests.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
# Use the sync auxiliary vision client directly
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{"type": "image_url", "image_url": {"url": data_url}},
],
"max_tokens": 2000,
"temperature": 0.1
},
timeout=60
)
if response.status_code != 200:
return json.dumps({
"success": False,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}"
}, ensure_ascii=False)
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return json.dumps({
"success": True,
"analysis": analysis
}, ensure_ascii=False)
}
],
max_tokens=2000,
temperature=0.1,
)
analysis = response.choices[0].message.content
return json.dumps({
"success": True,
"analysis": analysis,
}, ensure_ascii=False)
except Exception as e:
return json.dumps({

View file

@ -22,9 +22,19 @@ import os
import logging
from typing import Dict, Any, List, Optional
from tools.openrouter_client import get_async_client as _get_client
from openai import AsyncOpenAI, OpenAI
SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
from agent.auxiliary_client import get_text_auxiliary_client
# Resolve the auxiliary client at import time so we have the model slug.
# We build an AsyncOpenAI from the same credentials for async summarization.
_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
_async_aux_client: AsyncOpenAI | None = None
if _aux_client is not None:
_async_aux_client = AsyncOpenAI(
api_key=_aux_client.api_key,
base_url=str(_aux_client.base_url),
)
MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 2000
@ -126,11 +136,15 @@ async def _summarize_session(
f"Summarize this conversation with focus on: {query}"
)
if _async_aux_client is None or _SUMMARIZER_MODEL is None:
logging.warning("No auxiliary model available for session summarization")
return None
max_retries = 3
for attempt in range(max_retries):
try:
response = await _get_client().chat.completions.create(
model=SUMMARIZER_MODEL,
response = await _async_aux_client.chat.completions.create(
model=_SUMMARIZER_MODEL,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
@ -252,8 +266,8 @@ def session_search(
def check_session_search_requirements() -> bool:
"""Requires SQLite state database and OpenRouter API key."""
if not os.getenv("OPENROUTER_API_KEY"):
"""Requires SQLite state database and an auxiliary text model."""
if _async_aux_client is None:
return False
try:
from hermes_state import DEFAULT_DB_PATH
@ -316,5 +330,4 @@ registry.register(
limit=args.get("limit", 3),
db=kw.get("db")),
check_fn=check_session_search_requirements,
requires_env=["OPENROUTER_API_KEY"],
)

View file

@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
# Global state for environment lifecycle management
_active_environments: Dict[str, Any] = {}
_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory
_last_activity: Dict[str, float] = {}
_env_lock = threading.Lock()
_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
@ -530,7 +529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
if current_time - last_time > lifetime_seconds:
env = _active_environments.pop(task_id, None)
_last_activity.pop(task_id, None)
_task_workdirs.pop(task_id, None)
if env is not None:
envs_to_stop.append((task_id, env))
@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]:
info = {
"count": len(_active_environments),
"task_ids": list(_active_environments.keys()),
"workdirs": dict(_task_workdirs),
"workdirs": {},
}
# Calculate total disk usage
@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]:
def cleanup_all_environments():
"""Clean up ALL active environments. Use with caution."""
global _active_environments, _last_activity, _task_workdirs
global _active_environments, _last_activity
task_ids = list(_active_environments.keys())
cleaned = 0
@ -661,7 +659,7 @@ def cleanup_all_environments():
def cleanup_vm(task_id: str):
"""Manually clean up a specific environment by task_id."""
global _active_environments, _last_activity, _task_workdirs
global _active_environments, _last_activity
# Remove from tracking dicts while holding the lock, but defer the
# actual (potentially slow) env.cleanup() call to outside the lock
@ -669,7 +667,6 @@ def cleanup_vm(task_id: str):
env = None
with _env_lock:
env = _active_environments.pop(task_id, None)
_task_workdirs.pop(task_id, None)
_last_activity.pop(task_id, None)
# Clean up per-task creation lock
@ -782,17 +779,6 @@ def terminal_tool(
default_timeout = config["timeout"]
effective_timeout = timeout or default_timeout
# For local environment in batch mode, create a unique subdirectory per task
# This prevents parallel tasks from overwriting each other's files
# In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
if env_type == "local" and not os.getenv("HERMES_QUIET"):
with _env_lock:
if effective_task_id not in _task_workdirs:
task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
task_workdir.mkdir(parents=True, exist_ok=True)
_task_workdirs[effective_task_id] = str(task_workdir)
cwd = _task_workdirs[effective_task_id]
# Start cleanup thread
_start_cleanup_thread()
@ -874,11 +860,16 @@ def terminal_tool(
"description": approval.get("description", "dangerous command"),
"pattern_key": approval.get("pattern_key", ""),
}, ensure_ascii=False)
# Command was blocked - return informative message
# Command was blocked - include the pattern category so the caller knows why
desc = approval.get("description", "potentially dangerous operation")
fallback_msg = (
f"Command denied: matches '{desc}' pattern. "
"Use the approval prompt to allow it, or rephrase the command."
)
return json.dumps({
"output": "",
"exit_code": -1,
"error": approval.get("message", "Command denied - potentially dangerous operation"),
"error": approval.get("message", fallback_msg),
"status": "blocked"
}, ensure_ascii=False)
@ -996,11 +987,17 @@ def terminal_tool(
# Add helpful message for sudo failures in messaging context
output = _handle_sudo_failure(output, env_type)
# Truncate output if too long
# Truncate output if too long, keeping both head and tail
MAX_OUTPUT_CHARS = 50000
if len(output) > MAX_OUTPUT_CHARS:
truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..."
output = truncated_notice + output[-MAX_OUTPUT_CHARS:]
head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early)
tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)
omitted = len(output) - head_chars - tail_chars
truncated_notice = (
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
f"out of {len(output)} total] ...\n\n"
)
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
return json.dumps({
"output": output.strip() if output else "",

View file

@ -36,13 +36,20 @@ import base64
from pathlib import Path
from typing import Dict, Any, Optional
import httpx
from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
from openai import AsyncOpenAI
from agent.auxiliary_client import get_vision_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
# Configuration for vision processing
DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
# Resolve vision auxiliary client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@ -230,9 +237,13 @@ async def vision_analyze_tool(
logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100])
# Check API key availability
if not os.getenv("OPENROUTER_API_KEY"):
raise ValueError("OPENROUTER_API_KEY environment variable not set")
# Check auxiliary vision client availability
if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
return json.dumps({
"success": False,
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
}, indent=2, ensure_ascii=False)
# Determine if this is a local file path or a remote URL
local_path = Path(image_url)
@ -291,18 +302,12 @@ async def vision_analyze_tool(
logger.info("Processing image with %s...", model)
# Call the vision API with reasoning enabled
response = await _get_openrouter_client().chat.completions.create(
# Call the vision API
response = await _aux_async_client.chat.completions.create(
model=model,
messages=messages,
temperature=0.1, # Low temperature for consistent analysis
max_tokens=2000, # Generous limit for detailed analysis
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
temperature=0.1,
max_tokens=2000,
)
# Extract the analysis
@ -353,13 +358,8 @@ async def vision_analyze_tool(
def check_vision_requirements() -> bool:
"""
Check if all requirements for vision tools are met.
Returns:
bool: True if requirements are met, False otherwise
"""
return check_openrouter_api_key()
"""Check if an auxiliary vision model is available."""
return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
@ -379,16 +379,15 @@ if __name__ == "__main__":
print("👁️ Vision Tools Module")
print("=" * 40)
# Check if API key is available
api_available = check_openrouter_api_key()
# Check if vision model is available
api_available = check_vision_requirements()
if not api_available:
print("❌ OPENROUTER_API_KEY environment variable not set")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
print("Get API key at: https://openrouter.ai/")
print("❌ No auxiliary vision model available")
print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
exit(1)
else:
print("✅ OpenRouter API key found")
print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw):
image_url = args.get("image_url", "")
question = args.get("question", "")
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview")
model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
return vision_analyze_tool(image_url, full_prompt, model)
registry.register(
@ -464,6 +464,5 @@ registry.register(
schema=VISION_ANALYZE_SCHEMA,
handler=_handle_vision_analyze,
check_fn=check_vision_requirements,
requires_env=["OPENROUTER_API_KEY"],
is_async=True,
)

View file

@ -47,7 +47,8 @@ import re
import asyncio
from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl
from tools.openrouter_client import get_async_client as _get_openrouter_client
from openai import AsyncOpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
@ -64,9 +65,17 @@ def _get_firecrawl_client():
_firecrawl_client = Firecrawl(api_key=api_key)
return _firecrawl_client
DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
# Resolve auxiliary text client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized,
for attempt in range(max_retries):
try:
response = await _get_openrouter_client().chat.completions.create(
if _aux_async_client is None:
logger.warning("No auxiliary model available for web content processing")
return None
response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized,
],
temperature=0.1,
max_tokens=max_tokens,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
)
return response.choices[0].message.content.strip()
except Exception as api_error:
@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that:
Create a single, unified markdown summary."""
try:
response = await _get_openrouter_client().chat.completions.create(
if _aux_async_client is None:
logger.warning("No auxiliary model for synthesis, concatenating summaries")
fallback = "\n\n".join(summaries)
if len(fallback) > max_output_size:
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
return fallback
response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@ -350,12 +363,6 @@ Create a single, unified markdown summary."""
],
temperature=0.1,
max_tokens=4000,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
)
final_summary = response.choices[0].message.content.strip()
@ -677,8 +684,8 @@ async def web_extract_tool(
debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
# Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and _aux_async_client is not None:
logger.info("Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
@ -744,8 +751,8 @@ async def web_extract_tool(
else:
logger.warning("%s (no content to process)", url)
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of extracted pages for debugging (original behavior)
@ -973,8 +980,8 @@ async def web_crawl_tool(
debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
# Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and _aux_async_client is not None:
logger.info("Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
@ -1040,8 +1047,8 @@ async def web_crawl_tool(
else:
logger.warning("%s (no content to process)", page_url)
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of crawled pages for debugging (original behavior)
@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool:
return bool(os.getenv("FIRECRAWL_API_KEY"))
def check_nous_api_key() -> bool:
"""
Check if the Nous Research API key is available in environment variables.
Returns:
bool: True if API key is set, False otherwise
"""
return bool(os.getenv("OPENROUTER_API_KEY"))
def check_auxiliary_model() -> bool:
"""Check if an auxiliary text model is available for LLM content processing."""
return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
@ -1120,7 +1122,7 @@ if __name__ == "__main__":
# Check if API keys are available
firecrawl_available = check_firecrawl_api_key()
nous_available = check_nous_api_key()
nous_available = check_auxiliary_model()
if not firecrawl_available:
print("❌ FIRECRAWL_API_KEY environment variable not set")
@ -1130,12 +1132,11 @@ if __name__ == "__main__":
print("✅ Firecrawl API key found")
if not nous_available:
print("❌ OPENROUTER_API_KEY environment variable not set")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
print("Get API key at: https://inference-api.nousresearch.com/")
print("⚠️ Without Nous API key, LLM content processing will be disabled")
print("❌ No auxiliary model available for LLM content processing")
print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
print("⚠️ Without an auxiliary model, LLM content processing will be disabled")
else:
print("✅ Nous Research API key found")
print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
if not firecrawl_available:
exit(1)
@ -1143,7 +1144,7 @@ if __name__ == "__main__":
print("🛠️ Web tools ready for use!")
if nous_available:
print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter")
print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}")
print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars")
# Show debug mode status