diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
new file mode 100644
index 0000000000..3546e6bdb8
--- /dev/null
+++ b/agent/auxiliary_client.py
@@ -0,0 +1,128 @@
+"""Shared auxiliary OpenAI client for cheap/fast side tasks.
+
+Provides a single resolution chain so every consumer (context compression,
+session search, web extraction, vision analysis, browser vision) picks up
+the best available backend without duplicating fallback logic.
+
+Resolution order for text tasks:
+ 1. OpenRouter (OPENROUTER_API_KEY)
+ 2. Nous Portal (~/.hermes/auth.json active provider)
+ 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
+ 4. None
+
+Resolution order for vision/multimodal tasks:
+ 1. OpenRouter
+ 2. Nous Portal
+ 3. None (custom endpoints can't substitute for Gemini multimodal)
+"""
+
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Optional, Tuple
+
+from openai import OpenAI
+
+from hermes_constants import OPENROUTER_BASE_URL
+
+logger = logging.getLogger(__name__)
+
+# Default auxiliary models per provider
+_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
+_NOUS_MODEL = "gemini-3-flash"
+_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
+_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
+
+
+def _read_nous_auth() -> Optional[dict]:
+    """Load the Nous provider state from ~/.hermes/auth.json.
+
+    Returns the ``providers.nous`` dict when ``active_provider`` is "nous" and
+    it holds an ``agent_key`` or ``access_token``; otherwise None. Never raises.
+    """
+    try:
+        if not _AUTH_JSON_PATH.is_file():
+            return None
+        # Decode explicitly as UTF-8 so parsing does not depend on the locale.
+        data = json.loads(_AUTH_JSON_PATH.read_text(encoding="utf-8"))
+        if data.get("active_provider") != "nous":
+            return None
+        provider = (data.get("providers") or {}).get("nous") or {}
+        if not provider.get("agent_key") and not provider.get("access_token"):
+            return None
+        return provider
+    except Exception as exc:  # best-effort probe: any failure means "no Nous"
+        logger.debug("Could not read Nous auth: %s", exc)
+        return None
+
+
+def _nous_api_key(provider: dict) -> str:
+    """Return ``agent_key`` if truthy, else ``access_token``, else ``""`` (never None)."""
+    return provider.get("agent_key") or provider.get("access_token") or ""
+
+
+def _nous_base_url() -> str:
+    """Return NOUS_INFERENCE_BASE_URL, or the default when it is unset or empty."""
+    return os.getenv("NOUS_INFERENCE_BASE_URL") or _NOUS_DEFAULT_BASE_URL
+
+
+# ── Public API ──────────────────────────────────────────────────────────────
+
+def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Build the auxiliary client used for text-only side tasks.
+
+    Backends are tried in priority order -- OpenRouter, Nous Portal, then a
+    custom OpenAI-compatible endpoint -- and the first configured one wins.
+    Returns ``(client, model_slug)``, or ``(None, None)`` when none is set up.
+    """
+    # OpenRouter takes precedence whenever its API key is present.
+    openrouter_key = os.getenv("OPENROUTER_API_KEY")
+    if openrouter_key:
+        logger.debug("Auxiliary text client: OpenRouter")
+        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
+        return client, _OPENROUTER_MODEL
+
+    nous_state = _read_nous_auth()
+    if nous_state:
+        logger.debug("Auxiliary text client: Nous Portal")
+        client = OpenAI(api_key=_nous_api_key(nous_state), base_url=_nous_base_url())
+        return client, _NOUS_MODEL
+
+    # The custom endpoint only counts when both its URL and its key are set.
+    endpoint = os.getenv("OPENAI_BASE_URL")
+    endpoint_key = os.getenv("OPENAI_API_KEY")
+    if not (endpoint and endpoint_key):
+        logger.debug("Auxiliary text client: none available")
+        return None, None
+
+    # Model slug: OPENAI_MODEL, then LLM_MODEL, then a built-in default.
+    model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
+    logger.debug("Auxiliary text client: custom endpoint (%s)", model)
+    return OpenAI(api_key=endpoint_key, base_url=endpoint), model
+
+
+def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Build the auxiliary client used for vision/multimodal side tasks.
+
+    Only OpenRouter and Nous Portal are tried, in that order. Custom endpoints
+    are deliberately skipped: they cannot substitute for Gemini multimodal.
+
+    Returns ``(client, model_slug)``, or ``(None, None)`` when neither is set up.
+    """
+    # OpenRouter takes precedence whenever its API key is present.
+    openrouter_key = os.getenv("OPENROUTER_API_KEY")
+    if openrouter_key:
+        logger.debug("Auxiliary vision client: OpenRouter")
+        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
+        return client, _OPENROUTER_MODEL
+
+    # Nous Portal is the only other backend that qualifies.
+    nous_state = _read_nous_auth()
+    if not nous_state:
+        logger.debug("Auxiliary vision client: none available")
+        return None, None
+
+    logger.debug("Auxiliary vision client: Nous Portal")
+    client = OpenAI(api_key=_nous_api_key(nous_state), base_url=_nous_base_url())
+    return client, _NOUS_MODEL
diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 7a8225cbb4..8f072a37a1 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -9,13 +9,11 @@ import logging
import os
from typing import Any, Dict, List
-from openai import OpenAI
-
+from agent.auxiliary_client import get_text_auxiliary_client
from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
-from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
@@ -31,7 +29,6 @@ class ContextCompressor:
self,
model: str,
threshold_percent: float = 0.85,
- summary_model: str = "google/gemini-3-flash-preview",
protect_first_n: int = 3,
protect_last_n: int = 4,
summary_target_tokens: int = 500,
@@ -39,7 +36,6 @@ class ContextCompressor:
):
self.model = model
self.threshold_percent = threshold_percent
- self.summary_model = summary_model
self.protect_first_n = protect_first_n
self.protect_last_n = protect_last_n
self.summary_target_tokens = summary_target_tokens
@@ -53,8 +49,7 @@ class ContextCompressor:
self.last_completion_tokens = 0
self.last_total_tokens = 0
- api_key = os.getenv("OPENROUTER_API_KEY", "")
- self.client = OpenAI(api_key=api_key, base_url=OPENROUTER_BASE_URL) if api_key else None
+ self.client, self.summary_model = get_text_auxiliary_client()
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@@ -155,6 +150,26 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
if not self.quiet_mode:
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
+
+        # Truncation fallback when no auxiliary model is available
+        if self.client is None:
+            print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.")
+            # Keep the leading system message(s) plus the protected tail and drop
+            # the turns in between.
+            head_len = 0
+            while head_len < len(messages) and messages[head_len].get("role") == "system":
+                head_len += 1
+            # Start the tail no earlier than the end of the head: a short history
+            # would otherwise be copied twice, and protect_last_n == 0 would make
+            # messages[-0:] select the whole list instead of nothing.
+            tail_start = max(head_len, len(messages) - self.protect_last_n)
+            kept = [m.copy() for m in messages[:head_len] + messages[tail_start:]]
+            self.compression_count += 1
+            if not self.quiet_mode:
+                print(f" ✂️ Truncated: {len(messages)} → {len(kept)} messages (dropped middle turns)")
+            return kept
+
+ if not self.quiet_mode:
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
summary = self._generate_summary(turns_to_summarize)
diff --git a/agent/display.py b/agent/display.py
index bed75e3062..7320cb7ea6 100644
--- a/agent/display.py
+++ b/agent/display.py
@@ -4,11 +4,16 @@ Pure display functions and classes with no AIAgent dependency.
Used by AIAgent._execute_tool_calls for CLI feedback.
"""
+import json
import os
import random
import threading
import time
+# ANSI escape codes for coloring tool failure indicators
+_RED = "\033[31m"
+_RESET = "\033[0m"
+
# =========================================================================
# Tool preview (one-line summary of a tool call's primary argument)
@@ -242,12 +247,46 @@ KAWAII_GENERIC = [
# Cute tool message (completion line that replaces the spinner)
# =========================================================================
-def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
+def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
+    """Classify a tool result as success or failure for display purposes.
+
+    Returns ``(is_failure, suffix)``. Terminal results are parsed as JSON and
+    fail only on a non-zero ``exit_code`` (suffix ``" [exit N]"``); every other
+    tool is scanned heuristically (suffix ``" [error]"``). A ``None`` result,
+    or one with no failure markers, yields ``(False, "")``.
+    """
+    ok = (False, "")
+    if result is None:
+        return ok
+
+    if tool_name != "terminal":
+        # Generic heuristic: look for error markers near the start of the text.
+        head = result[:500].lower()
+        flagged = '"error"' in head or '"failed"' in head or result.startswith("Error")
+        return (True, " [error]") if flagged else ok
+
+    # Terminal output is expected to be JSON; unparseable output is not a failure.
+    try:
+        code = json.loads(result).get("exit_code")
+    except (json.JSONDecodeError, TypeError, AttributeError):
+        return ok
+    if code is not None and code != 0:
+        return True, f" [exit {code}]"
+    return ok
+
+
+def get_cute_tool_message(
+ tool_name: str, args: dict, duration: float, result: str | None = None,
+) -> str:
"""Generate a formatted tool completion line for CLI quiet mode.
Format: ``| {emoji} {verb:9} {detail} {duration}``
+
+ When *result* is provided the line is checked for failure indicators.
+ Failed tool calls get a red prefix and an informational suffix.
"""
dur = f"{duration:.1f}s"
+ is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
def _trunc(s, n=40):
s = str(s)
@@ -257,105 +296,111 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
p = str(p)
return ("..." + p[-(n-3):]) if len(p) > n else p
+ def _wrap(line: str) -> str:
+ """Apply red coloring and failure suffix when the tool failed."""
+ if not is_failure:
+ return line
+ return f"{_RED}{line}{failure_suffix}{_RESET}"
+
if tool_name == "web_search":
- return f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}"
+ return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
if tool_name == "web_extract":
urls = args.get("urls", [])
if urls:
url = urls[0] if isinstance(urls, list) else str(urls)
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
- return f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}"
- return f"┊ 📄 fetch pages {dur}"
+ return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
+ return _wrap(f"┊ 📄 fetch pages {dur}")
if tool_name == "web_crawl":
url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
- return f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}"
+ return _wrap(f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}")
if tool_name == "terminal":
- return f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}"
+ return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "")[:12]
labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}",
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
- return f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}"
+ return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
if tool_name == "read_file":
- return f"┊ 📖 read {_path(args.get('path', ''))} {dur}"
+ return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
if tool_name == "write_file":
- return f"┊ ✍️ write {_path(args.get('path', ''))} {dur}"
+ return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
if tool_name == "patch":
- return f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}"
+ return _wrap(f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}")
if tool_name == "search_files":
pattern = _trunc(args.get("pattern", ""), 35)
target = args.get("target", "content")
verb = "find" if target == "files" else "grep"
- return f"┊ 🔎 {verb:9} {pattern} {dur}"
+ return _wrap(f"┊ 🔎 {verb:9} {pattern} {dur}")
if tool_name == "browser_navigate":
url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
- return f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}"
+ return _wrap(f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}")
if tool_name == "browser_snapshot":
mode = "full" if args.get("full") else "compact"
- return f"┊ 📸 snapshot {mode} {dur}"
+ return _wrap(f"┊ 📸 snapshot {mode} {dur}")
if tool_name == "browser_click":
- return f"┊ 👆 click {args.get('ref', '?')} {dur}"
+ return _wrap(f"┊ 👆 click {args.get('ref', '?')} {dur}")
if tool_name == "browser_type":
- return f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}"
+ return _wrap(f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}")
if tool_name == "browser_scroll":
d = args.get("direction", "down")
arrow = {"down": "↓", "up": "↑", "right": "→", "left": "←"}.get(d, "↓")
- return f"┊ {arrow} scroll {d} {dur}"
+ return _wrap(f"┊ {arrow} scroll {d} {dur}")
if tool_name == "browser_back":
- return f"┊ ◀️ back {dur}"
+ return _wrap(f"┊ ◀️ back {dur}")
if tool_name == "browser_press":
- return f"┊ ⌨️ press {args.get('key', '?')} {dur}"
+ return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
if tool_name == "browser_close":
- return f"┊ 🚪 close browser {dur}"
+ return _wrap(f"┊ 🚪 close browser {dur}")
if tool_name == "browser_get_images":
- return f"┊ 🖼️ images extracting {dur}"
+ return _wrap(f"┊ 🖼️ images extracting {dur}")
if tool_name == "browser_vision":
- return f"┊ 👁️ vision analyzing page {dur}"
+ return _wrap(f"┊ 👁️ vision analyzing page {dur}")
if tool_name == "todo":
todos_arg = args.get("todos")
merge = args.get("merge", False)
if todos_arg is None:
- return f"┊ 📋 plan reading tasks {dur}"
+ return _wrap(f"┊ 📋 plan reading tasks {dur}")
elif merge:
- return f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}"
+ return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}")
else:
- return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}"
+ return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}")
if tool_name == "session_search":
- return f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}"
+ return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}")
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "")
if action == "add":
- return f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}"
+ return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
elif action == "replace":
- return f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}"
+ return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
elif action == "remove":
- return f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}"
- return f"┊ 🧠 memory {action} {dur}"
+ return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
+ return _wrap(f"┊ 🧠 memory {action} {dur}")
if tool_name == "skills_list":
- return f"┊ 📚 skills list {args.get('category', 'all')} {dur}"
+ return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
if tool_name == "skill_view":
- return f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}"
+ return _wrap(f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}")
if tool_name == "image_generate":
- return f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}"
+ return _wrap(f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}")
if tool_name == "text_to_speech":
- return f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}"
+ return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
if tool_name == "vision_analyze":
- return f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}"
+ return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
if tool_name == "mixture_of_agents":
- return f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}"
+ return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
if tool_name == "send_message":
- return f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}"
+ return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
if tool_name == "schedule_cronjob":
- return f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}"
+ return _wrap(f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}")
if tool_name == "list_cronjobs":
- return f"┊ ⏰ jobs listing {dur}"
+ return _wrap(f"┊ ⏰ jobs listing {dur}")
if tool_name == "remove_cronjob":
- return f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}"
+ return _wrap(f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}")
if tool_name.startswith("rl_"):
rl = {
"rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
@@ -364,16 +409,16 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
"rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
"rl_list_runs": "list runs", "rl_test_inference": "test inference",
}
- return f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}"
+ return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}")
if tool_name == "execute_code":
code = args.get("code", "")
first_line = code.strip().split("\n")[0] if code.strip() else ""
- return f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}"
+ return _wrap(f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}")
if tool_name == "delegate_task":
tasks = args.get("tasks")
if tasks and isinstance(tasks, list):
- return f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}"
- return f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}"
+ return _wrap(f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}")
+ return _wrap(f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}")
preview = build_tool_preview(tool_name, args) or ""
- return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}"
+ return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}")
diff --git a/cli.py b/cli.py
index 1972a18e03..91d7399996 100755
--- a/cli.py
+++ b/cli.py
@@ -339,9 +339,6 @@ def _cprint(text: str):
"""
_pt_print(_PT_ANSI(text))
-# Version string
-VERSION = "v1.0.0"
-
# ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal)
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]
diff --git a/gateway/config.py b/gateway/config.py
index 8526c43693..16eceda672 100644
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -8,6 +8,7 @@ Handles loading and validating configuration for:
- Delivery preferences
"""
+import logging
import os
import json
from pathlib import Path
@@ -15,6 +16,8 @@ from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
from enum import Enum
+logger = logging.getLogger(__name__)
+
class Platform(Enum):
"""Supported messaging platforms."""
@@ -264,6 +267,40 @@ def load_gateway_config() -> GatewayConfig:
# Override with environment variables
_apply_env_overrides(config)
+ # --- Validate loaded values ---
+ policy = config.default_reset_policy
+
+ if not (0 <= policy.at_hour <= 23):
+ logger.warning(
+ "Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour
+ )
+ policy.at_hour = 4
+
+ if policy.idle_minutes is None or policy.idle_minutes <= 0:
+ logger.warning(
+ "Invalid idle_minutes=%s (must be positive). Using default 1440.",
+ policy.idle_minutes,
+ )
+ policy.idle_minutes = 1440
+
+ # Warn about empty bot tokens — platforms that loaded an empty string
+ # won't connect and the cause can be confusing without a log line.
+ _token_env_names = {
+ Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
+ Platform.DISCORD: "DISCORD_BOT_TOKEN",
+ Platform.SLACK: "SLACK_BOT_TOKEN",
+ }
+ for platform, pconfig in config.platforms.items():
+ if not pconfig.enabled:
+ continue
+ env_name = _token_env_names.get(platform)
+ if env_name and pconfig.token is not None and not pconfig.token.strip():
+ logger.warning(
+ "%s is enabled but %s is empty. "
+ "The adapter will likely fail to connect.",
+ platform.value, env_name,
+ )
+
return config
diff --git a/gateway/delivery.py b/gateway/delivery.py
index 676c3b5ae8..0093c1fb09 100644
--- a/gateway/delivery.py
+++ b/gateway/delivery.py
@@ -8,12 +8,18 @@ Routes messages to the appropriate destination based on:
- Local (always saved to files)
"""
+import logging
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass
from typing import Dict, List, Optional, Any, Union
from enum import Enum
+logger = logging.getLogger(__name__)
+
+MAX_PLATFORM_OUTPUT = 4000
+TRUNCATED_VISIBLE = 3800
+
from .config import Platform, GatewayConfig
from .session import SessionSource
@@ -245,6 +251,15 @@ class DeliveryRouter:
"timestamp": timestamp
}
+    def _save_full_output(self, content: str, job_id: str) -> Path:
+        """Write *content* as UTF-8 under ~/.hermes/cron/output and return its path."""
+        # job_id comes from delivery metadata: keep it a single safe path segment.
+        safe_id = "".join(c if c.isalnum() or c in "-_" else "_" for c in str(job_id)) or "unknown"
+        path = Path.home() / ".hermes" / "cron" / "output" / f"{safe_id}_{datetime.now():%Y%m%d_%H%M%S}.txt"
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(content, encoding="utf-8")
+        return path
+
async def _deliver_to_platform(
self,
target: DeliveryTarget,
@@ -260,8 +275,16 @@ class DeliveryRouter:
if not target.chat_id:
raise ValueError(f"No chat ID for {target.platform.value} delivery")
- # Call the adapter's send method
- # Adapters should implement: async def send(chat_id: str, content: str) -> Dict
+ # Guard: truncate oversized cron output to stay within platform limits
+ if len(content) > MAX_PLATFORM_OUTPUT:
+ job_id = (metadata or {}).get("job_id", "unknown")
+ saved_path = self._save_full_output(content, job_id)
+ logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path)
+ content = (
+ content[:TRUNCATED_VISIBLE]
+ + f"\n\n... [truncated, full output saved to {saved_path}]"
+ )
+
return await adapter.send(target.chat_id, content, metadata=metadata)
diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py
index 918bc31bd6..9aef4033f5 100644
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -659,34 +659,90 @@ class BasePlatformAdapter(ABC):
def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
"""
- Split a long message into chunks.
-
+ Split a long message into chunks, preserving code block boundaries.
+
+ When a split falls inside a triple-backtick code block, the fence is
+ closed at the end of the current chunk and reopened (with the original
+ language tag) at the start of the next chunk. Multi-chunk responses
+ receive indicators like ``(1/3)``.
+
Args:
content: The full message content
max_length: Maximum length per chunk (platform-specific)
-
+
Returns:
List of message chunks
"""
if len(content) <= max_length:
return [content]
-
- chunks = []
- while content:
- if len(content) <= max_length:
- chunks.append(content)
+
+ INDICATOR_RESERVE = 10 # room for " (XX/XX)"
+ FENCE_CLOSE = "\n```"
+
+ chunks: List[str] = []
+ remaining = content
+ # When the previous chunk ended mid-code-block, this holds the
+ # language tag (possibly "") so we can reopen the fence.
+ carry_lang: Optional[str] = None
+
+ while remaining:
+ # If we're continuing a code block from the previous chunk,
+ # prepend a new opening fence with the same language tag.
+ prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
+
+ # How much body text we can fit after accounting for the prefix,
+ # a potential closing fence, and the chunk indicator.
+ headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
+ if headroom < 1:
+ headroom = max_length // 2
+
+ # Everything remaining fits in one final chunk
+ if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
+ chunks.append(prefix + remaining)
break
-
- # Try to split at a newline
- split_idx = content.rfind("\n", 0, max_length)
- if split_idx == -1:
- # No newline, split at space
- split_idx = content.rfind(" ", 0, max_length)
- if split_idx == -1:
- # No space either, hard split
- split_idx = max_length
-
- chunks.append(content[:split_idx])
- content = content[split_idx:].lstrip()
-
+
+ # Find a natural split point (prefer newlines, then spaces)
+ region = remaining[:headroom]
+ split_at = region.rfind("\n")
+ if split_at < headroom // 2:
+ split_at = region.rfind(" ")
+ if split_at < 1:
+ split_at = headroom
+
+ chunk_body = remaining[:split_at]
+ remaining = remaining[split_at:].lstrip()
+
+ full_chunk = prefix + chunk_body
+
+            # Walk chunk_body only: in_code already accounts for the fence reopened by
+            # `prefix`, so scanning that line too would flip the state the wrong way.
+            in_code = carry_lang is not None
+            lang = carry_lang or ""
+            for line in chunk_body.split("\n"):
+                stripped = line.strip()
+                if stripped.startswith("```"):
+                    if in_code:
+                        in_code = False
+                        lang = ""
+                    else:
+                        in_code = True
+                        tag = stripped[3:].strip()
+                        lang = tag.split()[0] if tag else ""
+
+            if in_code:
+                # Close the orphaned fence so the chunk is valid on its own
+                full_chunk += FENCE_CLOSE
+                carry_lang = lang
+            else:
+                carry_lang = None
+
+ chunks.append(full_chunk)
+
+ # Append chunk indicators when the response spans multiple messages
+ if len(chunks) > 1:
+ total = len(chunks)
+ chunks = [
+ f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
+ ]
+
return chunks
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index e0d277b7b4..01bbad0db0 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -8,6 +8,7 @@ Uses python-telegram-bot library for:
"""
import asyncio
+import re
from typing import Dict, List, Optional, Any
try:
@@ -49,6 +50,16 @@ def check_telegram_requirements() -> bool:
return TELEGRAM_AVAILABLE
+# Matches every character that MarkdownV2 requires to be backslash-escaped
+# when it appears outside a code span or fenced code block.
+_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
+
+
+def _escape_mdv2(text: str) -> str:
+    """Return *text* with a backslash inserted before each MarkdownV2 special character."""
+    return _MDV2_ESCAPE_RE.sub(lambda match: "\\" + match.group(1), text)
+
+
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@@ -167,7 +178,7 @@ class TelegramAdapter(BasePlatformAdapter):
msg = await self._bot.send_message(
chat_id=int(chat_id),
text=chunk,
- parse_mode=ParseMode.MARKDOWN,
+ parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
message_thread_id=int(thread_id) if thread_id else None,
)
@@ -297,14 +308,81 @@ class TelegramAdapter(BasePlatformAdapter):
def format_message(self, content: str) -> str:
"""
- Format message for Telegram.
-
- Telegram uses a subset of markdown. We'll use the simpler
- Markdown mode (not MarkdownV2) for compatibility.
+ Convert standard markdown to Telegram MarkdownV2 format.
+
+ Protected regions (code blocks, inline code) are extracted first so
+ their contents are never modified. Standard markdown constructs
+ (headers, bold, italic, links) are translated to MarkdownV2 syntax,
+ and all remaining special characters are escaped.
"""
- # Basic escaping for Telegram Markdown
- # In Markdown mode (not V2), only certain characters need escaping
- return content
+ if not content:
+ return content
+
+ placeholders: dict = {}
+ counter = [0]
+
+ def _ph(value: str) -> str:
+ """Stash *value* behind a placeholder token that survives escaping."""
+ key = f"\x00PH{counter[0]}\x00"
+ counter[0] += 1
+ placeholders[key] = value
+ return key
+
+ text = content
+
+ # 1) Protect fenced code blocks (``` ... ```)
+ text = re.sub(
+ r'(```(?:[^\n]*\n)?[\s\S]*?```)',
+ lambda m: _ph(m.group(0)),
+ text,
+ )
+
+ # 2) Protect inline code (`...`)
+ text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
+
+ # 3) Convert markdown links – escape the display text; inside the URL
+ # only ')' and '\' need escaping per the MarkdownV2 spec.
+ def _convert_link(m):
+ display = _escape_mdv2(m.group(1))
+ url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
+ return _ph(f'[{display}]({url})')
+
+ text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
+
+ # 4) Convert markdown headers (## Title) → bold *Title*
+ def _convert_header(m):
+ inner = m.group(1).strip()
+ # Strip redundant bold markers that may appear inside a header
+ inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
+ return _ph(f'*{_escape_mdv2(inner)}*')
+
+ text = re.sub(
+ r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
+ )
+
+ # 5) Convert bold: **text** → *text* (MarkdownV2 bold)
+ text = re.sub(
+ r'\*\*(.+?)\*\*',
+ lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
+ text,
+ )
+
+        # 6) Italic: *text* → _text_; [^*\n] stops "* item" bullets pairing across lines
+        text = re.sub(
+            r'\*([^*\n]+)\*',
+            lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
+            text,
+        )
+
+ # 7) Escape remaining special characters in plain text
+ text = _escape_mdv2(text)
+
+ # 8) Restore placeholders in reverse insertion order so that
+ # nested references (a placeholder inside another) resolve correctly.
+ for key in reversed(list(placeholders.keys())):
+ text = text.replace(key, placeholders[key])
+
+ return text
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming text messages."""
diff --git a/gateway/run.py b/gateway/run.py
index b299085d7c..11bb11ca29 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -20,6 +20,7 @@ import re
import sys
import signal
import threading
+from logging.handlers import RotatingFileHandler
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List
@@ -402,9 +403,27 @@ class GatewayRunner:
# Build the context prompt to inject
context_prompt = build_session_context_prompt(context)
+ # If the previous session expired and was auto-reset, prepend a notice
+ # so the agent knows this is a fresh conversation (not an intentional /reset).
+ if getattr(session_entry, 'was_auto_reset', False):
+ context_prompt = (
+ "[System note: The user's previous session expired due to inactivity. "
+ "This is a fresh conversation with no prior context.]\n\n"
+ + context_prompt
+ )
+ session_entry.was_auto_reset = False
+
# Load conversation history from transcript
history = self.session_store.load_transcript(session_entry.session_id)
+ # First-message onboarding for brand-new messaging platform users
+ if not history:
+ context_prompt += (
+ "\n\n[System note: This is the user's very first message in this session. "
+ "Briefly introduce yourself and mention that /help shows available commands. "
+ "Keep the introduction concise -- one or two sentences max.]"
+ )
+
# -----------------------------------------------------------------
# Auto-analyze images sent by the user
#
@@ -1342,15 +1361,32 @@ def _start_cron_ticker(stop_event: threading.Event, interval: int = 60):
Runs inside the gateway process so cronjobs fire automatically without
needing a separate `hermes cron daemon` or system cron entry.
+
+ Every 60th tick (~once per hour) the image/audio cache is pruned so
+ stale temp files don't accumulate.
"""
from cron.scheduler import tick as cron_tick
+ from gateway.platforms.base import cleanup_image_cache
+
+ IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
logger.info("Cron ticker started (interval=%ds)", interval)
+ tick_count = 0
while not stop_event.is_set():
try:
cron_tick(verbose=False)
except Exception as e:
logger.debug("Cron tick error: %s", e)
+
+ tick_count += 1
+ if tick_count % IMAGE_CACHE_EVERY == 0:
+ try:
+ removed = cleanup_image_cache(max_age_hours=24)
+ if removed:
+ logger.info("Image cache cleanup: removed %d stale file(s)", removed)
+ except Exception as e:
+ logger.debug("Image cache cleanup error: %s", e)
+
stop_event.wait(timeout=interval)
logger.info("Cron ticker stopped")
@@ -1363,6 +1399,18 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
Returns True if the gateway ran successfully, False if it failed to start.
A False return causes a non-zero exit code so systemd can auto-restart.
"""
+ # Configure rotating file log so gateway output is persisted for debugging
+ log_dir = Path.home() / '.hermes' / 'logs'
+ log_dir.mkdir(parents=True, exist_ok=True)
+ file_handler = RotatingFileHandler(
+ log_dir / 'gateway.log',
+ maxBytes=5 * 1024 * 1024,
+ backupCount=3,
+ )
+ file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
+ logging.getLogger().addHandler(file_handler)
+ logging.getLogger().setLevel(logging.INFO)
+
runner = GatewayRunner(config)
# Set up signal handlers
diff --git a/gateway/session.py b/gateway/session.py
index c66c638b46..b6603ecfaf 100644
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -219,6 +219,10 @@ class SessionEntry:
output_tokens: int = 0
total_tokens: int = 0
+ # Set when a session was created because the previous one expired;
+ # consumed once by the message handler to inject a notice into context
+ was_auto_reset: bool = False
+
def to_dict(self) -> Dict[str, Any]:
result = {
"session_key": self.session_key,
@@ -388,11 +392,14 @@ class SessionStore:
return entry
else:
# Session is being reset -- end the old one in SQLite
+ was_auto_reset = True
if self._db:
try:
self._db.end_session(entry.session_id, "session_reset")
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
+ else:
+ was_auto_reset = False
# Create new session
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
@@ -406,6 +413,7 @@ class SessionStore:
display_name=source.chat_name,
platform=source.platform,
chat_type=source.chat_type,
+ was_auto_reset=was_auto_reset,
)
self._entries[session_key] = entry
diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py
index d8c95978c4..7e647afc35 100644
--- a/hermes_cli/__init__.py
+++ b/hermes_cli/__init__.py
@@ -11,4 +11,4 @@ Provides subcommands for:
- hermes cron - Manage cron jobs
"""
-__version__ = "0.1.0"
+__version__ = "v1.0.0"
diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py
index 2597e880d7..974dfaa15e 100644
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -33,7 +33,7 @@ def cprint(text: str):
# ASCII Art & Branding
# =========================================================================
-VERSION = "v1.0.0"
+from hermes_cli import __version__ as VERSION
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index 09176ba154..6a103a372f 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -44,6 +44,8 @@ def run_doctor(args):
should_fix = getattr(args, 'fix', False)
issues = []
+ manual_issues = [] # issues that can't be auto-fixed
+ fixed_count = 0
print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
@@ -135,8 +137,15 @@ def run_doctor(args):
check_ok(".env file exists (in project directory)")
else:
check_fail("~/.hermes/.env file missing")
- check_info("Run 'hermes setup' to create one")
- issues.append("Run 'hermes setup' to create .env")
+ if should_fix:
+ env_path.parent.mkdir(parents=True, exist_ok=True)
+ env_path.touch()
+ check_ok("Created empty ~/.hermes/.env")
+ check_info("Run 'hermes setup' to configure API keys")
+ fixed_count += 1
+ else:
+ check_info("Run 'hermes setup' to create one")
+ issues.append("Run 'hermes setup' to create .env")
# Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback)
config_path = HERMES_HOME / 'config.yaml'
@@ -147,7 +156,17 @@ def run_doctor(args):
if fallback_config.exists():
check_ok("cli-config.yaml exists (in project directory)")
else:
- check_warn("config.yaml not found", "(using defaults)")
+ example_config = PROJECT_ROOT / 'cli-config.yaml.example'
+ if should_fix and example_config.exists():
+ config_path.parent.mkdir(parents=True, exist_ok=True)
+ shutil.copy2(str(example_config), str(config_path))
+ check_ok("Created ~/.hermes/config.yaml from cli-config.yaml.example")
+ fixed_count += 1
+ elif should_fix:
+ check_warn("config.yaml not found and no example to copy from")
+ manual_issues.append("Create ~/.hermes/config.yaml manually")
+ else:
+ check_warn("config.yaml not found", "(using defaults)")
# =========================================================================
# Check: Directory structure
@@ -159,7 +178,26 @@ def run_doctor(args):
if hermes_home.exists():
check_ok("~/.hermes directory exists")
else:
- check_warn("~/.hermes not found", "(will be created on first use)")
+ if should_fix:
+ hermes_home.mkdir(parents=True, exist_ok=True)
+ check_ok("Created ~/.hermes directory")
+ fixed_count += 1
+ else:
+ check_warn("~/.hermes not found", "(will be created on first use)")
+
+ # Check expected subdirectories
+ expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"]
+ for subdir_name in expected_subdirs:
+ subdir_path = hermes_home / subdir_name
+ if subdir_path.exists():
+ check_ok(f"~/.hermes/{subdir_name}/ exists")
+ else:
+ if should_fix:
+ subdir_path.mkdir(parents=True, exist_ok=True)
+ check_ok(f"Created ~/.hermes/{subdir_name}/")
+ fixed_count += 1
+ else:
+ check_warn(f"~/.hermes/{subdir_name}/ not found", "(will be created on first use)")
# Check for SOUL.md persona file
soul_path = hermes_home / "SOUL.md"
@@ -175,14 +213,25 @@ def run_doctor(args):
check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)")
if should_fix:
soul_path.parent.mkdir(parents=True, exist_ok=True)
- soul_path.write_text("# Hermes Agent Persona\n\n\n", encoding="utf-8")
- check_ok("Created ~/.hermes/SOUL.md")
+ soul_path.write_text(
+ "# Hermes Agent Persona\n\n"
+ "\n\n"
+ "You are Hermes, a helpful AI assistant.\n",
+ encoding="utf-8",
+ )
+ check_ok("Created ~/.hermes/SOUL.md with basic template")
+ fixed_count += 1
logs_dir = PROJECT_ROOT / "logs"
if logs_dir.exists():
- check_ok("logs/ directory exists")
+ check_ok("logs/ directory exists (project root)")
else:
- check_warn("logs/ not found", "(will be created on first use)")
+ if should_fix:
+ logs_dir.mkdir(parents=True, exist_ok=True)
+ check_ok("Created logs/ directory")
+ fixed_count += 1
+ else:
+ check_warn("logs/ not found", "(will be created on first use)")
# Check memory directory
memories_dir = hermes_home / "memories"
@@ -205,6 +254,7 @@ def run_doctor(args):
if should_fix:
memories_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes/memories/")
+ fixed_count += 1
# Check SQLite session store
state_db_path = hermes_home / "state.db"
@@ -299,6 +349,7 @@ def run_doctor(args):
openrouter_key = os.getenv("OPENROUTER_API_KEY")
if openrouter_key:
+ print(" Checking OpenRouter API...", end="", flush=True)
try:
import httpx
response = httpx.get(
@@ -307,20 +358,21 @@ def run_doctor(args):
timeout=10
)
if response.status_code == 200:
- check_ok("OpenRouter API")
+ print(f"\r {color('✓', Colors.GREEN)} OpenRouter API ")
elif response.status_code == 401:
- check_fail("OpenRouter API", "(invalid API key)")
+ print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ")
issues.append("Check OPENROUTER_API_KEY in .env")
else:
- check_fail("OpenRouter API", f"(HTTP {response.status_code})")
+ print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ")
except Exception as e:
- check_fail("OpenRouter API", f"({e})")
+ print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ")
issues.append("Check network connectivity")
else:
check_warn("OpenRouter API", "(not configured)")
anthropic_key = os.getenv("ANTHROPIC_API_KEY")
if anthropic_key:
+ print(" Checking Anthropic API...", end="", flush=True)
try:
import httpx
response = httpx.get(
@@ -332,14 +384,14 @@ def run_doctor(args):
timeout=10
)
if response.status_code == 200:
- check_ok("Anthropic API")
+ print(f"\r {color('✓', Colors.GREEN)} Anthropic API ")
elif response.status_code == 401:
- check_fail("Anthropic API", "(invalid API key)")
+ print(f"\r {color('✗', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ")
else:
- # Note: Anthropic may not have /models endpoint
- check_warn("Anthropic API", "(couldn't verify)")
+ msg = "(couldn't verify)"
+ print(f"\r {color('⚠', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ")
except Exception as e:
- check_warn("Anthropic API", f"({e})")
+ print(f"\r {color('⚠', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ")
# =========================================================================
# Check: Submodules
@@ -440,17 +492,28 @@ def run_doctor(args):
# Summary
# =========================================================================
print()
- if issues:
- print(color("─" * 60, Colors.YELLOW))
- print(color(f" Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
+ remaining_issues = issues + manual_issues
+ if should_fix and fixed_count > 0:
+ print(color("─" * 60, Colors.GREEN))
+ print(color(f" Fixed {fixed_count} issue(s).", Colors.GREEN, Colors.BOLD), end="")
+ if remaining_issues:
+ print(color(f" {len(remaining_issues)} issue(s) require manual intervention.", Colors.YELLOW, Colors.BOLD))
+ else:
+ print()
print()
- for i, issue in enumerate(issues, 1):
+ if remaining_issues:
+ for i, issue in enumerate(remaining_issues, 1):
+ print(f" {i}. {issue}")
+ print()
+ elif remaining_issues:
+ print(color("─" * 60, Colors.YELLOW))
+ print(color(f" Found {len(remaining_issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
+ print()
+ for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}")
print()
-
- if should_fix:
- print(color(" Attempting auto-fix is not yet implemented.", Colors.DIM))
- print(color(" Please resolve issues manually.", Colors.DIM))
+ if not should_fix:
+ print(color(" Tip: run 'hermes doctor --fix' to auto-fix what's possible.", Colors.DIM))
else:
print(color("─" * 60, Colors.GREEN))
print(color(" All checks passed! 🎉", Colors.GREEN, Colors.BOLD))
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 3406994b36..4264730c69 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -47,8 +47,66 @@ from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
+def _has_any_provider_configured() -> bool:
+    """Check if at least one inference provider is usable.
+
+    Looks, in order, at:
+      1. the process environment (OPENROUTER_API_KEY, OPENAI_API_KEY,
+         ANTHROPIC_API_KEY),
+      2. the same keys in the .env file returned by ``get_env_path()``,
+      3. stored credentials for the active provider in ``auth.json``
+         under ``get_hermes_home()``.
+
+    The probe is best-effort: an unreadable or malformed file is logged at
+    debug level and treated as "nothing configured here" instead of raising.
+
+    Returns:
+        True if any source yields a non-empty credential, otherwise False.
+    """
+    from hermes_cli.config import get_env_path, get_hermes_home
+
+    provider_keys = ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY")
+
+    # Check env vars (may be set by .env or shell)
+    if any(os.getenv(name) for name in provider_keys):
+        return True
+
+    # Check .env file for keys
+    env_file = get_env_path()
+    if env_file.exists():
+        try:
+            for line in env_file.read_text().splitlines():
+                line = line.strip()
+                if line.startswith("#") or "=" not in line:
+                    continue
+                key, _, val = line.partition("=")
+                key = key.strip()
+                # Also accept shell-style "export KEY=value" lines, which
+                # would otherwise never match a provider key name.
+                if key.startswith("export "):
+                    key = key[len("export "):].strip()
+                val = val.strip().strip("'\"")
+                if key in provider_keys and val:
+                    return True
+        except Exception as exc:
+            # Best-effort: a broken .env must not crash the first-run guard,
+            # but leave a trace for debugging instead of swallowing silently.
+            logger.debug("Could not read %s: %s", env_file, exc)
+
+    # Check for stored credentials of the active provider (e.g. Nous Portal OAuth)
+    auth_file = get_hermes_home() / "auth.json"
+    if auth_file.exists():
+        try:
+            import json
+            auth = json.loads(auth_file.read_text())
+            active = auth.get("active_provider")
+            if active:
+                state = auth.get("providers", {}).get(active, {})
+                # agent_key is included because the auxiliary client's Nous
+                # auth reader treats it as a valid credential on its own.
+                credential_fields = ("access_token", "refresh_token", "agent_key")
+                if any(state.get(field) for field in credential_fields):
+                    return True
+        except Exception as exc:
+            logger.debug("Could not read %s: %s", auth_file, exc)
+
+    return False
+
+
def cmd_chat(args):
"""Run interactive chat CLI."""
+ # First-run guard: check if any provider is configured before launching
+ if not _has_any_provider_configured():
+ print()
+ print("It looks like Hermes isn't configured yet -- no API keys or providers found.")
+ print()
+ print(" Run: hermes setup")
+ print()
+ try:
+ reply = input("Run setup now? [Y/n] ").strip().lower()
+ except (EOFError, KeyboardInterrupt):
+ reply = "n"
+ if reply in ("", "y", "yes"):
+ cmd_setup(args)
+ return
+ print()
+ print("You can run 'hermes setup' at any time to configure.")
+ sys.exit(1)
+
# Import and run the CLI
from cli import main as cli_main
@@ -219,20 +277,10 @@ def _model_flow_openrouter(config, current_model=""):
print("API key saved.")
print()
- OPENROUTER_MODELS = [
- "anthropic/claude-opus-4.6",
- "anthropic/claude-sonnet-4.5",
- "anthropic/claude-opus-4.5",
- "openai/gpt-5.2",
- "openai/gpt-5.2-codex",
- "google/gemini-3-pro-preview",
- "google/gemini-3-flash-preview",
- "z-ai/glm-4.7",
- "moonshotai/kimi-k2.5",
- "minimax/minimax-m2.1",
- ]
+ from hermes_cli.models import model_ids
+ openrouter_models = model_ids()
- selected = _prompt_model_selection(OPENROUTER_MODELS, current_model=current_model)
+ selected = _prompt_model_selection(openrouter_models, current_model=current_model)
if selected:
# Clear any custom endpoint and set provider to openrouter
if get_env_value("OPENAI_BASE_URL"):
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
new file mode 100644
index 0000000000..789c51e867
--- /dev/null
+++ b/hermes_cli/models.py
@@ -0,0 +1,33 @@
+"""
+Canonical list of OpenRouter models offered in CLI and setup wizards.
+
+Add, remove, or reorder entries here — both `hermes setup` and
+`hermes` provider-selection will pick up the change automatically.
+"""
+
+# (model_id, display description shown in menus)
+OPENROUTER_MODELS: list[tuple[str, str]] = [
+ ("anthropic/claude-opus-4.6", "recommended"),
+ ("anthropic/claude-sonnet-4.5", ""),
+ ("anthropic/claude-opus-4.5", ""),
+ ("openai/gpt-5.2", ""),
+ ("openai/gpt-5.2-codex", ""),
+ ("google/gemini-3-pro-preview", ""),
+ ("google/gemini-3-flash-preview", ""),
+ ("z-ai/glm-4.7", ""),
+ ("moonshotai/kimi-k2.5", ""),
+ ("minimax/minimax-m2.1", ""),
+]
+
+
+def model_ids() -> list[str]:
+    """List the bare model identifiers from OPENROUTER_MODELS, in menu order."""
+    ids = []
+    for model_id, _description in OPENROUTER_MODELS:
+        ids.append(model_id)
+    return ids
+
+
+def menu_labels() -> list[str]:
+    """Build menu strings: the model id, suffixed with ' (description)' when one is set.
+
+    Example: 'anthropic/claude-opus-4.6 (recommended)'.
+    """
+    return [
+        f"{model_id} ({note})" if note else model_id
+        for model_id, note in OPENROUTER_MODELS
+    ]
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 1dd670858d..a50975b354 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -611,46 +611,27 @@ def run_setup_wizard(args):
save_env_value("LLM_MODEL", custom)
# else: keep current
else:
- # Static list for OpenRouter / fallback
- model_choices = [
- "anthropic/claude-opus-4.6 (recommended)",
- "anthropic/claude-sonnet-4.5",
- "anthropic/claude-opus-4.5",
- "openai/gpt-5.2",
- "openai/gpt-5.2-codex",
- "google/gemini-3-pro-preview",
- "google/gemini-3-flash-preview",
- "z-ai/glm-4.7",
- "moonshotai/kimi-k2.5",
- "minimax/minimax-m2.1",
+ # Static list for OpenRouter / fallback (from canonical list)
+ from hermes_cli.models import model_ids, menu_labels
+
+ ids = model_ids()
+ model_choices = menu_labels() + [
"Custom model",
- f"Keep current ({current_model})"
+ f"Keep current ({current_model})",
]
- model_idx = prompt_choice("Select default model:", model_choices, 11)
+ keep_idx = len(model_choices) - 1
+ model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
- model_map = {
- 0: "anthropic/claude-opus-4.6",
- 1: "anthropic/claude-sonnet-4.5",
- 2: "anthropic/claude-opus-4.5",
- 3: "openai/gpt-5.2",
- 4: "openai/gpt-5.2-codex",
- 5: "google/gemini-3-pro-preview",
- 6: "google/gemini-3-flash-preview",
- 7: "z-ai/glm-4.7",
- 8: "moonshotai/kimi-k2.5",
- 9: "minimax/minimax-m2.1",
- }
-
- if model_idx in model_map:
- config['model'] = model_map[model_idx]
- save_env_value("LLM_MODEL", model_map[model_idx])
- elif model_idx == 10: # Custom
+ if model_idx < len(ids):
+ config['model'] = ids[model_idx]
+ save_env_value("LLM_MODEL", ids[model_idx])
+ elif model_idx == len(ids): # Custom
custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
if custom:
config['model'] = custom
save_env_value("LLM_MODEL", custom)
- # else: Keep current (model_idx == 11)
+ # else: Keep current
# =========================================================================
# Step 4: Terminal Backend
diff --git a/run_agent.py b/run_agent.py
index 882d10b294..3aa1df6865 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -69,840 +69,12 @@ from agent.prompt_builder import build_skills_system_prompt, build_context_files
from agent.display import (
KawaiiSpinner, build_tool_preview as _build_tool_preview,
get_cute_tool_message as _get_cute_tool_message_impl,
- KAWAII_SEARCH, KAWAII_READ, KAWAII_TERMINAL, KAWAII_BROWSER,
- KAWAII_CREATE, KAWAII_SKILL, KAWAII_THINK, KAWAII_GENERIC,
)
from agent.trajectory import (
convert_scratchpad_to_think, has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file,
)
-# Model metadata functions (fetch_model_metadata, get_model_context_length,
-# estimate_tokens_rough, estimate_messages_tokens_rough) are now in
-# agent/model_metadata.py -- imported above.
-
-
-class ContextCompressor:
- """
- Compresses conversation context when approaching model's context limit.
-
- Uses similar logic to trajectory_compressor but operates in real-time:
- 1. Protects first few turns (system, initial user, first assistant response)
- 2. Protects last N turns (recent context is most relevant)
- 3. Summarizes middle turns when threshold is reached
-
- Token tracking uses actual counts from API responses (usage.prompt_tokens)
- rather than estimates for accuracy.
- """
-
- def __init__(
- self,
- model: str,
- threshold_percent: float = 0.85,
- summary_model: str = "google/gemini-3-flash-preview",
- protect_first_n: int = 3,
- protect_last_n: int = 4,
- summary_target_tokens: int = 500,
- quiet_mode: bool = False,
- ):
- """
- Initialize the context compressor.
-
- Args:
- model: The main model being used (to determine context limit)
- threshold_percent: Trigger compression at this % of context (default 85%)
- summary_model: Model to use for generating summaries (cheap/fast)
- protect_first_n: Number of initial turns to always keep
- protect_last_n: Number of recent turns to always keep
- summary_target_tokens: Target token count for summaries
- quiet_mode: Suppress compression notifications
- """
- self.model = model
- self.threshold_percent = threshold_percent
- self.summary_model = summary_model
- self.protect_first_n = protect_first_n
- self.protect_last_n = protect_last_n
- self.summary_target_tokens = summary_target_tokens
- self.quiet_mode = quiet_mode
-
- self.context_length = get_model_context_length(model)
- self.threshold_tokens = int(self.context_length * threshold_percent)
- self.compression_count = 0
-
- # Track actual token usage from API responses
- self.last_prompt_tokens = 0
- self.last_completion_tokens = 0
- self.last_total_tokens = 0
-
- # Initialize OpenRouter client for summarization
- api_key = os.getenv("OPENROUTER_API_KEY", "")
- self.client = OpenAI(
- api_key=api_key,
- base_url=OPENROUTER_BASE_URL
- ) if api_key else None
-
- def update_from_response(self, usage: Dict[str, Any]):
- """
- Update tracked token usage from API response.
-
- Args:
- usage: The usage dict from response (contains prompt_tokens, completion_tokens, total_tokens)
- """
- self.last_prompt_tokens = usage.get("prompt_tokens", 0)
- self.last_completion_tokens = usage.get("completion_tokens", 0)
- self.last_total_tokens = usage.get("total_tokens", 0)
-
- def should_compress(self, prompt_tokens: int = None) -> bool:
- """
- Check if context exceeds the compression threshold.
-
- Uses actual token count from API response for accuracy.
-
- Args:
- prompt_tokens: Actual prompt tokens from last API response.
- If None, uses last tracked value.
-
- Returns:
- True if compression should be triggered
- """
- tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
- return tokens >= self.threshold_tokens
-
- def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
- """
- Quick pre-flight check using rough estimate (before API call).
-
- Use this to avoid making an API call that would fail due to context overflow.
- For post-response compression decisions, use should_compress() with actual tokens.
-
- Args:
- messages: Current conversation messages
-
- Returns:
- True if compression is likely needed
- """
- rough_estimate = estimate_messages_tokens_rough(messages)
- return rough_estimate >= self.threshold_tokens
-
- def get_status(self) -> Dict[str, Any]:
- """
- Get current compression status for display/logging.
-
- Returns:
- Dict with token usage and threshold info
- """
- return {
- "last_prompt_tokens": self.last_prompt_tokens,
- "threshold_tokens": self.threshold_tokens,
- "context_length": self.context_length,
- "usage_percent": (self.last_prompt_tokens / self.context_length * 100) if self.context_length else 0,
- "compression_count": self.compression_count,
- }
-
- def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> str:
- """
- Generate a concise summary of conversation turns using a fast model.
-
- Args:
- turns_to_summarize: List of message dicts to summarize
-
- Returns:
- Summary string
- """
- if not self.client:
- # Fallback if no API key
- return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed to save space. The assistant performed various actions and received responses."
-
- # Format turns for summarization
- parts = []
- for i, msg in enumerate(turns_to_summarize):
- role = msg.get("role", "unknown")
- content = msg.get("content", "")
-
- # Truncate very long content
- if len(content) > 2000:
- content = content[:1000] + "\n...[truncated]...\n" + content[-500:]
-
- # Include tool call info if present
- tool_calls = msg.get("tool_calls", [])
- if tool_calls:
- tool_names = [tc.get("function", {}).get("name", "?") for tc in tool_calls if isinstance(tc, dict)]
- content += f"\n[Tool calls: {', '.join(tool_names)}]"
-
- parts.append(f"[{role.upper()}]: {content}")
-
- content_to_summarize = "\n\n".join(parts)
-
- prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
-
-Write from a neutral perspective describing:
-1. What actions were taken (tool calls, searches, file operations)
-2. Key information or results obtained
-3. Important decisions or findings
-4. Relevant data, file names, or outputs
-
-Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
-
----
-TURNS TO SUMMARIZE:
-{content_to_summarize}
----
-
-Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
-
- try:
- response = self.client.chat.completions.create(
- model=self.summary_model,
- messages=[{"role": "user", "content": prompt}],
- temperature=0.3,
- max_tokens=self.summary_target_tokens * 2,
- timeout=30.0,
- )
-
- summary = response.choices[0].message.content.strip()
- if not summary.startswith("[CONTEXT SUMMARY]:"):
- summary = "[CONTEXT SUMMARY]: " + summary
-
- return summary
-
- except Exception as e:
- logging.warning(f"Failed to generate context summary: {e}")
- return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed. The assistant performed tool calls and received responses."
-
- def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
- """
- Compress conversation messages by summarizing middle turns.
-
- Algorithm:
- 1. Keep first N turns (system prompt, initial context)
- 2. Keep last N turns (recent/relevant context)
- 3. Summarize everything in between
- 4. Insert summary as a user message
-
- Args:
- messages: Current conversation messages
- current_tokens: Actual token count from API (for logging). If None, uses estimate.
-
- Returns:
- Compressed message list
- """
- n_messages = len(messages)
-
- # Not enough messages to compress
- if n_messages <= self.protect_first_n + self.protect_last_n + 1:
- if not self.quiet_mode:
- print(f"⚠️ Cannot compress: only {n_messages} messages (need > {self.protect_first_n + self.protect_last_n + 1})")
- return messages
-
- # Determine compression boundaries
- compress_start = self.protect_first_n
- compress_end = n_messages - self.protect_last_n
-
- # Nothing to compress
- if compress_start >= compress_end:
- return messages
-
- # Extract turns to summarize
- turns_to_summarize = messages[compress_start:compress_end]
-
- # Use actual token count if provided, otherwise estimate
- display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)
-
- if not self.quiet_mode:
- print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
- print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
- print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
-
- # Generate summary
- summary = self._generate_summary(turns_to_summarize)
-
- # Build compressed messages
- compressed = []
-
- # Keep protected head turns
- for i in range(compress_start):
- msg = messages[i].copy()
- # Add notice to system message on first compression
- if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
- msg["content"] = msg.get("content", "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
- compressed.append(msg)
-
- # Add summary as user message
- compressed.append({
- "role": "user",
- "content": summary
- })
-
- # Keep protected tail turns
- for i in range(compress_end, n_messages):
- compressed.append(messages[i].copy())
-
- self.compression_count += 1
-
- if not self.quiet_mode:
- # Estimate new size (actual will be known after next API call)
- new_estimate = estimate_messages_tokens_rough(compressed)
- saved_estimate = display_tokens - new_estimate
- print(f" ✅ Compressed: {n_messages} → {len(compressed)} messages (~{saved_estimate:,} tokens saved)")
- print(f" 💡 Compression #{self.compression_count} complete")
-
- return compressed
-
-
-# =============================================================================
-# Anthropic Prompt Caching (system_and_3 strategy)
-# =============================================================================
-# Reduces input token costs by ~75% on multi-turn conversations by caching
-# the conversation prefix. Uses 4 cache_control breakpoints (Anthropic max):
-# 1. System prompt (stable across all turns)
-# 2-4. Last 3 non-system messages (rolling window)
-#
-# Cached tokens are read at 0.1x input price. Cache writes cost 1.25x (5m TTL)
-# or 2x (1h TTL). Only applied to Claude models via OpenRouter.
-
-def _apply_cache_marker(msg: dict, cache_marker: dict) -> None:
- """
- Add cache_control to a single message, handling all format variations.
-
- - tool messages: cache_control at message level (Anthropic API quirk)
- - string content: converted to multipart content array
- - list content: marker added to last item
- - None content (assistant with tool_calls): message level
- """
- role = msg.get("role", "")
- content = msg.get("content")
-
- if role == "tool":
- msg["cache_control"] = cache_marker
- return
-
- if content is None:
- msg["cache_control"] = cache_marker
- return
-
- if isinstance(content, str):
- msg["content"] = [{"type": "text", "text": content, "cache_control": cache_marker}]
- return
-
- if isinstance(content, list) and content:
- last = content[-1]
- if isinstance(last, dict):
- last["cache_control"] = cache_marker
-
-
-def apply_anthropic_cache_control(
- api_messages: List[Dict[str, Any]],
- cache_ttl: str = "5m",
-) -> List[Dict[str, Any]]:
- """
- Apply system_and_3 caching strategy to messages for Anthropic models.
-
- Places up to 4 cache_control breakpoints:
- 1. System prompt (index 0, stable across all turns)
- 2-4. Last 3 non-system messages (rolling cache frontier)
-
- Each breakpoint tells Anthropic "cache everything from the start up to here."
- Multiple breakpoints create a ladder of cached prefixes at different depths,
- which provides robust cache hits even when the most recent cache entry hasn't
- propagated yet.
-
- Args:
- api_messages: Fully assembled message list (system prompt first).
- cache_ttl: "5m" (default, 1.25x write cost) or "1h" (2x write cost).
-
- Returns:
- Deep copy of messages with cache_control breakpoints injected.
- """
- messages = copy.deepcopy(api_messages)
- if not messages:
- return messages
-
- marker = {"type": "ephemeral"}
- if cache_ttl == "1h":
- marker["ttl"] = "1h"
-
- breakpoints_used = 0
-
- # Breakpoint 1: System prompt (always stable, gives a guaranteed minimum hit)
- if messages[0].get("role") == "system":
- _apply_cache_marker(messages[0], marker)
- breakpoints_used += 1
-
- # Breakpoints 2-4: Last 3 non-system messages (rolling window)
- remaining = 4 - breakpoints_used
- non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
- for idx in non_sys[-remaining:]:
- _apply_cache_marker(messages[idx], marker)
-
- return messages
-
-
-# =============================================================================
-# Default System Prompt Components
-# =============================================================================
-
-# Skills guidance - embeds a compact skill index in the system prompt so
-# the model can match skills at a glance without extra tool calls.
-def build_skills_system_prompt() -> str:
- """
- Build a dynamic skills system prompt by scanning both bundled and user skill directories.
-
- Returns a prompt section that lists all skill categories (with descriptions
- from DESCRIPTION.md) and their skill names inline, so the model can
- immediately see if a relevant skill exists and load it with a single
- skill_view(name) call -- no discovery tool calls needed.
-
- Returns:
- str: The skills system prompt section, or empty string if no skills found.
- """
- import os
- from pathlib import Path
-
- hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
- skills_dir = hermes_home / "skills"
-
- if not skills_dir.exists():
- return ""
-
- # Scan for SKILL.md files grouped by category
- skills_by_category = {}
- for skill_file in skills_dir.rglob("SKILL.md"):
- rel_path = skill_file.relative_to(skills_dir)
- parts = rel_path.parts
- if len(parts) >= 2:
- category = parts[0]
- skill_name = parts[-2]
- else:
- category = "general"
- skill_name = skill_file.parent.name
- skills_by_category.setdefault(category, []).append(skill_name)
-
- if not skills_by_category:
- return ""
-
- # Load category descriptions from DESCRIPTION.md files
- category_descriptions = {}
- for category in skills_by_category:
- desc_file = skills_dir / category / "DESCRIPTION.md"
- if desc_file.exists():
- try:
- content = desc_file.read_text(encoding="utf-8")
- match = re.search(r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---", content, re.MULTILINE | re.DOTALL)
- if match:
- category_descriptions[category] = match.group(1).strip()
- except Exception as e:
- logger.debug("Could not read skill description %s: %s", desc_file, e)
-
- index_lines = []
- for category in sorted(skills_by_category.keys()):
- desc = category_descriptions.get(category, "")
- names = ", ".join(sorted(set(skills_by_category[category])))
- if desc:
- index_lines.append(f" {category}: {desc}")
- else:
- index_lines.append(f" {category}:")
- index_lines.append(f" skills: {names}")
-
- return (
- "## Skills (mandatory)\n"
- "Before replying, scan the skills below. If one clearly matches your task, "
- "load it with skill_view(name) and follow its instructions. "
- "If a skill has issues, fix it with skill_manage(action='patch').\n"
- "\n"
- "\n"
- + "\n".join(index_lines) + "\n"
- "\n"
- "\n"
- "If none match, proceed normally without loading a skill."
- )
-
-
-# =============================================================================
-# Context File Injection (SOUL.md, AGENTS.md, .cursorrules)
-# =============================================================================
-
-# Maximum characters per context file before truncation
-CONTEXT_FILE_MAX_CHARS = 20_000
-# Truncation strategy: keep 70% from the head, 20% from the tail
-CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
-CONTEXT_TRUNCATE_TAIL_RATIO = 0.2
-
-
-def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
- """
- Truncate content if it exceeds max_chars using a head/tail strategy.
-
- Keeps 70% from the start and 20% from the end, with a truncation
- marker in the middle so the model knows content was cut.
- """
- if len(content) <= max_chars:
- return content
-
- head_chars = int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO)
- tail_chars = int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO)
- head = content[:head_chars]
- tail = content[-tail_chars:]
-
- marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
- return head + marker + tail
-
-
-def build_context_files_prompt(cwd: str = None) -> str:
- """
- Discover and load context files (SOUL.md, AGENTS.md, .cursorrules)
- for injection into the system prompt.
-
- Discovery rules:
- - AGENTS.md: Recursively search from cwd (only if top-level exists).
- Each file becomes a ## section with its relative path.
- - .cursorrules: Check cwd for .cursorrules file and .cursor/rules/*.mdc
- - SOUL.md: Check cwd first, then ~/.hermes/SOUL.md as global fallback
-
- Args:
- cwd: Working directory to search from. Defaults to os.getcwd().
-
- Returns:
- str: The context files prompt section, or empty string if none found.
- """
- import os
- import glob as glob_mod
- from pathlib import Path
-
- if cwd is None:
- cwd = os.getcwd()
-
- cwd_path = Path(cwd).resolve()
- sections = []
-
- # ----- AGENTS.md (hierarchical, recursive) -----
- top_level_agents = None
- for name in ["AGENTS.md", "agents.md"]:
- candidate = cwd_path / name
- if candidate.exists():
- top_level_agents = candidate
- break
-
- if top_level_agents:
- # Recursively find all AGENTS.md files (case-insensitive)
- agents_files = []
- for root, dirs, files in os.walk(cwd_path):
- # Skip hidden directories and common non-project dirs
- dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ('node_modules', '__pycache__', 'venv', '.venv')]
- for f in files:
- if f.lower() == "agents.md":
- agents_files.append(Path(root) / f)
-
- # Sort by path depth (top-level first, then deeper)
- agents_files.sort(key=lambda p: len(p.parts))
-
- total_agents_content = ""
- for agents_path in agents_files:
- try:
- content = agents_path.read_text(encoding="utf-8").strip()
- if content:
- rel_path = agents_path.relative_to(cwd_path)
- total_agents_content += f"## {rel_path}\n\n{content}\n\n"
- except Exception as e:
- logger.debug("Could not read %s: %s", agents_path, e)
-
- if total_agents_content:
- total_agents_content = _truncate_content(total_agents_content, "AGENTS.md")
- sections.append(total_agents_content)
-
- # ----- .cursorrules -----
- cursorrules_content = ""
-
- # Check for .cursorrules file
- cursorrules_file = cwd_path / ".cursorrules"
- if cursorrules_file.exists():
- try:
- content = cursorrules_file.read_text(encoding="utf-8").strip()
- if content:
- cursorrules_content += f"## .cursorrules\n\n{content}\n\n"
- except Exception as e:
- logger.debug("Could not read .cursorrules: %s", e)
-
- # Check for .cursor/rules/*.mdc files
- cursor_rules_dir = cwd_path / ".cursor" / "rules"
- if cursor_rules_dir.exists() and cursor_rules_dir.is_dir():
- mdc_files = sorted(cursor_rules_dir.glob("*.mdc"))
- for mdc_file in mdc_files:
- try:
- content = mdc_file.read_text(encoding="utf-8").strip()
- if content:
- cursorrules_content += f"## .cursor/rules/{mdc_file.name}\n\n{content}\n\n"
- except Exception as e:
- logger.debug("Could not read %s: %s", mdc_file, e)
-
- if cursorrules_content:
- cursorrules_content = _truncate_content(cursorrules_content, ".cursorrules")
- sections.append(cursorrules_content)
-
- # ----- SOUL.md (cwd first, then ~/.hermes/ fallback) -----
- soul_content = ""
- soul_path = None
-
- for name in ["SOUL.md", "soul.md"]:
- candidate = cwd_path / name
- if candidate.exists():
- soul_path = candidate
- break
-
- if not soul_path:
- # Global fallback
- global_soul = Path.home() / ".hermes" / "SOUL.md"
- if global_soul.exists():
- soul_path = global_soul
-
- if soul_path:
- try:
- content = soul_path.read_text(encoding="utf-8").strip()
- if content:
- content = _truncate_content(content, "SOUL.md")
- soul_content = f"## SOUL.md\n\nIf SOUL.md is present, embody its persona and tone. Avoid stiff, generic replies; follow its guidance unless higher-priority instructions override it.\n\n{content}"
- sections.append(soul_content)
- except Exception as e:
- logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)
-
- # ----- Assemble -----
- if not sections:
- return ""
-
- return "# Project Context\n\nThe following project context files have been loaded and should be followed:\n\n" + "\n".join(sections)
-
-
-def _build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str:
- """
- Build a short preview of a tool call's primary argument for display.
-
- Returns a truncated string showing the most informative argument,
- or None if no meaningful preview is available.
-
- Args:
- tool_name: Name of the tool being called
- args: The tool call arguments dict
- max_len: Maximum preview length before truncation
-
- Returns:
- str or None: Short preview string, or None
- """
- # Map tool names to their primary argument key(s)
- primary_args = {
- "terminal": "command",
- "web_search": "query",
- "web_extract": "urls",
- "read_file": "path",
- "write_file": "path",
- "patch": "path",
- "search_files": "pattern",
- "browser_navigate": "url",
- "browser_click": "ref",
- "browser_type": "text",
- "image_generate": "prompt",
- "text_to_speech": "text",
- "vision_analyze": "question",
- "mixture_of_agents": "user_prompt",
- "skill_view": "name",
- "skills_list": "category",
- "schedule_cronjob": "name",
- }
-
- # Special handling for tools with composite previews
- if tool_name == "process":
- action = args.get("action", "")
- session_id = args.get("session_id", "")
- data = args.get("data", "")
- timeout = args.get("timeout")
- parts = [action]
- if session_id:
- parts.append(session_id[:16])
- if data:
- parts.append(f'"{data[:20]}"')
- if timeout and action == "wait":
- parts.append(f"{timeout}s")
- return " ".join(parts) if parts else None
-
- if tool_name == "todo":
- todos_arg = args.get("todos")
- merge = args.get("merge", False)
- if todos_arg is None:
- return "reading task list"
- elif merge:
- return f"updating {len(todos_arg)} task(s)"
- else:
- return f"planning {len(todos_arg)} task(s)"
-
- if tool_name == "session_search":
- query = args.get("query", "")
- return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\""
-
- if tool_name == "memory":
- action = args.get("action", "")
- target = args.get("target", "")
- if action == "add":
- content = args.get("content", "")
- return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
- elif action == "replace":
- return f"~{target}: \"{args.get('old_text', '')[:20]}\""
- elif action == "remove":
- return f"-{target}: \"{args.get('old_text', '')[:20]}\""
- return action
-
- if tool_name == "send_message":
- target = args.get("target", "?")
- msg = args.get("message", "")
- if len(msg) > 20:
- msg = msg[:17] + "..."
- return f"to {target}: \"{msg}\""
-
- if tool_name.startswith("rl_"):
- rl_previews = {
- "rl_list_environments": "listing envs",
- "rl_select_environment": args.get("name", ""),
- "rl_get_current_config": "reading config",
- "rl_edit_config": f"{args.get('field', '')}={args.get('value', '')}",
- "rl_start_training": "starting",
- "rl_check_status": args.get("run_id", "")[:16],
- "rl_stop_training": f"stopping {args.get('run_id', '')[:16]}",
- "rl_get_results": args.get("run_id", "")[:16],
- "rl_list_runs": "listing runs",
- "rl_test_inference": f"{args.get('num_steps', 3)} steps",
- }
- return rl_previews.get(tool_name)
-
- key = primary_args.get(tool_name)
- if not key:
- # Try common arg names as fallback
- for fallback_key in ("query", "text", "command", "path", "name", "prompt"):
- if fallback_key in args:
- key = fallback_key
- break
-
- if not key or key not in args:
- return None
-
- value = args[key]
-
- # Handle list values (e.g., urls)
- if isinstance(value, list):
- value = value[0] if value else ""
-
- preview = str(value).strip()
- if not preview:
- return None
-
- # Truncate
- if len(preview) > max_len:
- preview = preview[:max_len - 3] + "..."
-
- return preview
-
-
-class KawaiiSpinner:
- """
- Animated spinner with kawaii faces for CLI feedback during tool execution.
- Runs in a background thread and can be stopped when the operation completes.
-
- Uses stdout with carriage return to animate in place.
- """
-
- # Different spinner animation sets
- SPINNERS = {
- 'dots': ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'],
- 'bounce': ['⠁', '⠂', '⠄', '⡀', '⢀', '⠠', '⠐', '⠈'],
- 'grow': ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█', '▇', '▆', '▅', '▄', '▃', '▂'],
- 'arrows': ['←', '↖', '↑', '↗', '→', '↘', '↓', '↙'],
- 'star': ['✶', '✷', '✸', '✹', '✺', '✹', '✸', '✷'],
- 'moon': ['🌑', '🌒', '🌓', '🌔', '🌕', '🌖', '🌗', '🌘'],
- 'pulse': ['◜', '◠', '◝', '◞', '◡', '◟'],
- 'brain': ['🧠', '💭', '💡', '✨', '💫', '🌟', '💡', '💭'],
- 'sparkle': ['⁺', '˚', '*', '✧', '✦', '✧', '*', '˚'],
- }
-
- # General waiting faces
- KAWAII_WAITING = [
- "(。◕‿◕。)", "(◕‿◕✿)", "٩(◕‿◕。)۶", "(✿◠‿◠)", "( ˘▽˘)っ",
- "♪(´ε` )", "(◕ᴗ◕✿)", "ヾ(^∇^)", "(≧◡≦)", "(★ω★)",
- ]
-
- # Thinking-specific faces and messages
- KAWAII_THINKING = [
- "(。•́︿•̀。)", "(◔_◔)", "(¬‿¬)", "( •_•)>⌐■-■", "(⌐■_■)",
- "(´・_・`)", "◉_◉", "(°ロ°)", "( ˘⌣˘)♡", "ヽ(>∀<☆)☆",
- "٩(๑❛ᴗ❛๑)۶", "(⊙_⊙)", "(¬_¬)", "( ͡° ͜ʖ ͡°)", "ಠ_ಠ",
- ]
-
- THINKING_VERBS = [
- "pondering", "contemplating", "musing", "cogitating", "ruminating",
- "deliberating", "mulling", "reflecting", "processing", "reasoning",
- "analyzing", "computing", "synthesizing", "formulating", "brainstorming",
- ]
-
- def __init__(self, message: str = "", spinner_type: str = 'dots'):
- self.message = message
- self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots'])
- self.running = False
- self.thread = None
- self.frame_idx = 0
- self.start_time = None
- self.last_line_len = 0
-
- def _animate(self):
- """Animation loop that runs in background thread."""
- while self.running:
- # Check for pause signal (e.g., during sudo password prompt)
- if os.getenv("HERMES_SPINNER_PAUSE"):
- time.sleep(0.1)
- continue
-
- frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
- elapsed = time.time() - self.start_time
-
- # Build the spinner line
- line = f" {frame} {self.message} ({elapsed:.1f}s)"
-
- # Clear previous line and write new one
- clear = '\r' + ' ' * self.last_line_len + '\r'
- print(clear + line, end='', flush=True)
- self.last_line_len = len(line)
-
- self.frame_idx += 1
- time.sleep(0.12) # ~8 FPS animation
-
- def start(self):
- """Start the spinner animation."""
- if self.running:
- return
- self.running = True
- self.start_time = time.time()
- self.thread = threading.Thread(target=self._animate, daemon=True)
- self.thread.start()
-
- def update_text(self, new_message: str):
- """Update the spinner message text while it's running."""
- self.message = new_message
-
- def stop(self, final_message: str = None):
- """Stop the spinner and optionally print a final message."""
- self.running = False
- if self.thread:
- self.thread.join(timeout=0.5)
-
- # Clear the spinner line
- print('\r' + ' ' * (self.last_line_len + 5) + '\r', end='', flush=True)
-
- # Print final message if provided
- if final_message:
- print(f" {final_message}", flush=True)
-
- def __enter__(self):
- self.start()
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- self.stop()
- return False
-
class AIAgent:
"""
@@ -1238,256 +410,6 @@ class AIAgent:
else:
print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
- # Pools of kawaii faces for random selection
- KAWAII_SEARCH = [
- "♪(´ε` )", "(。◕‿◕。)", "ヾ(^∇^)", "(◕ᴗ◕✿)", "( ˘▽˘)っ",
- "٩(◕‿◕。)۶", "(✿◠‿◠)", "♪~(´ε` )", "(ノ´ヮ`)ノ*:・゚✧", "\(◎o◎)/",
- ]
- KAWAII_READ = [
- "φ(゜▽゜*)♪", "( ˘▽˘)っ", "(⌐■_■)", "٩(。•́‿•̀。)۶", "(◕‿◕✿)",
- "ヾ(@⌒ー⌒@)ノ", "(✧ω✧)", "♪(๑ᴖ◡ᴖ๑)♪", "(≧◡≦)", "( ´ ▽ ` )ノ",
- ]
- KAWAII_TERMINAL = [
- "ヽ(>∀<☆)ノ", "(ノ°∀°)ノ", "٩(^ᴗ^)۶", "ヾ(⌐■_■)ノ♪", "(•̀ᴗ•́)و",
- "┗(^0^)┓", "(`・ω・´)", "\( ̄▽ ̄)/", "(ง •̀_•́)ง", "ヽ(´▽`)/",
- ]
- KAWAII_BROWSER = [
- "(ノ°∀°)ノ", "(☞゚ヮ゚)☞", "( ͡° ͜ʖ ͡°)", "┌( ಠ_ಠ)┘", "(⊙_⊙)?",
- "ヾ(•ω•`)o", "( ̄ω ̄)", "( ˇωˇ )", "(ᵔᴥᵔ)", "\(◎o◎)/",
- ]
- KAWAII_CREATE = [
- "✧*。٩(ˊᗜˋ*)و✧", "(ノ◕ヮ◕)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "٩(♡ε♡)۶", "(◕‿◕)♡",
- "✿◕ ‿ ◕✿", "(*≧▽≦)", "ヾ(^-^)ノ", "(☆▽☆)", "°˖✧◝(⁰▿⁰)◜✧˖°",
- ]
- KAWAII_SKILL = [
- "ヾ(@⌒ー⌒@)ノ", "(๑˃ᴗ˂)ﻭ", "٩(◕‿◕。)۶", "(✿╹◡╹)", "ヽ(・∀・)ノ",
- "(ノ´ヮ`)ノ*:・゚✧", "♪(๑ᴖ◡ᴖ๑)♪", "(◠‿◠)", "٩(ˊᗜˋ*)و", "(^▽^)",
- "ヾ(^∇^)", "(★ω★)/", "٩(。•́‿•̀。)۶", "(◕ᴗ◕✿)", "\(◎o◎)/",
- "(✧ω✧)", "ヽ(>∀<☆)ノ", "( ˘▽˘)っ", "(≧◡≦) ♡", "ヾ( ̄▽ ̄)",
- ]
- KAWAII_THINK = [
- "(っ°Д°;)っ", "(;′⌒`)", "(・_・ヾ", "( ´_ゝ`)", "( ̄ヘ ̄)",
- "(。-`ω´-)", "( ˘︹˘ )", "(¬_¬)", "ヽ(ー_ー )ノ", "(;一_一)",
- ]
- KAWAII_GENERIC = [
- "♪(´ε` )", "(◕‿◕✿)", "ヾ(^∇^)", "٩(◕‿◕。)۶", "(✿◠‿◠)",
- "(ノ´ヮ`)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "(☆▽☆)", "( ˘▽˘)っ", "(≧◡≦)",
- ]
-
- def _get_cute_tool_message(self, tool_name: str, args: dict, duration: float) -> str:
- """
- Generate a clean, aligned tool activity line for CLI quiet mode.
-
- Format: ┊ {emoji} {verb:9} {detail} {duration}
-
- Kawaii faces live in the animated spinner (while the tool runs).
- This completion message replaces the spinner with a permanent log line.
- """
- dur = f"{duration:.1f}s"
-
- def _trunc(s, n=40):
- s = str(s)
- return (s[:n-3] + "...") if len(s) > n else s
-
- def _path(p, n=35):
- p = str(p)
- return ("..." + p[-(n-3):]) if len(p) > n else p
-
- # ── Web ──
- if tool_name == "web_search":
- q = _trunc(args.get("query", ""), 42)
- return f"┊ 🔍 search {q} {dur}"
-
- if tool_name == "web_extract":
- urls = args.get("urls", [])
- if urls:
- url = urls[0] if isinstance(urls, list) else str(urls)
- domain = url.replace("https://", "").replace("http://", "").split("/")[0]
- extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
- return f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}"
- return f"┊ 📄 fetch pages {dur}"
-
- if tool_name == "web_crawl":
- url = args.get("url", "")
- domain = url.replace("https://", "").replace("http://", "").split("/")[0]
- return f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}"
-
- # ── Terminal & Process ──
- if tool_name == "terminal":
- cmd = _trunc(args.get("command", ""), 42)
- return f"┊ 💻 $ {cmd} {dur}"
-
- if tool_name == "process":
- action = args.get("action", "?")
- sid = args.get("session_id", "")[:12]
- labels = {
- "list": "ls processes", "poll": f"poll {sid}",
- "log": f"log {sid}", "wait": f"wait {sid}",
- "kill": f"kill {sid}", "write": f"write {sid}",
- "submit": f"submit {sid}",
- }
- detail = labels.get(action, f"{action} {sid}")
- return f"┊ ⚙️ proc {detail} {dur}"
-
- # ── Files ──
- if tool_name == "read_file":
- return f"┊ 📖 read {_path(args.get('path', ''))} {dur}"
-
- if tool_name == "write_file":
- return f"┊ ✍️ write {_path(args.get('path', ''))} {dur}"
-
- if tool_name == "patch":
- return f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}"
-
- if tool_name == "search_files":
- pattern = _trunc(args.get("pattern", ""), 35)
- target = args.get("target", "content")
- verb = "find" if target == "files" else "grep"
- return f"┊ 🔎 {verb:9} {pattern} {dur}"
-
- # ── Browser ──
- if tool_name == "browser_navigate":
- url = args.get("url", "")
- domain = url.replace("https://", "").replace("http://", "").split("/")[0]
- return f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}"
-
- if tool_name == "browser_snapshot":
- mode = "full" if args.get("full") else "compact"
- return f"┊ 📸 snapshot {mode} {dur}"
-
- if tool_name == "browser_click":
- return f"┊ 👆 click {args.get('ref', '?')} {dur}"
-
- if tool_name == "browser_type":
- text = _trunc(args.get("text", ""), 30)
- return f"┊ ⌨️ type \"{text}\" {dur}"
-
- if tool_name == "browser_scroll":
- d = args.get("direction", "down")
- arrow = {"down": "↓", "up": "↑", "right": "→", "left": "←"}.get(d, "↓")
- return f"┊ {arrow} scroll {d} {dur}"
-
- if tool_name == "browser_back":
- return f"┊ ◀️ back {dur}"
-
- if tool_name == "browser_press":
- return f"┊ ⌨️ press {args.get('key', '?')} {dur}"
-
- if tool_name == "browser_close":
- return f"┊ 🚪 close browser {dur}"
-
- if tool_name == "browser_get_images":
- return f"┊ 🖼️ images extracting {dur}"
-
- if tool_name == "browser_vision":
- return f"┊ 👁️ vision analyzing page {dur}"
-
- # ── Planning ──
- if tool_name == "todo":
- todos_arg = args.get("todos")
- merge = args.get("merge", False)
- if todos_arg is None:
- return f"┊ 📋 plan reading tasks {dur}"
- elif merge:
- return f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}"
- else:
- return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}"
-
- # ── Session Search ──
- if tool_name == "session_search":
- query = _trunc(args.get("query", ""), 35)
- return f"┊ 🔍 recall \"{query}\" {dur}"
-
- # ── Memory ──
- if tool_name == "memory":
- action = args.get("action", "?")
- target = args.get("target", "")
- if action == "add":
- preview = _trunc(args.get("content", ""), 30)
- return f"┊ 🧠 memory +{target}: \"{preview}\" {dur}"
- elif action == "replace":
- snippet = _trunc(args.get("old_text", ""), 20)
- return f"┊ 🧠 memory ~{target}: \"{snippet}\" {dur}"
- elif action == "remove":
- snippet = _trunc(args.get("old_text", ""), 20)
- return f"┊ 🧠 memory -{target}: \"{snippet}\" {dur}"
- elif action == "search_sessions":
- query = _trunc(args.get("content", ""), 30)
- return f"┊ 🧠 recall \"{query}\" {dur}"
- else:
- return f"┊ 🧠 memory {action} {dur}"
-
- # ── Skills ──
- if tool_name == "skills_list":
- return f"┊ 📚 skills list {args.get('category', 'all')} {dur}"
-
- if tool_name == "skill_view":
- return f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}"
-
- # ── Generation & Media ──
- if tool_name == "image_generate":
- return f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}"
-
- if tool_name == "text_to_speech":
- return f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}"
-
- if tool_name == "vision_analyze":
- return f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}"
-
- if tool_name == "mixture_of_agents":
- return f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}"
-
- # ── Messaging & Scheduling ──
- if tool_name == "send_message":
- target = args.get("target", "?")
- msg = _trunc(args.get("message", ""), 25)
- return f"┊ 📨 send {target}: \"{msg}\" {dur}"
-
- if tool_name == "schedule_cronjob":
- name = _trunc(args.get("name", args.get("prompt", "task")), 30)
- return f"┊ ⏰ schedule {name} {dur}"
-
- if tool_name == "list_cronjobs":
- return f"┊ ⏰ jobs listing {dur}"
-
- if tool_name == "remove_cronjob":
- return f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}"
-
- # ── RL Training ──
- if tool_name.startswith("rl_"):
- rl = {
- "rl_list_environments": "list envs",
- "rl_select_environment": f"select {args.get('name', '')}",
- "rl_get_current_config": "get config",
- "rl_edit_config": f"set {args.get('field', '?')}",
- "rl_start_training": "start training",
- "rl_check_status": f"status {args.get('run_id', '?')[:12]}",
- "rl_stop_training": f"stop {args.get('run_id', '?')[:12]}",
- "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
- "rl_list_runs": "list runs",
- "rl_test_inference": "test inference",
- }
- detail = rl.get(tool_name, tool_name.replace("rl_", ""))
- return f"┊ 🧪 rl {detail} {dur}"
-
- # ── Code Execution Sandbox ──
- if tool_name == "execute_code":
- code = args.get("code", "")
- first_line = code.strip().split("\n")[0] if code.strip() else ""
- return f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}"
-
- # ── Subagent Delegation ──
- if tool_name == "delegate_task":
- tasks = args.get("tasks")
- if tasks and isinstance(tasks, list):
- return f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}"
- goal = _trunc(args.get("goal", ""), 35)
- return f"┊ 🔀 delegate {goal} {dur}"
-
- # ── Fallback ──
- preview = _build_tool_preview(tool_name, args) or ""
- return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}"
-
def _has_content_after_think_block(self, content: str) -> bool:
"""
Check if content has actual text after any blocks.
@@ -2330,7 +1252,7 @@ class AIAgent:
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
- print(f" {self._get_cute_tool_message('todo', function_args, tool_duration)}")
+ print(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
elif function_name == "session_search" and self._session_db:
from tools.session_search_tool import session_search as _session_search
function_result = _session_search(
@@ -2341,7 +1263,7 @@ class AIAgent:
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
- print(f" {self._get_cute_tool_message('session_search', function_args, tool_duration)}")
+ print(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
elif function_name == "memory":
from tools.memory_tool import memory_tool as _memory_tool
function_result = _memory_tool(
@@ -2353,7 +1275,7 @@ class AIAgent:
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
- print(f" {self._get_cute_tool_message('memory', function_args, tool_duration)}")
+ print(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
elif function_name == "clarify":
from tools.clarify_tool import clarify_tool as _clarify_tool
function_result = _clarify_tool(
@@ -2363,7 +1285,7 @@ class AIAgent:
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
- print(f" {self._get_cute_tool_message('clarify', function_args, tool_duration)}")
+ print(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
elif function_name == "delegate_task":
from tools.delegate_tool import delegate_task as _delegate_task
tasks_arg = function_args.get("tasks")
@@ -2378,6 +1300,7 @@ class AIAgent:
spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots')
spinner.start()
self._delegate_spinner = spinner
+ _delegate_result = None
try:
function_result = _delegate_task(
goal=function_args.get("goal"),
@@ -2388,10 +1311,11 @@ class AIAgent:
max_iterations=function_args.get("max_iterations"),
parent_agent=self,
)
+ _delegate_result = function_result
finally:
self._delegate_spinner = None
tool_duration = time.time() - tool_start_time
- cute_msg = self._get_cute_tool_message('delegate_task', function_args, tool_duration)
+ cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
if spinner:
spinner.stop(cute_msg)
elif self.quiet_mode:
@@ -2420,11 +1344,13 @@ class AIAgent:
preview = preview[:27] + "..."
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
spinner.start()
+ _spinner_result = None
try:
function_result = handle_function_call(function_name, function_args, effective_task_id)
+ _spinner_result = function_result
finally:
tool_duration = time.time() - tool_start_time
- cute_msg = self._get_cute_tool_message(function_name, function_args, tool_duration)
+ cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
spinner.stop(cute_msg)
else:
function_result = handle_function_call(function_name, function_args, effective_task_id)
@@ -3187,11 +2113,40 @@ class AIAgent:
if self.verbose_logging:
logging.exception("Detailed error information:")
- # Add error to conversation and try to continue
- messages.append({
- "role": "assistant",
- "content": f"I encountered an error: {error_msg}. Let me try a different approach."
- })
+ # If an assistant message with tool_calls was already appended,
+ # the API expects a role="tool" result for every tool_call_id.
+ # Fill in error results for any that weren't answered yet.
+ pending_handled = False
+ for idx in range(len(messages) - 1, -1, -1):
+ msg = messages[idx]
+ if not isinstance(msg, dict):
+ break
+ if msg.get("role") == "tool":
+ continue
+ if msg.get("role") == "assistant" and msg.get("tool_calls"):
+ answered_ids = {
+ m["tool_call_id"]
+ for m in messages[idx + 1:]
+ if isinstance(m, dict) and m.get("role") == "tool"
+ }
+ for tc in msg["tool_calls"]:
+ if tc["id"] not in answered_ids:
+ messages.append({
+ "role": "tool",
+ "tool_call_id": tc["id"],
+ "content": f"Error executing tool: {error_msg}",
+ })
+ pending_handled = True
+ break
+
+ if not pending_handled:
+ # Error happened before tool processing (e.g. response parsing).
+ # Use a user-role message so the model can see what went wrong
+ # without confusing the API with a fabricated assistant turn.
+ messages.append({
+ "role": "user",
+ "content": f"[System error during processing: {error_msg}]",
+ })
# If we're near the limit, break to avoid infinite loops
if api_call_count >= self.max_iterations - 1:
diff --git a/tools/approval.py b/tools/approval.py
index 2db8424cb7..18f9b67431 100644
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str,
approval_callback=approval_callback)
if choice == "deny":
- return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."}
+ return {
+ "approved": False,
+ "message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.",
+ "pattern_key": pattern_key,
+ "description": description,
+ }
if choice == "session":
approve_session(session_key, pattern_key)
diff --git a/tools/browser_tool.py b/tools/browser_tool.py
index 4467b890d3..b76b886bc8 100644
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -51,25 +51,16 @@ import signal
import subprocess
import shutil
import sys
-import asyncio
import tempfile
import threading
import time
import requests
from typing import Dict, Any, Optional, List
from pathlib import Path
-from hermes_constants import OPENROUTER_CHAT_URL
+from agent.auxiliary_client import get_vision_auxiliary_client
logger = logging.getLogger(__name__)
-# Try to import httpx for async LLM calls
-try:
- import httpx
- HTTPX_AVAILABLE = True
-except ImportError:
- HTTPX_AVAILABLE = False
-
-
# ============================================================================
# Configuration
# ============================================================================
@@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300
# Max tokens for snapshot content before summarization
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
-# Model for task-aware extraction
-EXTRACTION_MODEL = "google/gemini-3-flash-preview"
+# Resolve the auxiliary vision client and model once at import time (both are None when no backend is available)
+_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
# Track active sessions per task
# Now stores tuple of (session_name, browserbase_session_id, cdp_url)
@@ -782,87 +773,49 @@ def _run_browser_command(
return {"success": False, "error": str(e)}
-async def _extract_relevant_content(
+def _extract_relevant_content(
snapshot_text: str,
user_task: Optional[str] = None
) -> str:
+ """Use LLM to extract relevant content from a snapshot based on the user's task.
+
+ Falls back to simple truncation when no auxiliary vision model is configured or the LLM call raises.
"""
- Use LLM to extract relevant content from a snapshot based on the user's task.
-
- This provides task-aware summarization that preserves meaningful text content
- (paragraphs, prices, descriptions) relevant to what the user is trying to accomplish.
-
- Args:
- snapshot_text: The full snapshot text
- user_task: The user's current task/goal (optional)
-
- Returns:
- Summarized/extracted content
- """
- if not HTTPX_AVAILABLE:
- # Fall back to simple truncation
+ if _aux_vision_client is None or EXTRACTION_MODEL is None:
return _truncate_snapshot(snapshot_text)
-
- # Get API key
- api_key = os.environ.get("OPENROUTER_API_KEY")
- if not api_key:
- return _truncate_snapshot(snapshot_text)
-
- # Build extraction prompt
+
if user_task:
- extraction_prompt = f"""You are a content extractor for a browser automation agent.
-
-The user's task is: {user_task}
-
-Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on:
-1. Interactive elements (buttons, links, inputs) that might be needed
-2. Text content relevant to the task (prices, descriptions, headings, important info)
-3. Navigation structure if relevant
-
-Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.
-
-Page Snapshot:
-{snapshot_text}
-
-Provide a concise summary that preserves actionable information and relevant content."""
+ extraction_prompt = (
+ f"You are a content extractor for a browser automation agent.\n\n"
+ f"The user's task is: {user_task}\n\n"
+ f"Given the following page snapshot (accessibility tree representation), "
+ f"extract and summarize the most relevant information for completing this task. Focus on:\n"
+ f"1. Interactive elements (buttons, links, inputs) that might be needed\n"
+ f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n"
+ f"3. Navigation structure if relevant\n\n"
+ f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n"
+ f"Page Snapshot:\n{snapshot_text}\n\n"
+ f"Provide a concise summary that preserves actionable information and relevant content."
+ )
else:
- extraction_prompt = f"""Summarize this page snapshot, preserving:
-1. All interactive elements with their ref IDs (like [ref=e5])
-2. Key text content and headings
-3. Important information visible on the page
-
-Page Snapshot:
-{snapshot_text}
-
-Provide a concise summary focused on interactive elements and key content."""
+ extraction_prompt = (
+ f"Summarize this page snapshot, preserving:\n"
+ f"1. All interactive elements with their ref IDs (like [ref=e5])\n"
+ f"2. Key text content and headings\n"
+ f"3. Important information visible on the page\n\n"
+ f"Page Snapshot:\n{snapshot_text}\n\n"
+ f"Provide a concise summary focused on interactive elements and key content."
+ )
try:
- async with httpx.AsyncClient(timeout=30.0) as client:
- response = await client.post(
- OPENROUTER_CHAT_URL,
- headers={
- "Authorization": f"Bearer {api_key}",
- "Content-Type": "application/json"
- },
- json={
- "model": EXTRACTION_MODEL,
- "messages": [
- {"role": "user", "content": extraction_prompt}
- ],
- "max_tokens": 4000,
- "temperature": 0.1
- }
- )
-
- if response.status_code == 200:
- result = response.json()
- return result["choices"][0]["message"]["content"]
- else:
- # Fall back to truncation on API error
- return _truncate_snapshot(snapshot_text)
-
+ response = _aux_vision_client.chat.completions.create(
+ model=EXTRACTION_MODEL,
+ messages=[{"role": "user", "content": extraction_prompt}],
+ max_tokens=4000,
+ temperature=0.1,
+ )
+ return response.choices[0].message.content
except Exception:
- # Fall back to truncation on any error
return _truncate_snapshot(snapshot_text)
@@ -991,16 +944,7 @@ def browser_snapshot(
# Check if snapshot needs summarization
if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task:
- # Run async extraction
- try:
- loop = asyncio.get_event_loop()
- except RuntimeError:
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
-
- snapshot_text = loop.run_until_complete(
- _extract_relevant_content(snapshot_text, user_task)
- )
+ snapshot_text = _extract_relevant_content(snapshot_text, user_task)
elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
snapshot_text = _truncate_snapshot(snapshot_text)
@@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
effective_task_id = task_id or "default"
- # Check for OpenRouter API key
- api_key = os.environ.get("OPENROUTER_API_KEY")
- if not api_key:
+ # Check auxiliary vision client
+ if _aux_vision_client is None or EXTRACTION_MODEL is None:
return json.dumps({
"success": False,
- "error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key."
+ "error": "Browser vision unavailable: no auxiliary vision model configured. "
+ "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
}, ensure_ascii=False)
# Create a temporary file for the screenshot
@@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
image_base64 = base64.b64encode(image_data).decode("ascii")
data_url = f"data:image/png;base64,{image_base64}"
- # Prepare the vision prompt
- vision_prompt = f"""You are analyzing a screenshot of a web browser.
+ vision_prompt = (
+ f"You are analyzing a screenshot of a web browser.\n\n"
+ f"User's question: {question}\n\n"
+ f"Provide a detailed and helpful answer based on what you see in the screenshot. "
+ f"If there are interactive elements, describe them. If there are verification challenges "
+ f"or CAPTCHAs, describe what type they are and what action might be needed. "
+ f"Focus on answering the user's specific question."
+ )
-User's question: {question}
-
-Provide a detailed and helpful answer based on what you see in the screenshot.
-If there are interactive elements, describe them. If there are verification challenges
-or CAPTCHAs, describe what type they are and what action might be needed.
-Focus on answering the user's specific question."""
-
- # Call OpenRouter/Gemini for vision analysis
- if HTTPX_AVAILABLE:
- import asyncio
-
- async def analyze_screenshot():
- async with httpx.AsyncClient(timeout=60.0) as client:
- response = await client.post(
- OPENROUTER_CHAT_URL,
- headers={
- "Authorization": f"Bearer {api_key}",
- "Content-Type": "application/json"
- },
- json={
- "model": "google/gemini-3-flash-preview",
- "messages": [
- {
- "role": "user",
- "content": [
- {"type": "text", "text": vision_prompt},
- {
- "type": "image_url",
- "image_url": {"url": data_url}
- }
- ]
- }
- ],
- "max_tokens": 2000,
- "temperature": 0.1
- }
- )
-
- if response.status_code != 200:
- return {
- "success": False,
- "error": f"Vision API error: {response.status_code} - {response.text[:200]}"
- }
-
- result_data = response.json()
- analysis = result_data["choices"][0]["message"]["content"]
- return {
- "success": True,
- "analysis": analysis
- }
-
- # Run the async function
- try:
- loop = asyncio.get_event_loop()
- except RuntimeError:
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
-
- vision_result = loop.run_until_complete(analyze_screenshot())
- return json.dumps(vision_result, ensure_ascii=False)
-
- else:
- # Fallback: use synchronous requests
- response = requests.post(
- OPENROUTER_CHAT_URL,
- headers={
- "Authorization": f"Bearer {api_key}",
- "Content-Type": "application/json"
- },
- json={
- "model": "google/gemini-3-flash-preview",
- "messages": [
- {
- "role": "user",
- "content": [
- {"type": "text", "text": vision_prompt},
- {
- "type": "image_url",
- "image_url": {"url": data_url}
- }
- ]
- }
+ # Use the sync auxiliary vision client directly
+ response = _aux_vision_client.chat.completions.create(
+ model=EXTRACTION_MODEL,
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": vision_prompt},
+ {"type": "image_url", "image_url": {"url": data_url}},
],
- "max_tokens": 2000,
- "temperature": 0.1
- },
- timeout=60
- )
-
- if response.status_code != 200:
- return json.dumps({
- "success": False,
- "error": f"Vision API error: {response.status_code} - {response.text[:200]}"
- }, ensure_ascii=False)
-
- result_data = response.json()
- analysis = result_data["choices"][0]["message"]["content"]
- return json.dumps({
- "success": True,
- "analysis": analysis
- }, ensure_ascii=False)
+ }
+ ],
+ max_tokens=2000,
+ temperature=0.1,
+ )
+
+ analysis = response.choices[0].message.content
+ return json.dumps({
+ "success": True,
+ "analysis": analysis,
+ }, ensure_ascii=False)
except Exception as e:
return json.dumps({
diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py
index 07c39989e8..16508e9762 100644
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@@ -22,9 +22,19 @@ import os
import logging
from typing import Dict, Any, List, Optional
-from tools.openrouter_client import get_async_client as _get_client
+from openai import AsyncOpenAI, OpenAI
-SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
+from agent.auxiliary_client import get_text_auxiliary_client
+
+# Resolve the auxiliary client at import time so we have the model slug, and build an
+# AsyncOpenAI from the same credentials. Optional[...] keeps Python < 3.10 importable.
+_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
+_async_aux_client: Optional[AsyncOpenAI] = None
+if _aux_client is not None:
+    _async_aux_client = AsyncOpenAI(
+        api_key=_aux_client.api_key,
+        base_url=str(_aux_client.base_url),
+    )
MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 2000
@@ -126,11 +136,15 @@ async def _summarize_session(
f"Summarize this conversation with focus on: {query}"
)
+ if _async_aux_client is None or _SUMMARIZER_MODEL is None:
+ logging.warning("No auxiliary model available for session summarization")
+ return None
+
max_retries = 3
for attempt in range(max_retries):
try:
- response = await _get_client().chat.completions.create(
- model=SUMMARIZER_MODEL,
+ response = await _async_aux_client.chat.completions.create(
+ model=_SUMMARIZER_MODEL,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
@@ -252,8 +266,8 @@ def session_search(
def check_session_search_requirements() -> bool:
- """Requires SQLite state database and OpenRouter API key."""
- if not os.getenv("OPENROUTER_API_KEY"):
+ """Requires SQLite state database and an auxiliary text model."""
+ if _async_aux_client is None:
return False
try:
from hermes_state import DEFAULT_DB_PATH
@@ -316,5 +330,4 @@ registry.register(
limit=args.get("limit", 3),
db=kw.get("db")),
check_fn=check_session_search_requirements,
- requires_env=["OPENROUTER_API_KEY"],
)
diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py
index 6b95d185db..09d1ff31dc 100644
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
# Global state for environment lifecycle management
_active_environments: Dict[str, Any] = {}
-_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory
_last_activity: Dict[str, float] = {}
_env_lock = threading.Lock()
_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
@@ -530,7 +529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
if current_time - last_time > lifetime_seconds:
env = _active_environments.pop(task_id, None)
_last_activity.pop(task_id, None)
- _task_workdirs.pop(task_id, None)
if env is not None:
envs_to_stop.append((task_id, env))
@@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]:
info = {
"count": len(_active_environments),
"task_ids": list(_active_environments.keys()),
- "workdirs": dict(_task_workdirs),
+ "workdirs": {},
}
# Calculate total disk usage
@@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]:
def cleanup_all_environments():
"""Clean up ALL active environments. Use with caution."""
- global _active_environments, _last_activity, _task_workdirs
+ global _active_environments, _last_activity
task_ids = list(_active_environments.keys())
cleaned = 0
@@ -661,7 +659,7 @@ def cleanup_all_environments():
def cleanup_vm(task_id: str):
"""Manually clean up a specific environment by task_id."""
- global _active_environments, _last_activity, _task_workdirs
+ global _active_environments, _last_activity
# Remove from tracking dicts while holding the lock, but defer the
# actual (potentially slow) env.cleanup() call to outside the lock
@@ -669,7 +667,6 @@ def cleanup_vm(task_id: str):
env = None
with _env_lock:
env = _active_environments.pop(task_id, None)
- _task_workdirs.pop(task_id, None)
_last_activity.pop(task_id, None)
# Clean up per-task creation lock
@@ -782,17 +779,6 @@ def terminal_tool(
default_timeout = config["timeout"]
effective_timeout = timeout or default_timeout
- # For local environment in batch mode, create a unique subdirectory per task
- # This prevents parallel tasks from overwriting each other's files
- # In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
- if env_type == "local" and not os.getenv("HERMES_QUIET"):
- with _env_lock:
- if effective_task_id not in _task_workdirs:
- task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
- task_workdir.mkdir(parents=True, exist_ok=True)
- _task_workdirs[effective_task_id] = str(task_workdir)
- cwd = _task_workdirs[effective_task_id]
-
# Start cleanup thread
_start_cleanup_thread()
@@ -874,11 +860,16 @@ def terminal_tool(
"description": approval.get("description", "dangerous command"),
"pattern_key": approval.get("pattern_key", ""),
}, ensure_ascii=False)
- # Command was blocked - return informative message
+ # Command was blocked - include the pattern category so the caller knows why
+ desc = approval.get("description", "potentially dangerous operation")
+ fallback_msg = (
+ f"Command denied: matches '{desc}' pattern. "
+ "Use the approval prompt to allow it, or rephrase the command."
+ )
return json.dumps({
"output": "",
"exit_code": -1,
- "error": approval.get("message", "Command denied - potentially dangerous operation"),
+ "error": approval.get("message", fallback_msg),
"status": "blocked"
}, ensure_ascii=False)
@@ -996,11 +987,17 @@ def terminal_tool(
# Add helpful message for sudo failures in messaging context
output = _handle_sudo_failure(output, env_type)
- # Truncate output if too long
+ # Truncate output if too long, keeping both head and tail
MAX_OUTPUT_CHARS = 50000
if len(output) > MAX_OUTPUT_CHARS:
- truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..."
- output = truncated_notice + output[-MAX_OUTPUT_CHARS:]
+ head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early)
+ tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)
+ omitted = len(output) - head_chars - tail_chars
+ truncated_notice = (
+ f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
+ f"out of {len(output)} total] ...\n\n"
+ )
+ output = output[:head_chars] + truncated_notice + output[-tail_chars:]
return json.dumps({
"output": output.strip() if output else "",
diff --git a/tools/vision_tools.py b/tools/vision_tools.py
index 7750485d5d..90c0d430c2 100644
--- a/tools/vision_tools.py
+++ b/tools/vision_tools.py
@@ -36,13 +36,20 @@ import base64
from pathlib import Path
from typing import Dict, Any, Optional
import httpx
-from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
+from openai import AsyncOpenAI
+from agent.auxiliary_client import get_vision_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
-# Configuration for vision processing
-DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
+# Resolve vision auxiliary client at module level; build an async wrapper (Optional[...] keeps Python < 3.10 importable).
+_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
+_aux_async_client: Optional[AsyncOpenAI] = None
+if _aux_sync_client is not None:
+    _aux_async_client = AsyncOpenAI(
+        api_key=_aux_sync_client.api_key,
+        base_url=str(_aux_sync_client.base_url),
+    )
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@@ -230,9 +237,13 @@ async def vision_analyze_tool(
logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100])
- # Check API key availability
- if not os.getenv("OPENROUTER_API_KEY"):
- raise ValueError("OPENROUTER_API_KEY environment variable not set")
+ # Check auxiliary vision client availability
+ if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
+ return json.dumps({
+ "success": False,
+ "analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
+ "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
+ }, indent=2, ensure_ascii=False)
# Determine if this is a local file path or a remote URL
local_path = Path(image_url)
@@ -291,18 +302,12 @@ async def vision_analyze_tool(
logger.info("Processing image with %s...", model)
- # Call the vision API with reasoning enabled
- response = await _get_openrouter_client().chat.completions.create(
+ # Call the vision API
+ response = await _aux_async_client.chat.completions.create(
model=model,
messages=messages,
- temperature=0.1, # Low temperature for consistent analysis
- max_tokens=2000, # Generous limit for detailed analysis
- extra_body={
- "reasoning": {
- "enabled": True,
- "effort": "xhigh"
- }
- }
+ temperature=0.1,
+ max_tokens=2000,
)
# Extract the analysis
@@ -353,13 +358,8 @@ async def vision_analyze_tool(
def check_vision_requirements() -> bool:
- """
- Check if all requirements for vision tools are met.
-
- Returns:
- bool: True if requirements are met, False otherwise
- """
- return check_openrouter_api_key()
+ """Check if an auxiliary vision model is available."""
+ return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
@@ -379,16 +379,15 @@ if __name__ == "__main__":
print("👁️ Vision Tools Module")
print("=" * 40)
- # Check if API key is available
- api_available = check_openrouter_api_key()
+ # Check if vision model is available
+ api_available = check_vision_requirements()
if not api_available:
- print("❌ OPENROUTER_API_KEY environment variable not set")
- print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
- print("Get API key at: https://openrouter.ai/")
+ print("❌ No auxiliary vision model available")
+ print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
exit(1)
else:
- print("✅ OpenRouter API key found")
+ print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
@@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw):
image_url = args.get("image_url", "")
question = args.get("question", "")
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
- return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview")
+ model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
+ return vision_analyze_tool(image_url, full_prompt, model)
registry.register(
@@ -464,6 +464,5 @@ registry.register(
schema=VISION_ANALYZE_SCHEMA,
handler=_handle_vision_analyze,
check_fn=check_vision_requirements,
- requires_env=["OPENROUTER_API_KEY"],
is_async=True,
)
diff --git a/tools/web_tools.py b/tools/web_tools.py
index 5809a26faf..868abb9420 100644
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@@ -47,7 +47,8 @@ import re
import asyncio
from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl
-from tools.openrouter_client import get_async_client as _get_openrouter_client
+from openai import AsyncOpenAI
+from agent.auxiliary_client import get_text_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
@@ -64,9 +65,17 @@ def _get_firecrawl_client():
_firecrawl_client = Firecrawl(api_key=api_key)
return _firecrawl_client
-DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
+# Resolve auxiliary text client at module level; build an async wrapper (Optional[...] keeps Python < 3.10 importable).
+_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
+_aux_async_client: Optional[AsyncOpenAI] = None
+if _aux_sync_client is not None:
+    _aux_async_client = AsyncOpenAI(
+        api_key=_aux_sync_client.api_key,
+        base_url=str(_aux_sync_client.base_url),
+    )
+
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized,
for attempt in range(max_retries):
try:
- response = await _get_openrouter_client().chat.completions.create(
+ if _aux_async_client is None:
+ logger.warning("No auxiliary model available for web content processing")
+ return None
+ response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
@@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized,
],
temperature=0.1,
max_tokens=max_tokens,
- extra_body={
- "reasoning": {
- "enabled": True,
- "effort": "xhigh"
- }
- }
)
return response.choices[0].message.content.strip()
except Exception as api_error:
@@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that:
Create a single, unified markdown summary."""
try:
- response = await _get_openrouter_client().chat.completions.create(
+ if _aux_async_client is None:
+ logger.warning("No auxiliary model for synthesis, concatenating summaries")
+ fallback = "\n\n".join(summaries)
+ if len(fallback) > max_output_size:
+ fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
+ return fallback
+
+ response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@@ -350,12 +363,6 @@ Create a single, unified markdown summary."""
],
temperature=0.1,
max_tokens=4000,
- extra_body={
- "reasoning": {
- "enabled": True,
- "effort": "xhigh"
- }
- }
)
final_summary = response.choices[0].message.content.strip()
@@ -677,8 +684,8 @@ async def web_extract_tool(
debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response))
- # Process each result with LLM if enabled
- if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
+ # Process each result with LLM if enabled and auxiliary client is available
+ if use_llm_processing and _aux_async_client is not None:
logger.info("Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
@@ -744,8 +751,8 @@ async def web_extract_tool(
else:
logger.warning("%s (no content to process)", url)
else:
- if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
- logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
+ if use_llm_processing and _aux_async_client is None:
+ logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of extracted pages for debugging (original behavior)
@@ -973,8 +980,8 @@ async def web_crawl_tool(
debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response))
- # Process each result with LLM if enabled
- if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
+ # Process each result with LLM if enabled and auxiliary client is available
+ if use_llm_processing and _aux_async_client is not None:
logger.info("Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
@@ -1040,8 +1047,8 @@ async def web_crawl_tool(
else:
logger.warning("%s (no content to process)", page_url)
else:
- if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
- logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
+ if use_llm_processing and _aux_async_client is None:
+ logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of crawled pages for debugging (original behavior)
@@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool:
return bool(os.getenv("FIRECRAWL_API_KEY"))
-def check_nous_api_key() -> bool:
- """
- Check if the Nous Research API key is available in environment variables.
-
- Returns:
- bool: True if API key is set, False otherwise
- """
- return bool(os.getenv("OPENROUTER_API_KEY"))
+def check_auxiliary_model() -> bool:
+ """Check if an auxiliary text model is available for LLM content processing."""
+ return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
@@ -1120,7 +1122,7 @@ if __name__ == "__main__":
# Check if API keys are available
firecrawl_available = check_firecrawl_api_key()
- nous_available = check_nous_api_key()
+ nous_available = check_auxiliary_model()
if not firecrawl_available:
print("❌ FIRECRAWL_API_KEY environment variable not set")
@@ -1130,12 +1132,11 @@ if __name__ == "__main__":
print("✅ Firecrawl API key found")
if not nous_available:
- print("❌ OPENROUTER_API_KEY environment variable not set")
- print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
- print("Get API key at: https://inference-api.nousresearch.com/")
- print("⚠️ Without Nous API key, LLM content processing will be disabled")
+ print("❌ No auxiliary model available for LLM content processing")
+ print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
+ print("⚠️ Without an auxiliary model, LLM content processing will be disabled")
else:
- print("✅ Nous Research API key found")
+ print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
if not firecrawl_available:
exit(1)
@@ -1143,7 +1144,7 @@ if __name__ == "__main__":
print("🛠️ Web tools ready for use!")
if nous_available:
- print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter")
+ print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}")
print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars")
# Show debug mode status