Hermes Agent UX Improvements

This commit is contained in:
teknium1 2026-02-22 02:16:11 -08:00
parent b1f55e3ee5
commit ededaaa874
23 changed files with 945 additions and 1545 deletions

128
agent/auxiliary_client.py Normal file
View file

@ -0,0 +1,128 @@
"""Shared auxiliary OpenAI client for cheap/fast side tasks.
Provides a single resolution chain so every consumer (context compression,
session search, web extraction, vision analysis, browser vision) picks up
the best available backend without duplicating fallback logic.
Resolution order for text tasks:
1. OpenRouter (OPENROUTER_API_KEY)
2. Nous Portal (~/.hermes/auth.json active provider)
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
4. None
Resolution order for vision/multimodal tasks:
1. OpenRouter
2. Nous Portal
3. None (custom endpoints can't substitute for Gemini multimodal)
"""
import json
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
from openai import OpenAI
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_NOUS_MODEL = "gemini-3-flash"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
def _read_nous_auth() -> Optional[dict]:
    """Load ~/.hermes/auth.json and return the Nous provider state.

    Returns the provider dict only when Nous is the active provider and at
    least one credential (agent_key or access_token) is present; otherwise
    returns None. Any read/parse problem is treated as "not configured"
    and logged at debug level.
    """
    try:
        if not _AUTH_JSON_PATH.is_file():
            return None
        state = json.loads(_AUTH_JSON_PATH.read_text())
        if state.get("active_provider") != "nous":
            return None
        nous_state = state.get("providers", {}).get("nous", {})
        # A usable provider must carry at least one credential.
        has_credential = bool(
            nous_state.get("agent_key") or nous_state.get("access_token")
        )
        return nous_state if has_credential else None
    except Exception as exc:
        logger.debug("Could not read Nous auth: %s", exc)
        return None
def _nous_api_key(provider: dict) -> str:
"""Extract the best API key from a Nous provider state dict."""
return provider.get("agent_key") or provider.get("access_token", "")
def _nous_base_url() -> str:
"""Resolve the Nous inference base URL from env or default."""
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
# ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for text-only auxiliary tasks.

    Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
    """
    # 1. OpenRouter wins whenever a key is configured.
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        logger.debug("Auxiliary text client: OpenRouter")
        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return client, _OPENROUTER_MODEL

    # 2. Nous Portal credentials from ~/.hermes/auth.json.
    portal = _read_nous_auth()
    if portal:
        logger.debug("Auxiliary text client: Nous Portal")
        client = OpenAI(api_key=_nous_api_key(portal), base_url=_nous_base_url())
        return client, _NOUS_MODEL

    # 3. Custom endpoint: requires BOTH base URL and API key to be set.
    custom_base_url = os.getenv("OPENAI_BASE_URL")
    custom_api_key = os.getenv("OPENAI_API_KEY")
    if custom_base_url and custom_api_key:
        model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
        logger.debug("Auxiliary text client: custom endpoint (%s)", model)
        return OpenAI(api_key=custom_api_key, base_url=custom_base_url), model

    # 4. No backend available.
    logger.debug("Auxiliary text client: none available")
    return None, None
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for vision/multimodal auxiliary tasks.

    Only OpenRouter and the Nous Portal qualify -- custom endpoints cannot
    substitute for Gemini multimodal. Returns (None, None) when neither
    backend is configured.
    """
    # 1. OpenRouter wins whenever a key is configured.
    openrouter_key = os.getenv("OPENROUTER_API_KEY")
    if openrouter_key:
        logger.debug("Auxiliary vision client: OpenRouter")
        client = OpenAI(api_key=openrouter_key, base_url=OPENROUTER_BASE_URL)
        return client, _OPENROUTER_MODEL

    # 2. Fall back to Nous Portal credentials.
    portal = _read_nous_auth()
    if portal:
        logger.debug("Auxiliary vision client: Nous Portal")
        client = OpenAI(api_key=_nous_api_key(portal), base_url=_nous_base_url())
        return client, _NOUS_MODEL

    # 3. Nothing multimodal-capable is available.
    logger.debug("Auxiliary vision client: none available")
    return None, None

View file

@ -9,13 +9,11 @@ import logging
import os
from typing import Any, Dict, List
from openai import OpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
@ -31,7 +29,6 @@ class ContextCompressor:
self,
model: str,
threshold_percent: float = 0.85,
summary_model: str = "google/gemini-3-flash-preview",
protect_first_n: int = 3,
protect_last_n: int = 4,
summary_target_tokens: int = 500,
@ -39,7 +36,6 @@ class ContextCompressor:
):
self.model = model
self.threshold_percent = threshold_percent
self.summary_model = summary_model
self.protect_first_n = protect_first_n
self.protect_last_n = protect_last_n
self.summary_target_tokens = summary_target_tokens
@ -53,8 +49,7 @@ class ContextCompressor:
self.last_completion_tokens = 0
self.last_total_tokens = 0
api_key = os.getenv("OPENROUTER_API_KEY", "")
self.client = OpenAI(api_key=api_key, base_url=OPENROUTER_BASE_URL) if api_key else None
self.client, self.summary_model = get_text_auxiliary_client()
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@ -155,6 +150,26 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
if not self.quiet_mode:
print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
# Truncation fallback when no auxiliary model is available
if self.client is None:
print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.")
# Keep system message(s) at the front and the protected tail;
# simply drop the oldest non-system messages until under threshold.
kept = []
for msg in messages:
if msg.get("role") == "system":
kept.append(msg.copy())
else:
break
tail = messages[-self.protect_last_n:]
kept.extend(m.copy() for m in tail)
self.compression_count += 1
if not self.quiet_mode:
print(f" ✂️ Truncated: {len(messages)}{len(kept)} messages (dropped middle turns)")
return kept
if not self.quiet_mode:
print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")
summary = self._generate_summary(turns_to_summarize)

View file

@ -4,11 +4,16 @@ Pure display functions and classes with no AIAgent dependency.
Used by AIAgent._execute_tool_calls for CLI feedback.
"""
import json
import os
import random
import threading
import time
# ANSI escape codes for coloring tool failure indicators
_RED = "\033[31m"
_RESET = "\033[0m"
# =========================================================================
# Tool preview (one-line summary of a tool call's primary argument)
@ -242,12 +247,46 @@ KAWAII_GENERIC = [
# Cute tool message (completion line that replaces the spinner)
# =========================================================================
def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
"""Inspect a tool result string for signs of failure.
Returns ``(is_failure, suffix)`` where *suffix* is an informational tag
like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic
failures. On success, returns ``(False, "")``.
"""
if result is None:
return False, ""
if tool_name == "terminal":
try:
data = json.loads(result)
exit_code = data.get("exit_code")
if exit_code is not None and exit_code != 0:
return True, f" [exit {exit_code}]"
except (json.JSONDecodeError, TypeError, AttributeError):
pass
return False, ""
# Generic heuristic for non-terminal tools
lower = result[:500].lower()
if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
return True, " [error]"
return False, ""
def get_cute_tool_message(
tool_name: str, args: dict, duration: float, result: str | None = None,
) -> str:
"""Generate a formatted tool completion line for CLI quiet mode.
Format: ``| {emoji} {verb:9} {detail} {duration}``
When *result* is provided the line is checked for failure indicators.
Failed tool calls get a red prefix and an informational suffix.
"""
dur = f"{duration:.1f}s"
is_failure, failure_suffix = _detect_tool_failure(tool_name, result)
def _trunc(s, n=40):
s = str(s)
@ -257,105 +296,111 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
p = str(p)
return ("..." + p[-(n-3):]) if len(p) > n else p
def _wrap(line: str) -> str:
"""Apply red coloring and failure suffix when the tool failed."""
if not is_failure:
return line
return f"{_RED}{line}{failure_suffix}{_RESET}"
if tool_name == "web_search":
return f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}"
return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}")
if tool_name == "web_extract":
urls = args.get("urls", [])
if urls:
url = urls[0] if isinstance(urls, list) else str(urls)
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
return f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}"
return f"┊ 📄 fetch pages {dur}"
return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}")
return _wrap(f"┊ 📄 fetch pages {dur}")
if tool_name == "web_crawl":
url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}"
return _wrap(f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}")
if tool_name == "terminal":
return f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}"
return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}")
if tool_name == "process":
action = args.get("action", "?")
sid = args.get("session_id", "")[:12]
labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}",
"wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"}
return f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}"
return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}")
if tool_name == "read_file":
return f"┊ 📖 read {_path(args.get('path', ''))} {dur}"
return _wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}")
if tool_name == "write_file":
return f"┊ ✍️ write {_path(args.get('path', ''))} {dur}"
return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}")
if tool_name == "patch":
return f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}"
return _wrap(f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}")
if tool_name == "search_files":
pattern = _trunc(args.get("pattern", ""), 35)
target = args.get("target", "content")
verb = "find" if target == "files" else "grep"
return f"┊ 🔎 {verb:9} {pattern} {dur}"
return _wrap(f"┊ 🔎 {verb:9} {pattern} {dur}")
if tool_name == "browser_navigate":
url = args.get("url", "")
domain = url.replace("https://", "").replace("http://", "").split("/")[0]
return f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}"
return _wrap(f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}")
if tool_name == "browser_snapshot":
mode = "full" if args.get("full") else "compact"
return f"┊ 📸 snapshot {mode} {dur}"
return _wrap(f"┊ 📸 snapshot {mode} {dur}")
if tool_name == "browser_click":
return f"┊ 👆 click {args.get('ref', '?')} {dur}"
return _wrap(f"┊ 👆 click {args.get('ref', '?')} {dur}")
if tool_name == "browser_type":
return f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}"
return _wrap(f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}")
if tool_name == "browser_scroll":
d = args.get("direction", "down")
arrow = {"down": "", "up": "", "right": "", "left": ""}.get(d, "")
return f"{arrow} scroll {d} {dur}"
return _wrap(f"{arrow} scroll {d} {dur}")
if tool_name == "browser_back":
return f"┊ ◀️ back {dur}"
return _wrap(f"┊ ◀️ back {dur}")
if tool_name == "browser_press":
return f"┊ ⌨️ press {args.get('key', '?')} {dur}"
return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}")
if tool_name == "browser_close":
return f"┊ 🚪 close browser {dur}"
return _wrap(f"┊ 🚪 close browser {dur}")
if tool_name == "browser_get_images":
return f"┊ 🖼️ images extracting {dur}"
return _wrap(f"┊ 🖼️ images extracting {dur}")
if tool_name == "browser_vision":
return f"┊ 👁️ vision analyzing page {dur}"
return _wrap(f"┊ 👁️ vision analyzing page {dur}")
if tool_name == "todo":
todos_arg = args.get("todos")
merge = args.get("merge", False)
if todos_arg is None:
return f"┊ 📋 plan reading tasks {dur}"
return _wrap(f"┊ 📋 plan reading tasks {dur}")
elif merge:
return f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}"
return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}")
else:
return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}"
return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}")
if tool_name == "session_search":
return f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}"
return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}")
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "")
if action == "add":
return f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}"
return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}")
elif action == "replace":
return f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}"
return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
elif action == "remove":
return f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}"
return f"┊ 🧠 memory {action} {dur}"
return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}")
return _wrap(f"┊ 🧠 memory {action} {dur}")
if tool_name == "skills_list":
return f"┊ 📚 skills list {args.get('category', 'all')} {dur}"
return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}")
if tool_name == "skill_view":
return f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}"
return _wrap(f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}")
if tool_name == "image_generate":
return f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}"
return _wrap(f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}")
if tool_name == "text_to_speech":
return f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}"
return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}")
if tool_name == "vision_analyze":
return f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}"
return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}")
if tool_name == "mixture_of_agents":
return f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}"
return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}")
if tool_name == "send_message":
return f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}"
return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}")
if tool_name == "schedule_cronjob":
return f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}"
return _wrap(f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}")
if tool_name == "list_cronjobs":
return f"┊ ⏰ jobs listing {dur}"
return _wrap(f"┊ ⏰ jobs listing {dur}")
if tool_name == "remove_cronjob":
return f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}"
return _wrap(f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}")
if tool_name.startswith("rl_"):
rl = {
"rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
@ -364,16 +409,16 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str:
"rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}",
"rl_list_runs": "list runs", "rl_test_inference": "test inference",
}
return f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}"
return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}")
if tool_name == "execute_code":
code = args.get("code", "")
first_line = code.strip().split("\n")[0] if code.strip() else ""
return f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}"
return _wrap(f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}")
if tool_name == "delegate_task":
tasks = args.get("tasks")
if tasks and isinstance(tasks, list):
return f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}"
return f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}"
return _wrap(f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}")
return _wrap(f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}")
preview = build_tool_preview(tool_name, args) or ""
return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}"
return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}")

3
cli.py
View file

@ -339,9 +339,6 @@ def _cprint(text: str):
"""
_pt_print(_PT_ANSI(text))
# Version string
VERSION = "v1.0.0"
# ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal)
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]

View file

@ -8,6 +8,7 @@ Handles loading and validating configuration for:
- Delivery preferences
"""
import logging
import os
import json
from pathlib import Path
@ -15,6 +16,8 @@ from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any
from enum import Enum
logger = logging.getLogger(__name__)
class Platform(Enum):
"""Supported messaging platforms."""
@ -264,6 +267,40 @@ def load_gateway_config() -> GatewayConfig:
# Override with environment variables
_apply_env_overrides(config)
# --- Validate loaded values ---
policy = config.default_reset_policy
if not (0 <= policy.at_hour <= 23):
logger.warning(
"Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour
)
policy.at_hour = 4
if policy.idle_minutes is None or policy.idle_minutes <= 0:
logger.warning(
"Invalid idle_minutes=%s (must be positive). Using default 1440.",
policy.idle_minutes,
)
policy.idle_minutes = 1440
# Warn about empty bot tokens — platforms that loaded an empty string
# won't connect and the cause can be confusing without a log line.
_token_env_names = {
Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
Platform.DISCORD: "DISCORD_BOT_TOKEN",
Platform.SLACK: "SLACK_BOT_TOKEN",
}
for platform, pconfig in config.platforms.items():
if not pconfig.enabled:
continue
env_name = _token_env_names.get(platform)
if env_name and pconfig.token is not None and not pconfig.token.strip():
logger.warning(
"%s is enabled but %s is empty. "
"The adapter will likely fail to connect.",
platform.value, env_name,
)
return config

View file

@ -8,12 +8,18 @@ Routes messages to the appropriate destination based on:
- Local (always saved to files)
"""
import logging
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass
from typing import Dict, List, Optional, Any, Union
from enum import Enum
logger = logging.getLogger(__name__)
MAX_PLATFORM_OUTPUT = 4000
TRUNCATED_VISIBLE = 3800
from .config import Platform, GatewayConfig
from .session import SessionSource
@ -245,6 +251,15 @@ class DeliveryRouter:
"timestamp": timestamp
}
def _save_full_output(self, content: str, job_id: str) -> Path:
"""Save full cron output to disk and return the file path."""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
out_dir = Path.home() / ".hermes" / "cron" / "output"
out_dir.mkdir(parents=True, exist_ok=True)
path = out_dir / f"{job_id}_{timestamp}.txt"
path.write_text(content)
return path
async def _deliver_to_platform(
self,
target: DeliveryTarget,
@ -260,8 +275,16 @@ class DeliveryRouter:
if not target.chat_id:
raise ValueError(f"No chat ID for {target.platform.value} delivery")
# Call the adapter's send method
# Adapters should implement: async def send(chat_id: str, content: str) -> Dict
# Guard: truncate oversized cron output to stay within platform limits
if len(content) > MAX_PLATFORM_OUTPUT:
job_id = (metadata or {}).get("job_id", "unknown")
saved_path = self._save_full_output(content, job_id)
logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path)
content = (
content[:TRUNCATED_VISIBLE]
+ f"\n\n... [truncated, full output saved to {saved_path}]"
)
return await adapter.send(target.chat_id, content, metadata=metadata)

View file

@ -659,34 +659,90 @@ class BasePlatformAdapter(ABC):
def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
"""
Split a long message into chunks.
Split a long message into chunks, preserving code block boundaries.
When a split falls inside a triple-backtick code block, the fence is
closed at the end of the current chunk and reopened (with the original
language tag) at the start of the next chunk. Multi-chunk responses
receive indicators like ``(1/3)``.
Args:
content: The full message content
max_length: Maximum length per chunk (platform-specific)
Returns:
List of message chunks
"""
if len(content) <= max_length:
return [content]
chunks = []
while content:
if len(content) <= max_length:
chunks.append(content)
INDICATOR_RESERVE = 10 # room for " (XX/XX)"
FENCE_CLOSE = "\n```"
chunks: List[str] = []
remaining = content
# When the previous chunk ended mid-code-block, this holds the
# language tag (possibly "") so we can reopen the fence.
carry_lang: Optional[str] = None
while remaining:
# If we're continuing a code block from the previous chunk,
# prepend a new opening fence with the same language tag.
prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
# How much body text we can fit after accounting for the prefix,
# a potential closing fence, and the chunk indicator.
headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
if headroom < 1:
headroom = max_length // 2
# Everything remaining fits in one final chunk
if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
chunks.append(prefix + remaining)
break
# Try to split at a newline
split_idx = content.rfind("\n", 0, max_length)
if split_idx == -1:
# No newline, split at space
split_idx = content.rfind(" ", 0, max_length)
if split_idx == -1:
# No space either, hard split
split_idx = max_length
chunks.append(content[:split_idx])
content = content[split_idx:].lstrip()
# Find a natural split point (prefer newlines, then spaces)
region = remaining[:headroom]
split_at = region.rfind("\n")
if split_at < headroom // 2:
split_at = region.rfind(" ")
if split_at < 1:
split_at = headroom
chunk_body = remaining[:split_at]
remaining = remaining[split_at:].lstrip()
full_chunk = prefix + chunk_body
# Walk the chunk line-by-line to determine whether we end
# inside an open code block.
in_code = carry_lang is not None
lang = carry_lang or ""
for line in full_chunk.split("\n"):
stripped = line.strip()
if stripped.startswith("```"):
if in_code:
in_code = False
lang = ""
else:
in_code = True
tag = stripped[3:].strip()
lang = tag.split()[0] if tag else ""
if in_code:
# Close the orphaned fence so the chunk is valid on its own
full_chunk += FENCE_CLOSE
carry_lang = lang
else:
carry_lang = None
chunks.append(full_chunk)
# Append chunk indicators when the response spans multiple messages
if len(chunks) > 1:
total = len(chunks)
chunks = [
f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
]
return chunks

View file

@ -8,6 +8,7 @@ Uses python-telegram-bot library for:
"""
import asyncio
import re
from typing import Dict, List, Optional, Any
try:
@ -49,6 +50,16 @@ def check_telegram_requirements() -> bool:
return TELEGRAM_AVAILABLE
# Matches every character that MarkdownV2 requires to be backslash-escaped
# when it appears outside a code span or fenced code block.
_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
def _escape_mdv2(text: str) -> str:
"""Escape Telegram MarkdownV2 special characters with a preceding backslash."""
return _MDV2_ESCAPE_RE.sub(r'\\\1', text)
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
@ -167,7 +178,7 @@ class TelegramAdapter(BasePlatformAdapter):
msg = await self._bot.send_message(
chat_id=int(chat_id),
text=chunk,
parse_mode=ParseMode.MARKDOWN,
parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
message_thread_id=int(thread_id) if thread_id else None,
)
@ -297,14 +308,81 @@ class TelegramAdapter(BasePlatformAdapter):
def format_message(self, content: str) -> str:
"""
Format message for Telegram.
Telegram uses a subset of markdown. We'll use the simpler
Markdown mode (not MarkdownV2) for compatibility.
Convert standard markdown to Telegram MarkdownV2 format.
Protected regions (code blocks, inline code) are extracted first so
their contents are never modified. Standard markdown constructs
(headers, bold, italic, links) are translated to MarkdownV2 syntax,
and all remaining special characters are escaped.
"""
# Basic escaping for Telegram Markdown
# In Markdown mode (not V2), only certain characters need escaping
return content
if not content:
return content
placeholders: dict = {}
counter = [0]
def _ph(value: str) -> str:
"""Stash *value* behind a placeholder token that survives escaping."""
key = f"\x00PH{counter[0]}\x00"
counter[0] += 1
placeholders[key] = value
return key
text = content
# 1) Protect fenced code blocks (``` ... ```)
text = re.sub(
r'(```(?:[^\n]*\n)?[\s\S]*?```)',
lambda m: _ph(m.group(0)),
text,
)
# 2) Protect inline code (`...`)
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
# 3) Convert markdown links escape the display text; inside the URL
# only ')' and '\' need escaping per the MarkdownV2 spec.
def _convert_link(m):
display = _escape_mdv2(m.group(1))
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
inner = m.group(1).strip()
# Strip redundant bold markers that may appear inside a header
inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
return _ph(f'*{_escape_mdv2(inner)}*')
text = re.sub(
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
)
# 5) Convert bold: **text** → *text* (MarkdownV2 bold)
text = re.sub(
r'\*\*(.+?)\*\*',
lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
text,
)
# 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
text = re.sub(
r'\*([^*]+)\*',
lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
text,
)
# 7) Escape remaining special characters in plain text
text = _escape_mdv2(text)
# 8) Restore placeholders in reverse insertion order so that
# nested references (a placeholder inside another) resolve correctly.
for key in reversed(list(placeholders.keys())):
text = text.replace(key, placeholders[key])
return text
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming text messages."""

View file

@ -20,6 +20,7 @@ import re
import sys
import signal
import threading
from logging.handlers import RotatingFileHandler
from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List
@ -402,9 +403,27 @@ class GatewayRunner:
# Build the context prompt to inject
context_prompt = build_session_context_prompt(context)
# If the previous session expired and was auto-reset, prepend a notice
# so the agent knows this is a fresh conversation (not an intentional /reset).
if getattr(session_entry, 'was_auto_reset', False):
context_prompt = (
"[System note: The user's previous session expired due to inactivity. "
"This is a fresh conversation with no prior context.]\n\n"
+ context_prompt
)
session_entry.was_auto_reset = False
# Load conversation history from transcript
history = self.session_store.load_transcript(session_entry.session_id)
# First-message onboarding for brand-new messaging platform users
if not history:
context_prompt += (
"\n\n[System note: This is the user's very first message in this session. "
"Briefly introduce yourself and mention that /help shows available commands. "
"Keep the introduction concise -- one or two sentences max.]"
)
# -----------------------------------------------------------------
# Auto-analyze images sent by the user
#
@ -1342,15 +1361,32 @@ def _start_cron_ticker(stop_event: threading.Event, interval: int = 60):
Runs inside the gateway process so cronjobs fire automatically without
needing a separate `hermes cron daemon` or system cron entry.
Every 60th tick (~once per hour) the image/audio cache is pruned so
stale temp files don't accumulate.
"""
from cron.scheduler import tick as cron_tick
from gateway.platforms.base import cleanup_image_cache
IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
logger.info("Cron ticker started (interval=%ds)", interval)
tick_count = 0
while not stop_event.is_set():
try:
cron_tick(verbose=False)
except Exception as e:
logger.debug("Cron tick error: %s", e)
tick_count += 1
if tick_count % IMAGE_CACHE_EVERY == 0:
try:
removed = cleanup_image_cache(max_age_hours=24)
if removed:
logger.info("Image cache cleanup: removed %d stale file(s)", removed)
except Exception as e:
logger.debug("Image cache cleanup error: %s", e)
stop_event.wait(timeout=interval)
logger.info("Cron ticker stopped")
@ -1363,6 +1399,18 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
Returns True if the gateway ran successfully, False if it failed to start.
A False return causes a non-zero exit code so systemd can auto-restart.
"""
# Configure rotating file log so gateway output is persisted for debugging
log_dir = Path.home() / '.hermes' / 'logs'
log_dir.mkdir(parents=True, exist_ok=True)
file_handler = RotatingFileHandler(
log_dir / 'gateway.log',
maxBytes=5 * 1024 * 1024,
backupCount=3,
)
file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
logging.getLogger().addHandler(file_handler)
logging.getLogger().setLevel(logging.INFO)
runner = GatewayRunner(config)
# Set up signal handlers

View file

@ -219,6 +219,10 @@ class SessionEntry:
output_tokens: int = 0
total_tokens: int = 0
# Set when a session was created because the previous one expired;
# consumed once by the message handler to inject a notice into context
was_auto_reset: bool = False
def to_dict(self) -> Dict[str, Any]:
result = {
"session_key": self.session_key,
@ -388,11 +392,14 @@ class SessionStore:
return entry
else:
# Session is being reset -- end the old one in SQLite
was_auto_reset = True
if self._db:
try:
self._db.end_session(entry.session_id, "session_reset")
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
else:
was_auto_reset = False
# Create new session
session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
@ -406,6 +413,7 @@ class SessionStore:
display_name=source.chat_name,
platform=source.platform,
chat_type=source.chat_type,
was_auto_reset=was_auto_reset,
)
self._entries[session_key] = entry

View file

@ -11,4 +11,4 @@ Provides subcommands for:
- hermes cron - Manage cron jobs
"""
__version__ = "0.1.0"
__version__ = "v1.0.0"

View file

@ -33,7 +33,7 @@ def cprint(text: str):
# ASCII Art & Branding
# =========================================================================
VERSION = "v1.0.0"
from hermes_cli import __version__ as VERSION
HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/]
[bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/]

View file

@ -44,6 +44,8 @@ def run_doctor(args):
should_fix = getattr(args, 'fix', False)
issues = []
manual_issues = [] # issues that can't be auto-fixed
fixed_count = 0
print()
print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN))
@ -135,8 +137,15 @@ def run_doctor(args):
check_ok(".env file exists (in project directory)")
else:
check_fail("~/.hermes/.env file missing")
check_info("Run 'hermes setup' to create one")
issues.append("Run 'hermes setup' to create .env")
if should_fix:
env_path.parent.mkdir(parents=True, exist_ok=True)
env_path.touch()
check_ok("Created empty ~/.hermes/.env")
check_info("Run 'hermes setup' to configure API keys")
fixed_count += 1
else:
check_info("Run 'hermes setup' to create one")
issues.append("Run 'hermes setup' to create .env")
# Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback)
config_path = HERMES_HOME / 'config.yaml'
@ -147,7 +156,17 @@ def run_doctor(args):
if fallback_config.exists():
check_ok("cli-config.yaml exists (in project directory)")
else:
check_warn("config.yaml not found", "(using defaults)")
example_config = PROJECT_ROOT / 'cli-config.yaml.example'
if should_fix and example_config.exists():
config_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(str(example_config), str(config_path))
check_ok("Created ~/.hermes/config.yaml from cli-config.yaml.example")
fixed_count += 1
elif should_fix:
check_warn("config.yaml not found and no example to copy from")
manual_issues.append("Create ~/.hermes/config.yaml manually")
else:
check_warn("config.yaml not found", "(using defaults)")
# =========================================================================
# Check: Directory structure
@ -159,7 +178,26 @@ def run_doctor(args):
if hermes_home.exists():
check_ok("~/.hermes directory exists")
else:
check_warn("~/.hermes not found", "(will be created on first use)")
if should_fix:
hermes_home.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes directory")
fixed_count += 1
else:
check_warn("~/.hermes not found", "(will be created on first use)")
# Check expected subdirectories
expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"]
for subdir_name in expected_subdirs:
subdir_path = hermes_home / subdir_name
if subdir_path.exists():
check_ok(f"~/.hermes/{subdir_name}/ exists")
else:
if should_fix:
subdir_path.mkdir(parents=True, exist_ok=True)
check_ok(f"Created ~/.hermes/{subdir_name}/")
fixed_count += 1
else:
check_warn(f"~/.hermes/{subdir_name}/ not found", "(will be created on first use)")
# Check for SOUL.md persona file
soul_path = hermes_home / "SOUL.md"
@ -175,14 +213,25 @@ def run_doctor(args):
check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)")
if should_fix:
soul_path.parent.mkdir(parents=True, exist_ok=True)
soul_path.write_text("# Hermes Agent Persona\n\n<!-- Edit this file to customize how Hermes communicates. -->\n", encoding="utf-8")
check_ok("Created ~/.hermes/SOUL.md")
soul_path.write_text(
"# Hermes Agent Persona\n\n"
"<!-- Edit this file to customize how Hermes communicates. -->\n\n"
"You are Hermes, a helpful AI assistant.\n",
encoding="utf-8",
)
check_ok("Created ~/.hermes/SOUL.md with basic template")
fixed_count += 1
logs_dir = PROJECT_ROOT / "logs"
if logs_dir.exists():
check_ok("logs/ directory exists")
check_ok("logs/ directory exists (project root)")
else:
check_warn("logs/ not found", "(will be created on first use)")
if should_fix:
logs_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created logs/ directory")
fixed_count += 1
else:
check_warn("logs/ not found", "(will be created on first use)")
# Check memory directory
memories_dir = hermes_home / "memories"
@ -205,6 +254,7 @@ def run_doctor(args):
if should_fix:
memories_dir.mkdir(parents=True, exist_ok=True)
check_ok("Created ~/.hermes/memories/")
fixed_count += 1
# Check SQLite session store
state_db_path = hermes_home / "state.db"
@ -299,6 +349,7 @@ def run_doctor(args):
openrouter_key = os.getenv("OPENROUTER_API_KEY")
if openrouter_key:
print(" Checking OpenRouter API...", end="", flush=True)
try:
import httpx
response = httpx.get(
@ -307,20 +358,21 @@ def run_doctor(args):
timeout=10
)
if response.status_code == 200:
check_ok("OpenRouter API")
print(f"\r {color('', Colors.GREEN)} OpenRouter API ")
elif response.status_code == 401:
check_fail("OpenRouter API", "(invalid API key)")
print(f"\r {color('', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ")
issues.append("Check OPENROUTER_API_KEY in .env")
else:
check_fail("OpenRouter API", f"(HTTP {response.status_code})")
print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ")
except Exception as e:
check_fail("OpenRouter API", f"({e})")
print(f"\r {color('', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ")
issues.append("Check network connectivity")
else:
check_warn("OpenRouter API", "(not configured)")
anthropic_key = os.getenv("ANTHROPIC_API_KEY")
if anthropic_key:
print(" Checking Anthropic API...", end="", flush=True)
try:
import httpx
response = httpx.get(
@ -332,14 +384,14 @@ def run_doctor(args):
timeout=10
)
if response.status_code == 200:
check_ok("Anthropic API")
print(f"\r {color('', Colors.GREEN)} Anthropic API ")
elif response.status_code == 401:
check_fail("Anthropic API", "(invalid API key)")
print(f"\r {color('', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ")
else:
# Note: Anthropic may not have /models endpoint
check_warn("Anthropic API", "(couldn't verify)")
msg = "(couldn't verify)"
print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ")
except Exception as e:
check_warn("Anthropic API", f"({e})")
print(f"\r {color('', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ")
# =========================================================================
# Check: Submodules
@ -440,17 +492,28 @@ def run_doctor(args):
# Summary
# =========================================================================
print()
if issues:
print(color("" * 60, Colors.YELLOW))
print(color(f" Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
remaining_issues = issues + manual_issues
if should_fix and fixed_count > 0:
print(color("" * 60, Colors.GREEN))
print(color(f" Fixed {fixed_count} issue(s).", Colors.GREEN, Colors.BOLD), end="")
if remaining_issues:
print(color(f" {len(remaining_issues)} issue(s) require manual intervention.", Colors.YELLOW, Colors.BOLD))
else:
print()
print()
for i, issue in enumerate(issues, 1):
if remaining_issues:
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}")
print()
elif remaining_issues:
print(color("" * 60, Colors.YELLOW))
print(color(f" Found {len(remaining_issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD))
print()
for i, issue in enumerate(remaining_issues, 1):
print(f" {i}. {issue}")
print()
if should_fix:
print(color(" Attempting auto-fix is not yet implemented.", Colors.DIM))
print(color(" Please resolve issues manually.", Colors.DIM))
if not should_fix:
print(color(" Tip: run 'hermes doctor --fix' to auto-fix what's possible.", Colors.DIM))
else:
print(color("" * 60, Colors.GREEN))
print(color(" All checks passed! 🎉", Colors.GREEN, Colors.BOLD))

View file

@ -47,8 +47,66 @@ from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
def _has_any_provider_configured() -> bool:
    """Return True when at least one inference provider looks usable.

    Probes, in order: live environment variables, the persisted ``.env``
    file (in case it has not been loaded into the environment yet), and
    Nous Portal OAuth credentials stored in ``auth.json``.
    """
    from hermes_cli.config import get_env_path, get_hermes_home

    key_names = ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY")

    # 1. Environment variables (may be set by .env or the shell).
    if any(os.getenv(name) for name in key_names):
        return True

    # 2. Scan the .env file directly for any of the known keys.
    env_file = get_env_path()
    if env_file.exists():
        try:
            for raw in env_file.read_text().splitlines():
                raw = raw.strip()
                if raw.startswith("#") or "=" not in raw:
                    continue
                name, _, value = raw.partition("=")
                value = value.strip().strip("'\"")
                if name.strip() in key_names and value:
                    return True
        except Exception:
            # Best-effort: an unreadable .env simply means "not configured here".
            pass

    # 3. Nous Portal OAuth credentials (auth.json with an active provider).
    auth_file = get_hermes_home() / "auth.json"
    if auth_file.exists():
        try:
            import json

            auth = json.loads(auth_file.read_text())
            active = auth.get("active_provider")
            if active:
                state = auth.get("providers", {}).get(active, {})
                if state.get("access_token") or state.get("refresh_token"):
                    return True
        except Exception:
            pass

    return False
def cmd_chat(args):
"""Run interactive chat CLI."""
# First-run guard: check if any provider is configured before launching
if not _has_any_provider_configured():
print()
print("It looks like Hermes isn't configured yet -- no API keys or providers found.")
print()
print(" Run: hermes setup")
print()
try:
reply = input("Run setup now? [Y/n] ").strip().lower()
except (EOFError, KeyboardInterrupt):
reply = "n"
if reply in ("", "y", "yes"):
cmd_setup(args)
return
print()
print("You can run 'hermes setup' at any time to configure.")
sys.exit(1)
# Import and run the CLI
from cli import main as cli_main
@ -219,20 +277,10 @@ def _model_flow_openrouter(config, current_model=""):
print("API key saved.")
print()
OPENROUTER_MODELS = [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-opus-4.5",
"openai/gpt-5.2",
"openai/gpt-5.2-codex",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
"z-ai/glm-4.7",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.1",
]
from hermes_cli.models import model_ids
openrouter_models = model_ids()
selected = _prompt_model_selection(OPENROUTER_MODELS, current_model=current_model)
selected = _prompt_model_selection(openrouter_models, current_model=current_model)
if selected:
# Clear any custom endpoint and set provider to openrouter
if get_env_value("OPENAI_BASE_URL"):

33
hermes_cli/models.py Normal file
View file

@ -0,0 +1,33 @@
"""
Canonical list of OpenRouter models offered in CLI and setup wizards.
Add, remove, or reorder entries here — both `hermes setup` and
`hermes` provider-selection will pick up the change automatically.
"""
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6", "recommended"),
    ("anthropic/claude-sonnet-4.5", ""),
    ("anthropic/claude-opus-4.5", ""),
    ("openai/gpt-5.2", ""),
    ("openai/gpt-5.2-codex", ""),
    ("google/gemini-3-pro-preview", ""),
    ("google/gemini-3-flash-preview", ""),
    ("z-ai/glm-4.7", ""),
    ("moonshotai/kimi-k2.5", ""),
    ("minimax/minimax-m2.1", ""),
]


def model_ids() -> list[str]:
    """Return just the model-id strings (convenience helper)."""
    return [mid for mid, _ in OPENROUTER_MODELS]


def menu_labels() -> list[str]:
    """Return display labels like 'anthropic/claude-opus-4.6 (recommended)'.

    Entries with an empty description render as the bare model id.
    """
    # Comprehension instead of a manual append loop (same output, idiomatic).
    return [f"{mid} ({desc})" if desc else mid for mid, desc in OPENROUTER_MODELS]

View file

@ -611,46 +611,27 @@ def run_setup_wizard(args):
save_env_value("LLM_MODEL", custom)
# else: keep current
else:
# Static list for OpenRouter / fallback
model_choices = [
"anthropic/claude-opus-4.6 (recommended)",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-opus-4.5",
"openai/gpt-5.2",
"openai/gpt-5.2-codex",
"google/gemini-3-pro-preview",
"google/gemini-3-flash-preview",
"z-ai/glm-4.7",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.1",
# Static list for OpenRouter / fallback (from canonical list)
from hermes_cli.models import model_ids, menu_labels
ids = model_ids()
model_choices = menu_labels() + [
"Custom model",
f"Keep current ({current_model})"
f"Keep current ({current_model})",
]
model_idx = prompt_choice("Select default model:", model_choices, 11)
keep_idx = len(model_choices) - 1
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
model_map = {
0: "anthropic/claude-opus-4.6",
1: "anthropic/claude-sonnet-4.5",
2: "anthropic/claude-opus-4.5",
3: "openai/gpt-5.2",
4: "openai/gpt-5.2-codex",
5: "google/gemini-3-pro-preview",
6: "google/gemini-3-flash-preview",
7: "z-ai/glm-4.7",
8: "moonshotai/kimi-k2.5",
9: "minimax/minimax-m2.1",
}
if model_idx in model_map:
config['model'] = model_map[model_idx]
save_env_value("LLM_MODEL", model_map[model_idx])
elif model_idx == 10: # Custom
if model_idx < len(ids):
config['model'] = ids[model_idx]
save_env_value("LLM_MODEL", ids[model_idx])
elif model_idx == len(ids): # Custom
custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
if custom:
config['model'] = custom
save_env_value("LLM_MODEL", custom)
# else: Keep current (model_idx == 11)
# else: Keep current
# =========================================================================
# Step 4: Terminal Backend

File diff suppressed because it is too large Load diff

View file

@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str,
approval_callback=approval_callback)
if choice == "deny":
return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."}
return {
"approved": False,
"message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.",
"pattern_key": pattern_key,
"description": description,
}
if choice == "session":
approve_session(session_key, pattern_key)

View file

@ -51,25 +51,16 @@ import signal
import subprocess
import shutil
import sys
import asyncio
import tempfile
import threading
import time
import requests
from typing import Dict, Any, Optional, List
from pathlib import Path
from hermes_constants import OPENROUTER_CHAT_URL
from agent.auxiliary_client import get_vision_auxiliary_client
logger = logging.getLogger(__name__)
# Try to import httpx for async LLM calls
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
# ============================================================================
# Configuration
# ============================================================================
@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300
# Max tokens for snapshot content before summarization
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
# Model for task-aware extraction
EXTRACTION_MODEL = "google/gemini-3-flash-preview"
# Resolve vision auxiliary client for extraction/vision tasks
_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
# Track active sessions per task
# Now stores tuple of (session_name, browserbase_session_id, cdp_url)
@ -782,87 +773,49 @@ def _run_browser_command(
return {"success": False, "error": str(e)}
async def _extract_relevant_content(
def _extract_relevant_content(
snapshot_text: str,
user_task: Optional[str] = None
) -> str:
"""Use LLM to extract relevant content from a snapshot based on the user's task.
Falls back to simple truncation when no auxiliary vision model is configured.
"""
Use LLM to extract relevant content from a snapshot based on the user's task.
This provides task-aware summarization that preserves meaningful text content
(paragraphs, prices, descriptions) relevant to what the user is trying to accomplish.
Args:
snapshot_text: The full snapshot text
user_task: The user's current task/goal (optional)
Returns:
Summarized/extracted content
"""
if not HTTPX_AVAILABLE:
# Fall back to simple truncation
if _aux_vision_client is None or EXTRACTION_MODEL is None:
return _truncate_snapshot(snapshot_text)
# Get API key
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
return _truncate_snapshot(snapshot_text)
# Build extraction prompt
if user_task:
extraction_prompt = f"""You are a content extractor for a browser automation agent.
The user's task is: {user_task}
Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on:
1. Interactive elements (buttons, links, inputs) that might be needed
2. Text content relevant to the task (prices, descriptions, headings, important info)
3. Navigation structure if relevant
Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.
Page Snapshot:
{snapshot_text}
Provide a concise summary that preserves actionable information and relevant content."""
extraction_prompt = (
f"You are a content extractor for a browser automation agent.\n\n"
f"The user's task is: {user_task}\n\n"
f"Given the following page snapshot (accessibility tree representation), "
f"extract and summarize the most relevant information for completing this task. Focus on:\n"
f"1. Interactive elements (buttons, links, inputs) that might be needed\n"
f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n"
f"3. Navigation structure if relevant\n\n"
f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n"
f"Page Snapshot:\n{snapshot_text}\n\n"
f"Provide a concise summary that preserves actionable information and relevant content."
)
else:
extraction_prompt = f"""Summarize this page snapshot, preserving:
1. All interactive elements with their ref IDs (like [ref=e5])
2. Key text content and headings
3. Important information visible on the page
Page Snapshot:
{snapshot_text}
Provide a concise summary focused on interactive elements and key content."""
extraction_prompt = (
f"Summarize this page snapshot, preserving:\n"
f"1. All interactive elements with their ref IDs (like [ref=e5])\n"
f"2. Key text content and headings\n"
f"3. Important information visible on the page\n\n"
f"Page Snapshot:\n{snapshot_text}\n\n"
f"Provide a concise summary focused on interactive elements and key content."
)
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": EXTRACTION_MODEL,
"messages": [
{"role": "user", "content": extraction_prompt}
],
"max_tokens": 4000,
"temperature": 0.1
}
)
if response.status_code == 200:
result = response.json()
return result["choices"][0]["message"]["content"]
else:
# Fall back to truncation on API error
return _truncate_snapshot(snapshot_text)
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[{"role": "user", "content": extraction_prompt}],
max_tokens=4000,
temperature=0.1,
)
return response.choices[0].message.content
except Exception:
# Fall back to truncation on any error
return _truncate_snapshot(snapshot_text)
@ -991,16 +944,7 @@ def browser_snapshot(
# Check if snapshot needs summarization
if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task:
# Run async extraction
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
snapshot_text = loop.run_until_complete(
_extract_relevant_content(snapshot_text, user_task)
)
snapshot_text = _extract_relevant_content(snapshot_text, user_task)
elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
snapshot_text = _truncate_snapshot(snapshot_text)
@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
effective_task_id = task_id or "default"
# Check for OpenRouter API key
api_key = os.environ.get("OPENROUTER_API_KEY")
if not api_key:
# Check auxiliary vision client
if _aux_vision_client is None or EXTRACTION_MODEL is None:
return json.dumps({
"success": False,
"error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key."
"error": "Browser vision unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
}, ensure_ascii=False)
# Create a temporary file for the screenshot
@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
image_base64 = base64.b64encode(image_data).decode("ascii")
data_url = f"data:image/png;base64,{image_base64}"
# Prepare the vision prompt
vision_prompt = f"""You are analyzing a screenshot of a web browser.
vision_prompt = (
f"You are analyzing a screenshot of a web browser.\n\n"
f"User's question: {question}\n\n"
f"Provide a detailed and helpful answer based on what you see in the screenshot. "
f"If there are interactive elements, describe them. If there are verification challenges "
f"or CAPTCHAs, describe what type they are and what action might be needed. "
f"Focus on answering the user's specific question."
)
User's question: {question}
Provide a detailed and helpful answer based on what you see in the screenshot.
If there are interactive elements, describe them. If there are verification challenges
or CAPTCHAs, describe what type they are and what action might be needed.
Focus on answering the user's specific question."""
# Call OpenRouter/Gemini for vision analysis
if HTTPX_AVAILABLE:
import asyncio
async def analyze_screenshot():
async with httpx.AsyncClient(timeout=60.0) as client:
response = await client.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
],
"max_tokens": 2000,
"temperature": 0.1
}
)
if response.status_code != 200:
return {
"success": False,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}"
}
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return {
"success": True,
"analysis": analysis
}
# Run the async function
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
vision_result = loop.run_until_complete(analyze_screenshot())
return json.dumps(vision_result, ensure_ascii=False)
else:
# Fallback: use synchronous requests
response = requests.post(
OPENROUTER_CHAT_URL,
headers={
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
},
json={
"model": "google/gemini-3-flash-preview",
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{
"type": "image_url",
"image_url": {"url": data_url}
}
]
}
# Use the sync auxiliary vision client directly
response = _aux_vision_client.chat.completions.create(
model=EXTRACTION_MODEL,
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": vision_prompt},
{"type": "image_url", "image_url": {"url": data_url}},
],
"max_tokens": 2000,
"temperature": 0.1
},
timeout=60
)
if response.status_code != 200:
return json.dumps({
"success": False,
"error": f"Vision API error: {response.status_code} - {response.text[:200]}"
}, ensure_ascii=False)
result_data = response.json()
analysis = result_data["choices"][0]["message"]["content"]
return json.dumps({
"success": True,
"analysis": analysis
}, ensure_ascii=False)
}
],
max_tokens=2000,
temperature=0.1,
)
analysis = response.choices[0].message.content
return json.dumps({
"success": True,
"analysis": analysis,
}, ensure_ascii=False)
except Exception as e:
return json.dumps({

View file

@ -22,9 +22,19 @@ import os
import logging
from typing import Dict, Any, List, Optional
from tools.openrouter_client import get_async_client as _get_client
from openai import AsyncOpenAI, OpenAI
SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
from agent.auxiliary_client import get_text_auxiliary_client
# Resolve the auxiliary client at import time so we have the model slug.
# We build an AsyncOpenAI from the same credentials for async summarization.
_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
_async_aux_client: AsyncOpenAI | None = None
if _aux_client is not None:
_async_aux_client = AsyncOpenAI(
api_key=_aux_client.api_key,
base_url=str(_aux_client.base_url),
)
MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 2000
@ -126,11 +136,15 @@ async def _summarize_session(
f"Summarize this conversation with focus on: {query}"
)
if _async_aux_client is None or _SUMMARIZER_MODEL is None:
logging.warning("No auxiliary model available for session summarization")
return None
max_retries = 3
for attempt in range(max_retries):
try:
response = await _get_client().chat.completions.create(
model=SUMMARIZER_MODEL,
response = await _async_aux_client.chat.completions.create(
model=_SUMMARIZER_MODEL,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
@ -252,8 +266,8 @@ def session_search(
def check_session_search_requirements() -> bool:
"""Requires SQLite state database and OpenRouter API key."""
if not os.getenv("OPENROUTER_API_KEY"):
"""Requires SQLite state database and an auxiliary text model."""
if _async_aux_client is None:
return False
try:
from hermes_state import DEFAULT_DB_PATH
@ -316,5 +330,4 @@ registry.register(
limit=args.get("limit", 3),
db=kw.get("db")),
check_fn=check_session_search_requirements,
requires_env=["OPENROUTER_API_KEY"],
)

View file

@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p
# Global state for environment lifecycle management
_active_environments: Dict[str, Any] = {}
_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory
_last_activity: Dict[str, float] = {}
_env_lock = threading.Lock()
_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation
@ -530,7 +529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300):
if current_time - last_time > lifetime_seconds:
env = _active_environments.pop(task_id, None)
_last_activity.pop(task_id, None)
_task_workdirs.pop(task_id, None)
if env is not None:
envs_to_stop.append((task_id, env))
@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]:
info = {
"count": len(_active_environments),
"task_ids": list(_active_environments.keys()),
"workdirs": dict(_task_workdirs),
"workdirs": {},
}
# Calculate total disk usage
@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]:
def cleanup_all_environments():
"""Clean up ALL active environments. Use with caution."""
global _active_environments, _last_activity, _task_workdirs
global _active_environments, _last_activity
task_ids = list(_active_environments.keys())
cleaned = 0
@ -661,7 +659,7 @@ def cleanup_all_environments():
def cleanup_vm(task_id: str):
"""Manually clean up a specific environment by task_id."""
global _active_environments, _last_activity, _task_workdirs
global _active_environments, _last_activity
# Remove from tracking dicts while holding the lock, but defer the
# actual (potentially slow) env.cleanup() call to outside the lock
@ -669,7 +667,6 @@ def cleanup_vm(task_id: str):
env = None
with _env_lock:
env = _active_environments.pop(task_id, None)
_task_workdirs.pop(task_id, None)
_last_activity.pop(task_id, None)
# Clean up per-task creation lock
@ -782,17 +779,6 @@ def terminal_tool(
default_timeout = config["timeout"]
effective_timeout = timeout or default_timeout
# For local environment in batch mode, create a unique subdirectory per task
# This prevents parallel tasks from overwriting each other's files
# In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories
if env_type == "local" and not os.getenv("HERMES_QUIET"):
with _env_lock:
if effective_task_id not in _task_workdirs:
task_workdir = Path(cwd) / f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}"
task_workdir.mkdir(parents=True, exist_ok=True)
_task_workdirs[effective_task_id] = str(task_workdir)
cwd = _task_workdirs[effective_task_id]
# Start cleanup thread
_start_cleanup_thread()
@ -874,11 +860,16 @@ def terminal_tool(
"description": approval.get("description", "dangerous command"),
"pattern_key": approval.get("pattern_key", ""),
}, ensure_ascii=False)
# Command was blocked - return informative message
# Command was blocked - include the pattern category so the caller knows why
desc = approval.get("description", "potentially dangerous operation")
fallback_msg = (
f"Command denied: matches '{desc}' pattern. "
"Use the approval prompt to allow it, or rephrase the command."
)
return json.dumps({
"output": "",
"exit_code": -1,
"error": approval.get("message", "Command denied - potentially dangerous operation"),
"error": approval.get("message", fallback_msg),
"status": "blocked"
}, ensure_ascii=False)
@ -996,11 +987,17 @@ def terminal_tool(
# Add helpful message for sudo failures in messaging context
output = _handle_sudo_failure(output, env_type)
# Truncate output if too long
# Truncate output if too long, keeping both head and tail
MAX_OUTPUT_CHARS = 50000
if len(output) > MAX_OUTPUT_CHARS:
truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..."
output = truncated_notice + output[-MAX_OUTPUT_CHARS:]
head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early)
tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output)
omitted = len(output) - head_chars - tail_chars
truncated_notice = (
f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
f"out of {len(output)} total] ...\n\n"
)
output = output[:head_chars] + truncated_notice + output[-tail_chars:]
return json.dumps({
"output": output.strip() if output else "",

View file

@ -36,13 +36,20 @@ import base64
from pathlib import Path
from typing import Dict, Any, Optional
import httpx
from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key
from openai import AsyncOpenAI
from agent.auxiliary_client import get_vision_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
# Configuration for vision processing
DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview"
# Resolve vision auxiliary client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@ -230,9 +237,13 @@ async def vision_analyze_tool(
logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100])
# Check API key availability
if not os.getenv("OPENROUTER_API_KEY"):
raise ValueError("OPENROUTER_API_KEY environment variable not set")
# Check auxiliary vision client availability
if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
return json.dumps({
"success": False,
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
}, indent=2, ensure_ascii=False)
# Determine if this is a local file path or a remote URL
local_path = Path(image_url)
@ -291,18 +302,12 @@ async def vision_analyze_tool(
logger.info("Processing image with %s...", model)
# Call the vision API with reasoning enabled
response = await _get_openrouter_client().chat.completions.create(
# Call the vision API
response = await _aux_async_client.chat.completions.create(
model=model,
messages=messages,
temperature=0.1, # Low temperature for consistent analysis
max_tokens=2000, # Generous limit for detailed analysis
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
temperature=0.1,
max_tokens=2000,
)
# Extract the analysis
@ -353,13 +358,8 @@ async def vision_analyze_tool(
def check_vision_requirements() -> bool:
"""
Check if all requirements for vision tools are met.
Returns:
bool: True if requirements are met, False otherwise
"""
return check_openrouter_api_key()
"""Check if an auxiliary vision model is available."""
return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
@ -379,16 +379,15 @@ if __name__ == "__main__":
print("👁️ Vision Tools Module")
print("=" * 40)
# Check if API key is available
api_available = check_openrouter_api_key()
# Check if vision model is available
api_available = check_vision_requirements()
if not api_available:
print("❌ OPENROUTER_API_KEY environment variable not set")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
print("Get API key at: https://openrouter.ai/")
print("❌ No auxiliary vision model available")
print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
exit(1)
else:
print("✅ OpenRouter API key found")
print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw):
image_url = args.get("image_url", "")
question = args.get("question", "")
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview")
model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
return vision_analyze_tool(image_url, full_prompt, model)
registry.register(
@ -464,6 +464,5 @@ registry.register(
schema=VISION_ANALYZE_SCHEMA,
handler=_handle_vision_analyze,
check_fn=check_vision_requirements,
requires_env=["OPENROUTER_API_KEY"],
is_async=True,
)

View file

@ -47,7 +47,8 @@ import re
import asyncio
from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl
from tools.openrouter_client import get_async_client as _get_openrouter_client
from openai import AsyncOpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
@ -64,9 +65,17 @@ def _get_firecrawl_client():
_firecrawl_client = Firecrawl(api_key=api_key)
return _firecrawl_client
DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview"
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
# Resolve auxiliary text client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_aux_async_client = AsyncOpenAI(
api_key=_aux_sync_client.api_key,
base_url=str(_aux_sync_client.base_url),
)
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized,
for attempt in range(max_retries):
try:
response = await _get_openrouter_client().chat.completions.create(
if _aux_async_client is None:
logger.warning("No auxiliary model available for web content processing")
return None
response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt},
@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized,
],
temperature=0.1,
max_tokens=max_tokens,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
)
return response.choices[0].message.content.strip()
except Exception as api_error:
@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that:
Create a single, unified markdown summary."""
try:
response = await _get_openrouter_client().chat.completions.create(
if _aux_async_client is None:
logger.warning("No auxiliary model for synthesis, concatenating summaries")
fallback = "\n\n".join(summaries)
if len(fallback) > max_output_size:
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
return fallback
response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
@ -350,12 +363,6 @@ Create a single, unified markdown summary."""
],
temperature=0.1,
max_tokens=4000,
extra_body={
"reasoning": {
"enabled": True,
"effort": "xhigh"
}
}
)
final_summary = response.choices[0].message.content.strip()
@ -677,8 +684,8 @@ async def web_extract_tool(
debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
# Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and _aux_async_client is not None:
logger.info("Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
@ -744,8 +751,8 @@ async def web_extract_tool(
else:
logger.warning("%s (no content to process)", url)
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of extracted pages for debugging (original behavior)
@ -973,8 +980,8 @@ async def web_crawl_tool(
debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled
if use_llm_processing and os.getenv("OPENROUTER_API_KEY"):
# Process each result with LLM if enabled and auxiliary client is available
if use_llm_processing and _aux_async_client is not None:
logger.info("Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing")
@ -1040,8 +1047,8 @@ async def web_crawl_tool(
else:
logger.warning("%s (no content to process)", page_url)
else:
if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"):
logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content")
if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of crawled pages for debugging (original behavior)
@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool:
return bool(os.getenv("FIRECRAWL_API_KEY"))
def check_nous_api_key() -> bool:
"""
Check if the Nous Research API key is available in environment variables.
Returns:
bool: True if API key is set, False otherwise
"""
return bool(os.getenv("OPENROUTER_API_KEY"))
def check_auxiliary_model() -> bool:
"""Check if an auxiliary text model is available for LLM content processing."""
return _aux_async_client is not None
def get_debug_session_info() -> Dict[str, Any]:
@ -1120,7 +1122,7 @@ if __name__ == "__main__":
# Check if API keys are available
firecrawl_available = check_firecrawl_api_key()
nous_available = check_nous_api_key()
nous_available = check_auxiliary_model()
if not firecrawl_available:
print("❌ FIRECRAWL_API_KEY environment variable not set")
@ -1130,12 +1132,11 @@ if __name__ == "__main__":
print("✅ Firecrawl API key found")
if not nous_available:
print("❌ OPENROUTER_API_KEY environment variable not set")
print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'")
print("Get API key at: https://inference-api.nousresearch.com/")
print("⚠️ Without Nous API key, LLM content processing will be disabled")
print("❌ No auxiliary model available for LLM content processing")
print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
print("⚠️ Without an auxiliary model, LLM content processing will be disabled")
else:
print("✅ Nous Research API key found")
print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
if not firecrawl_available:
exit(1)
@ -1143,7 +1144,7 @@ if __name__ == "__main__":
print("🛠️ Web tools ready for use!")
if nous_available:
print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter")
print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}")
print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars")
# Show debug mode status