diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py new file mode 100644 index 0000000000..3546e6bdb8 --- /dev/null +++ b/agent/auxiliary_client.py @@ -0,0 +1,128 @@ +"""Shared auxiliary OpenAI client for cheap/fast side tasks. + +Provides a single resolution chain so every consumer (context compression, +session search, web extraction, vision analysis, browser vision) picks up +the best available backend without duplicating fallback logic. + +Resolution order for text tasks: + 1. OpenRouter (OPENROUTER_API_KEY) + 2. Nous Portal (~/.hermes/auth.json active provider) + 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) + 4. None + +Resolution order for vision/multimodal tasks: + 1. OpenRouter + 2. Nous Portal + 3. None (custom endpoints can't substitute for Gemini multimodal) +""" + +import json +import logging +import os +from pathlib import Path +from typing import Optional, Tuple + +from openai import OpenAI + +from hermes_constants import OPENROUTER_BASE_URL + +logger = logging.getLogger(__name__) + +# Default auxiliary models per provider +_OPENROUTER_MODEL = "google/gemini-3-flash-preview" +_NOUS_MODEL = "gemini-3-flash" +_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" +_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json" + + +def _read_nous_auth() -> Optional[dict]: + """Read and validate ~/.hermes/auth.json for an active Nous provider. + + Returns the provider state dict if Nous is active with tokens, + otherwise None. 
+ """ + try: + if not _AUTH_JSON_PATH.is_file(): + return None + data = json.loads(_AUTH_JSON_PATH.read_text()) + if data.get("active_provider") != "nous": + return None + provider = data.get("providers", {}).get("nous", {}) + # Must have at least an access_token or agent_key + if not provider.get("agent_key") and not provider.get("access_token"): + return None + return provider + except Exception as exc: + logger.debug("Could not read Nous auth: %s", exc) + return None + + +def _nous_api_key(provider: dict) -> str: + """Extract the best API key from a Nous provider state dict.""" + return provider.get("agent_key") or provider.get("access_token", "") + + +def _nous_base_url() -> str: + """Resolve the Nous inference base URL from env or default.""" + return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) + + +# ── Public API ────────────────────────────────────────────────────────────── + +def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: + """Return (client, model_slug) for text-only auxiliary tasks. + + Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None). + """ + # 1. OpenRouter + or_key = os.getenv("OPENROUTER_API_KEY") + if or_key: + logger.debug("Auxiliary text client: OpenRouter") + return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL), _OPENROUTER_MODEL + + # 2. Nous Portal + nous = _read_nous_auth() + if nous: + logger.debug("Auxiliary text client: Nous Portal") + return ( + OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), + _NOUS_MODEL, + ) + + # 3. Custom endpoint (both base URL and key must be set) + custom_base = os.getenv("OPENAI_BASE_URL") + custom_key = os.getenv("OPENAI_API_KEY") + if custom_base and custom_key: + model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini" + logger.debug("Auxiliary text client: custom endpoint (%s)", model) + return OpenAI(api_key=custom_key, base_url=custom_base), model + + # 4. 
Nothing available + logger.debug("Auxiliary text client: none available") + return None, None + + +def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: + """Return (client, model_slug) for vision/multimodal auxiliary tasks. + + Only OpenRouter and Nous Portal qualify — custom endpoints cannot + substitute for Gemini multimodal. + """ + # 1. OpenRouter + or_key = os.getenv("OPENROUTER_API_KEY") + if or_key: + logger.debug("Auxiliary vision client: OpenRouter") + return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL), _OPENROUTER_MODEL + + # 2. Nous Portal + nous = _read_nous_auth() + if nous: + logger.debug("Auxiliary vision client: Nous Portal") + return ( + OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), + _NOUS_MODEL, + ) + + # 3. Nothing suitable + logger.debug("Auxiliary vision client: none available") + return None, None diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 7a8225cbb4..8f072a37a1 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -9,13 +9,11 @@ import logging import os from typing import Any, Dict, List -from openai import OpenAI - +from agent.auxiliary_client import get_text_auxiliary_client from agent.model_metadata import ( get_model_context_length, estimate_messages_tokens_rough, ) -from hermes_constants import OPENROUTER_BASE_URL logger = logging.getLogger(__name__) @@ -31,7 +29,6 @@ class ContextCompressor: self, model: str, threshold_percent: float = 0.85, - summary_model: str = "google/gemini-3-flash-preview", protect_first_n: int = 3, protect_last_n: int = 4, summary_target_tokens: int = 500, @@ -39,7 +36,6 @@ class ContextCompressor: ): self.model = model self.threshold_percent = threshold_percent - self.summary_model = summary_model self.protect_first_n = protect_first_n self.protect_last_n = protect_last_n self.summary_target_tokens = summary_target_tokens @@ -53,8 +49,7 @@ class ContextCompressor: self.last_completion_tokens = 0 
self.last_total_tokens = 0 - api_key = os.getenv("OPENROUTER_API_KEY", "") - self.client = OpenAI(api_key=api_key, base_url=OPENROUTER_BASE_URL) if api_key else None + self.client, self.summary_model = get_text_auxiliary_client() def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" @@ -155,6 +150,26 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" if not self.quiet_mode: print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)") print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})") + + # Truncation fallback when no auxiliary model is available + if self.client is None: + print("⚠️ Context compression: no auxiliary model available. Falling back to message truncation.") + # Keep system message(s) at the front and the protected tail; + # simply drop the oldest non-system messages until under threshold. + kept = [] + for msg in messages: + if msg.get("role") == "system": + kept.append(msg.copy()) + else: + break + tail = messages[-self.protect_last_n:] + kept.extend(m.copy() for m in tail) + self.compression_count += 1 + if not self.quiet_mode: + print(f" ✂️ Truncated: {len(messages)} → {len(kept)} messages (dropped middle turns)") + return kept + + if not self.quiet_mode: print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)") summary = self._generate_summary(turns_to_summarize) diff --git a/agent/display.py b/agent/display.py index bed75e3062..7320cb7ea6 100644 --- a/agent/display.py +++ b/agent/display.py @@ -4,11 +4,16 @@ Pure display functions and classes with no AIAgent dependency. Used by AIAgent._execute_tool_calls for CLI feedback. 
""" +import json import os import random import threading import time +# ANSI escape codes for coloring tool failure indicators +_RED = "\033[31m" +_RESET = "\033[0m" + # ========================================================================= # Tool preview (one-line summary of a tool call's primary argument) @@ -242,12 +247,46 @@ KAWAII_GENERIC = [ # Cute tool message (completion line that replaces the spinner) # ========================================================================= -def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str: +def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]: + """Inspect a tool result string for signs of failure. + + Returns ``(is_failure, suffix)`` where *suffix* is an informational tag + like ``" [exit 1]"`` for terminal failures, or ``" [error]"`` for generic + failures. On success, returns ``(False, "")``. + """ + if result is None: + return False, "" + + if tool_name == "terminal": + try: + data = json.loads(result) + exit_code = data.get("exit_code") + if exit_code is not None and exit_code != 0: + return True, f" [exit {exit_code}]" + except (json.JSONDecodeError, TypeError, AttributeError): + pass + return False, "" + + # Generic heuristic for non-terminal tools + lower = result[:500].lower() + if '"error"' in lower or '"failed"' in lower or result.startswith("Error"): + return True, " [error]" + + return False, "" + + +def get_cute_tool_message( + tool_name: str, args: dict, duration: float, result: str | None = None, +) -> str: """Generate a formatted tool completion line for CLI quiet mode. Format: ``| {emoji} {verb:9} {detail} {duration}`` + + When *result* is provided the line is checked for failure indicators. + Failed tool calls get a red prefix and an informational suffix. 
""" dur = f"{duration:.1f}s" + is_failure, failure_suffix = _detect_tool_failure(tool_name, result) def _trunc(s, n=40): s = str(s) @@ -257,105 +296,111 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str: p = str(p) return ("..." + p[-(n-3):]) if len(p) > n else p + def _wrap(line: str) -> str: + """Apply red coloring and failure suffix when the tool failed.""" + if not is_failure: + return line + return f"{_RED}{line}{failure_suffix}{_RESET}" + if tool_name == "web_search": - return f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}" + return _wrap(f"┊ 🔍 search {_trunc(args.get('query', ''), 42)} {dur}") if tool_name == "web_extract": urls = args.get("urls", []) if urls: url = urls[0] if isinstance(urls, list) else str(urls) domain = url.replace("https://", "").replace("http://", "").split("/")[0] extra = f" +{len(urls)-1}" if len(urls) > 1 else "" - return f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}" - return f"┊ 📄 fetch pages {dur}" + return _wrap(f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}") + return _wrap(f"┊ 📄 fetch pages {dur}") if tool_name == "web_crawl": url = args.get("url", "") domain = url.replace("https://", "").replace("http://", "").split("/")[0] - return f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}" + return _wrap(f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}") if tool_name == "terminal": - return f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}" + return _wrap(f"┊ 💻 $ {_trunc(args.get('command', ''), 42)} {dur}") if tool_name == "process": action = args.get("action", "?") sid = args.get("session_id", "")[:12] labels = {"list": "ls processes", "poll": f"poll {sid}", "log": f"log {sid}", "wait": f"wait {sid}", "kill": f"kill {sid}", "write": f"write {sid}", "submit": f"submit {sid}"} - return f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}" + return _wrap(f"┊ ⚙️ proc {labels.get(action, f'{action} {sid}')} {dur}") if tool_name == "read_file": - return f"┊ 📖 read {_path(args.get('path', ''))} {dur}" + return 
_wrap(f"┊ 📖 read {_path(args.get('path', ''))} {dur}") if tool_name == "write_file": - return f"┊ ✍️ write {_path(args.get('path', ''))} {dur}" + return _wrap(f"┊ ✍️ write {_path(args.get('path', ''))} {dur}") if tool_name == "patch": - return f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}" + return _wrap(f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}") if tool_name == "search_files": pattern = _trunc(args.get("pattern", ""), 35) target = args.get("target", "content") verb = "find" if target == "files" else "grep" - return f"┊ 🔎 {verb:9} {pattern} {dur}" + return _wrap(f"┊ 🔎 {verb:9} {pattern} {dur}") if tool_name == "browser_navigate": url = args.get("url", "") domain = url.replace("https://", "").replace("http://", "").split("/")[0] - return f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}" + return _wrap(f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}") if tool_name == "browser_snapshot": mode = "full" if args.get("full") else "compact" - return f"┊ 📸 snapshot {mode} {dur}" + return _wrap(f"┊ 📸 snapshot {mode} {dur}") if tool_name == "browser_click": - return f"┊ 👆 click {args.get('ref', '?')} {dur}" + return _wrap(f"┊ 👆 click {args.get('ref', '?')} {dur}") if tool_name == "browser_type": - return f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}" + return _wrap(f"┊ ⌨️ type \"{_trunc(args.get('text', ''), 30)}\" {dur}") if tool_name == "browser_scroll": d = args.get("direction", "down") arrow = {"down": "↓", "up": "↑", "right": "→", "left": "←"}.get(d, "↓") - return f"┊ {arrow} scroll {d} {dur}" + return _wrap(f"┊ {arrow} scroll {d} {dur}") if tool_name == "browser_back": - return f"┊ ◀️ back {dur}" + return _wrap(f"┊ ◀️ back {dur}") if tool_name == "browser_press": - return f"┊ ⌨️ press {args.get('key', '?')} {dur}" + return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}") if tool_name == "browser_close": - return f"┊ 🚪 close browser {dur}" + return _wrap(f"┊ 🚪 close browser {dur}") if tool_name == "browser_get_images": - return f"┊ 🖼️ images extracting {dur}" + 
return _wrap(f"┊ 🖼️ images extracting {dur}") if tool_name == "browser_vision": - return f"┊ 👁️ vision analyzing page {dur}" + return _wrap(f"┊ 👁️ vision analyzing page {dur}") if tool_name == "todo": todos_arg = args.get("todos") merge = args.get("merge", False) if todos_arg is None: - return f"┊ 📋 plan reading tasks {dur}" + return _wrap(f"┊ 📋 plan reading tasks {dur}") elif merge: - return f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}" + return _wrap(f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}") else: - return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}" + return _wrap(f"┊ 📋 plan {len(todos_arg)} task(s) {dur}") if tool_name == "session_search": - return f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}" + return _wrap(f"┊ 🔍 recall \"{_trunc(args.get('query', ''), 35)}\" {dur}") if tool_name == "memory": action = args.get("action", "?") target = args.get("target", "") if action == "add": - return f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}" + return _wrap(f"┊ 🧠 memory +{target}: \"{_trunc(args.get('content', ''), 30)}\" {dur}") elif action == "replace": - return f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}" + return _wrap(f"┊ 🧠 memory ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}") elif action == "remove": - return f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}" - return f"┊ 🧠 memory {action} {dur}" + return _wrap(f"┊ 🧠 memory -{target}: \"{_trunc(args.get('old_text', ''), 20)}\" {dur}") + return _wrap(f"┊ 🧠 memory {action} {dur}") if tool_name == "skills_list": - return f"┊ 📚 skills list {args.get('category', 'all')} {dur}" + return _wrap(f"┊ 📚 skills list {args.get('category', 'all')} {dur}") if tool_name == "skill_view": - return f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}" + return _wrap(f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}") if tool_name == "image_generate": - return f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}" 
+ return _wrap(f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}") if tool_name == "text_to_speech": - return f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}" + return _wrap(f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}") if tool_name == "vision_analyze": - return f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}" + return _wrap(f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}") if tool_name == "mixture_of_agents": - return f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}" + return _wrap(f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}") if tool_name == "send_message": - return f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}" + return _wrap(f"┊ 📨 send {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\" {dur}") if tool_name == "schedule_cronjob": - return f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}" + return _wrap(f"┊ ⏰ schedule {_trunc(args.get('name', args.get('prompt', 'task')), 30)} {dur}") if tool_name == "list_cronjobs": - return f"┊ ⏰ jobs listing {dur}" + return _wrap(f"┊ ⏰ jobs listing {dur}") if tool_name == "remove_cronjob": - return f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}" + return _wrap(f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}") if tool_name.startswith("rl_"): rl = { "rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}", @@ -364,16 +409,16 @@ def get_cute_tool_message(tool_name: str, args: dict, duration: float) -> str: "rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}", "rl_list_runs": "list runs", "rl_test_inference": "test inference", } - return f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}" + return _wrap(f"┊ 🧪 rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}") if tool_name == "execute_code": code = args.get("code", "") first_line = 
code.strip().split("\n")[0] if code.strip() else "" - return f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}" + return _wrap(f"┊ 🐍 exec {_trunc(first_line, 35)} {dur}") if tool_name == "delegate_task": tasks = args.get("tasks") if tasks and isinstance(tasks, list): - return f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}" - return f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}" + return _wrap(f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}") + return _wrap(f"┊ 🔀 delegate {_trunc(args.get('goal', ''), 35)} {dur}") preview = build_tool_preview(tool_name, args) or "" - return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}" + return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}") diff --git a/cli.py b/cli.py index 1972a18e03..91d7399996 100755 --- a/cli.py +++ b/cli.py @@ -339,9 +339,6 @@ def _cprint(text: str): """ _pt_print(_PT_ANSI(text)) -# Version string -VERSION = "v1.0.0" - # ASCII Art - HERMES-AGENT logo (full width, single line - requires ~95 char terminal) HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/] [bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/] diff --git a/gateway/config.py b/gateway/config.py index 8526c43693..16eceda672 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -8,6 +8,7 @@ Handles loading and validating configuration for: - Delivery preferences """ +import logging import os import json from pathlib import Path @@ -15,6 +16,8 @@ from dataclasses import dataclass, field from typing import Dict, List, Optional, Any from enum import Enum +logger = logging.getLogger(__name__) + class Platform(Enum): """Supported messaging platforms.""" @@ -264,6 +267,40 @@ def load_gateway_config() -> GatewayConfig: # Override with environment variables _apply_env_overrides(config) + # --- Validate loaded values --- + policy = config.default_reset_policy + + if not (0 <= policy.at_hour <= 
23): + logger.warning( + "Invalid at_hour=%s (must be 0-23). Using default 4.", policy.at_hour + ) + policy.at_hour = 4 + + if policy.idle_minutes is None or policy.idle_minutes <= 0: + logger.warning( + "Invalid idle_minutes=%s (must be positive). Using default 1440.", + policy.idle_minutes, + ) + policy.idle_minutes = 1440 + + # Warn about empty bot tokens — platforms that loaded an empty string + # won't connect and the cause can be confusing without a log line. + _token_env_names = { + Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN", + Platform.DISCORD: "DISCORD_BOT_TOKEN", + Platform.SLACK: "SLACK_BOT_TOKEN", + } + for platform, pconfig in config.platforms.items(): + if not pconfig.enabled: + continue + env_name = _token_env_names.get(platform) + if env_name and pconfig.token is not None and not pconfig.token.strip(): + logger.warning( + "%s is enabled but %s is empty. " + "The adapter will likely fail to connect.", + platform.value, env_name, + ) + return config diff --git a/gateway/delivery.py b/gateway/delivery.py index 676c3b5ae8..0093c1fb09 100644 --- a/gateway/delivery.py +++ b/gateway/delivery.py @@ -8,12 +8,18 @@ Routes messages to the appropriate destination based on: - Local (always saved to files) """ +import logging from pathlib import Path from datetime import datetime from dataclasses import dataclass from typing import Dict, List, Optional, Any, Union from enum import Enum +logger = logging.getLogger(__name__) + +MAX_PLATFORM_OUTPUT = 4000 +TRUNCATED_VISIBLE = 3800 + from .config import Platform, GatewayConfig from .session import SessionSource @@ -245,6 +251,15 @@ class DeliveryRouter: "timestamp": timestamp } + def _save_full_output(self, content: str, job_id: str) -> Path: + """Save full cron output to disk and return the file path.""" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + out_dir = Path.home() / ".hermes" / "cron" / "output" + out_dir.mkdir(parents=True, exist_ok=True) + path = out_dir / f"{job_id}_{timestamp}.txt" + 
path.write_text(content) + return path + async def _deliver_to_platform( self, target: DeliveryTarget, @@ -260,8 +275,16 @@ class DeliveryRouter: if not target.chat_id: raise ValueError(f"No chat ID for {target.platform.value} delivery") - # Call the adapter's send method - # Adapters should implement: async def send(chat_id: str, content: str) -> Dict + # Guard: truncate oversized cron output to stay within platform limits + if len(content) > MAX_PLATFORM_OUTPUT: + job_id = (metadata or {}).get("job_id", "unknown") + saved_path = self._save_full_output(content, job_id) + logger.info("Cron output truncated (%d chars) — full output: %s", len(content), saved_path) + content = ( + content[:TRUNCATED_VISIBLE] + + f"\n\n... [truncated, full output saved to {saved_path}]" + ) + return await adapter.send(target.chat_id, content, metadata=metadata) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 918bc31bd6..9aef4033f5 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -659,34 +659,90 @@ class BasePlatformAdapter(ABC): def truncate_message(self, content: str, max_length: int = 4096) -> List[str]: """ - Split a long message into chunks. - + Split a long message into chunks, preserving code block boundaries. + + When a split falls inside a triple-backtick code block, the fence is + closed at the end of the current chunk and reopened (with the original + language tag) at the start of the next chunk. Multi-chunk responses + receive indicators like ``(1/3)``. 
+ Args: content: The full message content max_length: Maximum length per chunk (platform-specific) - + Returns: List of message chunks """ if len(content) <= max_length: return [content] - - chunks = [] - while content: - if len(content) <= max_length: - chunks.append(content) + + INDICATOR_RESERVE = 10 # room for " (XX/XX)" + FENCE_CLOSE = "\n```" + + chunks: List[str] = [] + remaining = content + # When the previous chunk ended mid-code-block, this holds the + # language tag (possibly "") so we can reopen the fence. + carry_lang: Optional[str] = None + + while remaining: + # If we're continuing a code block from the previous chunk, + # prepend a new opening fence with the same language tag. + prefix = f"```{carry_lang}\n" if carry_lang is not None else "" + + # How much body text we can fit after accounting for the prefix, + # a potential closing fence, and the chunk indicator. + headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE) + if headroom < 1: + headroom = max_length // 2 + + # Everything remaining fits in one final chunk + if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE: + chunks.append(prefix + remaining) break - - # Try to split at a newline - split_idx = content.rfind("\n", 0, max_length) - if split_idx == -1: - # No newline, split at space - split_idx = content.rfind(" ", 0, max_length) - if split_idx == -1: - # No space either, hard split - split_idx = max_length - - chunks.append(content[:split_idx]) - content = content[split_idx:].lstrip() - + + # Find a natural split point (prefer newlines, then spaces) + region = remaining[:headroom] + split_at = region.rfind("\n") + if split_at < headroom // 2: + split_at = region.rfind(" ") + if split_at < 1: + split_at = headroom + + chunk_body = remaining[:split_at] + remaining = remaining[split_at:].lstrip() + + full_chunk = prefix + chunk_body + + # Walk the chunk line-by-line to determine whether we end + # inside an open code block. 
+ in_code = carry_lang is not None + lang = carry_lang or "" + for line in full_chunk.split("\n"): + stripped = line.strip() + if stripped.startswith("```"): + if in_code: + in_code = False + lang = "" + else: + in_code = True + tag = stripped[3:].strip() + lang = tag.split()[0] if tag else "" + + if in_code: + # Close the orphaned fence so the chunk is valid on its own + full_chunk += FENCE_CLOSE + carry_lang = lang + else: + carry_lang = None + + chunks.append(full_chunk) + + # Append chunk indicators when the response spans multiple messages + if len(chunks) > 1: + total = len(chunks) + chunks = [ + f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks) + ] + return chunks diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index e0d277b7b4..01bbad0db0 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -8,6 +8,7 @@ Uses python-telegram-bot library for: """ import asyncio +import re from typing import Dict, List, Optional, Any try: @@ -49,6 +50,16 @@ def check_telegram_requirements() -> bool: return TELEGRAM_AVAILABLE +# Matches every character that MarkdownV2 requires to be backslash-escaped +# when it appears outside a code span or fenced code block. +_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])') + + +def _escape_mdv2(text: str) -> str: + """Escape Telegram MarkdownV2 special characters with a preceding backslash.""" + return _MDV2_ESCAPE_RE.sub(r'\\\1', text) + + class TelegramAdapter(BasePlatformAdapter): """ Telegram bot adapter. 
@@ -167,7 +178,7 @@ class TelegramAdapter(BasePlatformAdapter): msg = await self._bot.send_message( chat_id=int(chat_id), text=chunk, - parse_mode=ParseMode.MARKDOWN, + parse_mode=ParseMode.MARKDOWN_V2, reply_to_message_id=int(reply_to) if reply_to and i == 0 else None, message_thread_id=int(thread_id) if thread_id else None, ) @@ -297,14 +308,81 @@ class TelegramAdapter(BasePlatformAdapter): def format_message(self, content: str) -> str: """ - Format message for Telegram. - - Telegram uses a subset of markdown. We'll use the simpler - Markdown mode (not MarkdownV2) for compatibility. + Convert standard markdown to Telegram MarkdownV2 format. + + Protected regions (code blocks, inline code) are extracted first so + their contents are never modified. Standard markdown constructs + (headers, bold, italic, links) are translated to MarkdownV2 syntax, + and all remaining special characters are escaped. """ - # Basic escaping for Telegram Markdown - # In Markdown mode (not V2), only certain characters need escaping - return content + if not content: + return content + + placeholders: dict = {} + counter = [0] + + def _ph(value: str) -> str: + """Stash *value* behind a placeholder token that survives escaping.""" + key = f"\x00PH{counter[0]}\x00" + counter[0] += 1 + placeholders[key] = value + return key + + text = content + + # 1) Protect fenced code blocks (``` ... ```) + text = re.sub( + r'(```(?:[^\n]*\n)?[\s\S]*?```)', + lambda m: _ph(m.group(0)), + text, + ) + + # 2) Protect inline code (`...`) + text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text) + + # 3) Convert markdown links – escape the display text; inside the URL + # only ')' and '\' need escaping per the MarkdownV2 spec. 
+ def _convert_link(m): + display = _escape_mdv2(m.group(1)) + url = m.group(2).replace('\\', '\\\\').replace(')', '\\)') + return _ph(f'[{display}]({url})') + + text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text) + + # 4) Convert markdown headers (## Title) → bold *Title* + def _convert_header(m): + inner = m.group(1).strip() + # Strip redundant bold markers that may appear inside a header + inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner) + return _ph(f'*{_escape_mdv2(inner)}*') + + text = re.sub( + r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE + ) + + # 5) Convert bold: **text** → *text* (MarkdownV2 bold) + text = re.sub( + r'\*\*(.+?)\*\*', + lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'), + text, + ) + + # 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic) + text = re.sub( + r'\*([^*]+)\*', + lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'), + text, + ) + + # 7) Escape remaining special characters in plain text + text = _escape_mdv2(text) + + # 8) Restore placeholders in reverse insertion order so that + # nested references (a placeholder inside another) resolve correctly. 
+ for key in reversed(list(placeholders.keys())): + text = text.replace(key, placeholders[key]) + + return text async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None: """Handle incoming text messages.""" diff --git a/gateway/run.py b/gateway/run.py index b299085d7c..11bb11ca29 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -20,6 +20,7 @@ import re import sys import signal import threading +from logging.handlers import RotatingFileHandler from pathlib import Path from datetime import datetime from typing import Dict, Optional, Any, List @@ -402,9 +403,27 @@ class GatewayRunner: # Build the context prompt to inject context_prompt = build_session_context_prompt(context) + # If the previous session expired and was auto-reset, prepend a notice + # so the agent knows this is a fresh conversation (not an intentional /reset). + if getattr(session_entry, 'was_auto_reset', False): + context_prompt = ( + "[System note: The user's previous session expired due to inactivity. " + "This is a fresh conversation with no prior context.]\n\n" + + context_prompt + ) + session_entry.was_auto_reset = False + # Load conversation history from transcript history = self.session_store.load_transcript(session_entry.session_id) + # First-message onboarding for brand-new messaging platform users + if not history: + context_prompt += ( + "\n\n[System note: This is the user's very first message in this session. " + "Briefly introduce yourself and mention that /help shows available commands. " + "Keep the introduction concise -- one or two sentences max.]" + ) + # ----------------------------------------------------------------- # Auto-analyze images sent by the user # @@ -1342,15 +1361,32 @@ def _start_cron_ticker(stop_event: threading.Event, interval: int = 60): Runs inside the gateway process so cronjobs fire automatically without needing a separate `hermes cron daemon` or system cron entry. 
+ + Every 60th tick (~once per hour) the image/audio cache is pruned so + stale temp files don't accumulate. """ from cron.scheduler import tick as cron_tick + from gateway.platforms.base import cleanup_image_cache + + IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval logger.info("Cron ticker started (interval=%ds)", interval) + tick_count = 0 while not stop_event.is_set(): try: cron_tick(verbose=False) except Exception as e: logger.debug("Cron tick error: %s", e) + + tick_count += 1 + if tick_count % IMAGE_CACHE_EVERY == 0: + try: + removed = cleanup_image_cache(max_age_hours=24) + if removed: + logger.info("Image cache cleanup: removed %d stale file(s)", removed) + except Exception as e: + logger.debug("Image cache cleanup error: %s", e) + stop_event.wait(timeout=interval) logger.info("Cron ticker stopped") @@ -1363,6 +1399,18 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool: Returns True if the gateway ran successfully, False if it failed to start. A False return causes a non-zero exit code so systemd can auto-restart. 
""" + # Configure rotating file log so gateway output is persisted for debugging + log_dir = Path.home() / '.hermes' / 'logs' + log_dir.mkdir(parents=True, exist_ok=True) + file_handler = RotatingFileHandler( + log_dir / 'gateway.log', + maxBytes=5 * 1024 * 1024, + backupCount=3, + ) + file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s')) + logging.getLogger().addHandler(file_handler) + logging.getLogger().setLevel(logging.INFO) + runner = GatewayRunner(config) # Set up signal handlers diff --git a/gateway/session.py b/gateway/session.py index c66c638b46..b6603ecfaf 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -219,6 +219,10 @@ class SessionEntry: output_tokens: int = 0 total_tokens: int = 0 + # Set when a session was created because the previous one expired; + # consumed once by the message handler to inject a notice into context + was_auto_reset: bool = False + def to_dict(self) -> Dict[str, Any]: result = { "session_key": self.session_key, @@ -388,11 +392,14 @@ class SessionStore: return entry else: # Session is being reset -- end the old one in SQLite + was_auto_reset = True if self._db: try: self._db.end_session(entry.session_id, "session_reset") except Exception as e: logger.debug("Session DB operation failed: %s", e) + else: + was_auto_reset = False # Create new session session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}" @@ -406,6 +413,7 @@ class SessionStore: display_name=source.chat_name, platform=source.platform, chat_type=source.chat_type, + was_auto_reset=was_auto_reset, ) self._entries[session_key] = entry diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py index d8c95978c4..7e647afc35 100644 --- a/hermes_cli/__init__.py +++ b/hermes_cli/__init__.py @@ -11,4 +11,4 @@ Provides subcommands for: - hermes cron - Manage cron jobs """ -__version__ = "0.1.0" +__version__ = "v1.0.0" diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index 2597e880d7..974dfaa15e 100644 
--- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -33,7 +33,7 @@ def cprint(text: str): # ASCII Art & Branding # ========================================================================= -VERSION = "v1.0.0" +from hermes_cli import __version__ as VERSION HERMES_AGENT_LOGO = """[bold #FFD700]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ █████╗ ██████╗ ███████╗███╗ ██╗████████╗[/] [bold #FFD700]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ██╔══██╗██╔════╝ ██╔════╝████╗ ██║╚══██╔══╝[/] diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 09176ba154..6a103a372f 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -44,6 +44,8 @@ def run_doctor(args): should_fix = getattr(args, 'fix', False) issues = [] + manual_issues = [] # issues that can't be auto-fixed + fixed_count = 0 print() print(color("┌─────────────────────────────────────────────────────────┐", Colors.CYAN)) @@ -135,8 +137,15 @@ def run_doctor(args): check_ok(".env file exists (in project directory)") else: check_fail("~/.hermes/.env file missing") - check_info("Run 'hermes setup' to create one") - issues.append("Run 'hermes setup' to create .env") + if should_fix: + env_path.parent.mkdir(parents=True, exist_ok=True) + env_path.touch() + check_ok("Created empty ~/.hermes/.env") + check_info("Run 'hermes setup' to configure API keys") + fixed_count += 1 + else: + check_info("Run 'hermes setup' to create one") + issues.append("Run 'hermes setup' to create .env") # Check ~/.hermes/config.yaml (primary) or project cli-config.yaml (fallback) config_path = HERMES_HOME / 'config.yaml' @@ -147,7 +156,17 @@ def run_doctor(args): if fallback_config.exists(): check_ok("cli-config.yaml exists (in project directory)") else: - check_warn("config.yaml not found", "(using defaults)") + example_config = PROJECT_ROOT / 'cli-config.yaml.example' + if should_fix and example_config.exists(): + config_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(str(example_config), 
str(config_path)) + check_ok("Created ~/.hermes/config.yaml from cli-config.yaml.example") + fixed_count += 1 + elif should_fix: + check_warn("config.yaml not found and no example to copy from") + manual_issues.append("Create ~/.hermes/config.yaml manually") + else: + check_warn("config.yaml not found", "(using defaults)") # ========================================================================= # Check: Directory structure @@ -159,7 +178,26 @@ def run_doctor(args): if hermes_home.exists(): check_ok("~/.hermes directory exists") else: - check_warn("~/.hermes not found", "(will be created on first use)") + if should_fix: + hermes_home.mkdir(parents=True, exist_ok=True) + check_ok("Created ~/.hermes directory") + fixed_count += 1 + else: + check_warn("~/.hermes not found", "(will be created on first use)") + + # Check expected subdirectories + expected_subdirs = ["cron", "sessions", "logs", "skills", "memories"] + for subdir_name in expected_subdirs: + subdir_path = hermes_home / subdir_name + if subdir_path.exists(): + check_ok(f"~/.hermes/{subdir_name}/ exists") + else: + if should_fix: + subdir_path.mkdir(parents=True, exist_ok=True) + check_ok(f"Created ~/.hermes/{subdir_name}/") + fixed_count += 1 + else: + check_warn(f"~/.hermes/{subdir_name}/ not found", "(will be created on first use)") # Check for SOUL.md persona file soul_path = hermes_home / "SOUL.md" @@ -175,14 +213,25 @@ def run_doctor(args): check_warn("~/.hermes/SOUL.md not found", "(create it to give Hermes a custom personality)") if should_fix: soul_path.parent.mkdir(parents=True, exist_ok=True) - soul_path.write_text("# Hermes Agent Persona\n\n\n", encoding="utf-8") - check_ok("Created ~/.hermes/SOUL.md") + soul_path.write_text( + "# Hermes Agent Persona\n\n" + "\n\n" + "You are Hermes, a helpful AI assistant.\n", + encoding="utf-8", + ) + check_ok("Created ~/.hermes/SOUL.md with basic template") + fixed_count += 1 logs_dir = PROJECT_ROOT / "logs" if logs_dir.exists(): - check_ok("logs/ directory 
exists") + check_ok("logs/ directory exists (project root)") else: - check_warn("logs/ not found", "(will be created on first use)") + if should_fix: + logs_dir.mkdir(parents=True, exist_ok=True) + check_ok("Created logs/ directory") + fixed_count += 1 + else: + check_warn("logs/ not found", "(will be created on first use)") # Check memory directory memories_dir = hermes_home / "memories" @@ -205,6 +254,7 @@ def run_doctor(args): if should_fix: memories_dir.mkdir(parents=True, exist_ok=True) check_ok("Created ~/.hermes/memories/") + fixed_count += 1 # Check SQLite session store state_db_path = hermes_home / "state.db" @@ -299,6 +349,7 @@ def run_doctor(args): openrouter_key = os.getenv("OPENROUTER_API_KEY") if openrouter_key: + print(" Checking OpenRouter API...", end="", flush=True) try: import httpx response = httpx.get( @@ -307,20 +358,21 @@ def run_doctor(args): timeout=10 ) if response.status_code == 200: - check_ok("OpenRouter API") + print(f"\r {color('✓', Colors.GREEN)} OpenRouter API ") elif response.status_code == 401: - check_fail("OpenRouter API", "(invalid API key)") + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color('(invalid API key)', Colors.DIM)} ") issues.append("Check OPENROUTER_API_KEY in .env") else: - check_fail("OpenRouter API", f"(HTTP {response.status_code})") + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'(HTTP {response.status_code})', Colors.DIM)} ") except Exception as e: - check_fail("OpenRouter API", f"({e})") + print(f"\r {color('✗', Colors.RED)} OpenRouter API {color(f'({e})', Colors.DIM)} ") issues.append("Check network connectivity") else: check_warn("OpenRouter API", "(not configured)") anthropic_key = os.getenv("ANTHROPIC_API_KEY") if anthropic_key: + print(" Checking Anthropic API...", end="", flush=True) try: import httpx response = httpx.get( @@ -332,14 +384,14 @@ def run_doctor(args): timeout=10 ) if response.status_code == 200: - check_ok("Anthropic API") + print(f"\r {color('✓', Colors.GREEN)} 
Anthropic API ") elif response.status_code == 401: - check_fail("Anthropic API", "(invalid API key)") + print(f"\r {color('✗', Colors.RED)} Anthropic API {color('(invalid API key)', Colors.DIM)} ") else: - # Note: Anthropic may not have /models endpoint - check_warn("Anthropic API", "(couldn't verify)") + msg = "(couldn't verify)" + print(f"\r {color('⚠', Colors.YELLOW)} Anthropic API {color(msg, Colors.DIM)} ") except Exception as e: - check_warn("Anthropic API", f"({e})") + print(f"\r {color('⚠', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)} ") # ========================================================================= # Check: Submodules @@ -440,17 +492,28 @@ def run_doctor(args): # Summary # ========================================================================= print() - if issues: - print(color("─" * 60, Colors.YELLOW)) - print(color(f" Found {len(issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD)) + remaining_issues = issues + manual_issues + if should_fix and fixed_count > 0: + print(color("─" * 60, Colors.GREEN)) + print(color(f" Fixed {fixed_count} issue(s).", Colors.GREEN, Colors.BOLD), end="") + if remaining_issues: + print(color(f" {len(remaining_issues)} issue(s) require manual intervention.", Colors.YELLOW, Colors.BOLD)) + else: + print() print() - for i, issue in enumerate(issues, 1): + if remaining_issues: + for i, issue in enumerate(remaining_issues, 1): + print(f" {i}. {issue}") + print() + elif remaining_issues: + print(color("─" * 60, Colors.YELLOW)) + print(color(f" Found {len(remaining_issues)} issue(s) to address:", Colors.YELLOW, Colors.BOLD)) + print() + for i, issue in enumerate(remaining_issues, 1): print(f" {i}. 
def _has_any_provider_configured() -> bool:
    """Return True when at least one inference provider looks usable.

    Three sources are consulted, cheapest first:

    1. Provider API keys already present in the process environment.
    2. The same keys written in the Hermes ``.env`` file.
    3. Credentials stored for the active provider in ``auth.json``.

    Unreadable or malformed files count as "nothing configured here" so
    that this first-run guard can never crash the CLI; the reason is logged
    at debug level instead of being dropped silently.
    """
    import json

    from hermes_cli.config import get_env_path, get_hermes_home

    provider_keys = ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY")

    # 1. Environment (may have been populated from .env or the shell).
    if any(os.getenv(name) for name in provider_keys):
        return True

    # 2. .env file on disk.
    env_file = get_env_path()
    if env_file.exists():
        try:
            for raw_line in env_file.read_text(encoding="utf-8").splitlines():
                line = raw_line.strip()
                if line.startswith("#") or "=" not in line:
                    continue
                key, _, val = line.partition("=")
                key = key.strip()
                # Shell-style files commonly write "export KEY=value".
                if key.startswith("export "):
                    key = key[len("export "):].strip()
                val = val.strip().strip("'\"")
                if key in provider_keys and val:
                    return True
        except Exception as exc:
            logger.debug("Could not inspect %s: %s", env_file, exc)

    # 3. Stored credentials for whichever provider is marked active.
    auth_file = get_hermes_home() / "auth.json"
    if auth_file.exists():
        try:
            auth = json.loads(auth_file.read_text(encoding="utf-8"))
            active = auth.get("active_provider")
            if active:
                state = auth.get("providers", {}).get(active, {})
                # agent_key is accepted as well: the auxiliary client treats
                # either agent_key or access_token as a usable credential.
                if any(state.get(field) for field in ("access_token", "refresh_token", "agent_key")):
                    return True
        except Exception as exc:
            logger.debug("Could not inspect %s: %s", auth_file, exc)

    return False
"""
Single source of truth for the OpenRouter models shown in the CLI menus.

Edit the list below to add, drop, or reorder models; every menu that is
built from this module follows the list order.
"""

# Each entry is (model_id, short note appended to the menu label; may be empty).
OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6", "recommended"),
    ("anthropic/claude-sonnet-4.5", ""),
    ("anthropic/claude-opus-4.5", ""),
    ("openai/gpt-5.2", ""),
    ("openai/gpt-5.2-codex", ""),
    ("google/gemini-3-pro-preview", ""),
    ("google/gemini-3-flash-preview", ""),
    ("z-ai/glm-4.7", ""),
    ("moonshotai/kimi-k2.5", ""),
    ("minimax/minimax-m2.1", ""),
]


def model_ids() -> list[str]:
    """Return the bare model identifiers, in list order."""
    return [entry[0] for entry in OPENROUTER_MODELS]


def menu_labels() -> list[str]:
    """Return one menu label per model, in list order.

    A model with a note is rendered as ``"<id> (<note>)"``; a model with an
    empty note is rendered as the bare id.
    """
    return [
        f"{model_id} ({note})" if note else model_id
        for model_id, note in OPENROUTER_MODELS
    ]
"anthropic/claude-opus-4.5", - 3: "openai/gpt-5.2", - 4: "openai/gpt-5.2-codex", - 5: "google/gemini-3-pro-preview", - 6: "google/gemini-3-flash-preview", - 7: "z-ai/glm-4.7", - 8: "moonshotai/kimi-k2.5", - 9: "minimax/minimax-m2.1", - } - - if model_idx in model_map: - config['model'] = model_map[model_idx] - save_env_value("LLM_MODEL", model_map[model_idx]) - elif model_idx == 10: # Custom + if model_idx < len(ids): + config['model'] = ids[model_idx] + save_env_value("LLM_MODEL", ids[model_idx]) + elif model_idx == len(ids): # Custom custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)") if custom: config['model'] = custom save_env_value("LLM_MODEL", custom) - # else: Keep current (model_idx == 11) + # else: Keep current # ========================================================================= # Step 4: Terminal Backend diff --git a/run_agent.py b/run_agent.py index 882d10b294..3aa1df6865 100644 --- a/run_agent.py +++ b/run_agent.py @@ -69,840 +69,12 @@ from agent.prompt_builder import build_skills_system_prompt, build_context_files from agent.display import ( KawaiiSpinner, build_tool_preview as _build_tool_preview, get_cute_tool_message as _get_cute_tool_message_impl, - KAWAII_SEARCH, KAWAII_READ, KAWAII_TERMINAL, KAWAII_BROWSER, - KAWAII_CREATE, KAWAII_SKILL, KAWAII_THINK, KAWAII_GENERIC, ) from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) -# Model metadata functions (fetch_model_metadata, get_model_context_length, -# estimate_tokens_rough, estimate_messages_tokens_rough) are now in -# agent/model_metadata.py -- imported above. - - -class ContextCompressor: - """ - Compresses conversation context when approaching model's context limit. - - Uses similar logic to trajectory_compressor but operates in real-time: - 1. Protects first few turns (system, initial user, first assistant response) - 2. Protects last N turns (recent context is most relevant) - 3. 
class ContextCompressor:
    """
    Compress conversation context when it approaches the model's context limit.

    Strategy:
    1. Protect the first few turns (system, initial user, first assistant reply).
    2. Protect the last N turns (recent context is the most relevant).
    3. Replace everything in between with a single summary message once the
       token threshold is reached.

    Token tracking uses the counts reported by API responses
    (``usage.prompt_tokens``); a rough estimate is only used before a call.
    """

    def __init__(
        self,
        model: str,
        threshold_percent: float = 0.85,
        summary_model: str = "google/gemini-3-flash-preview",
        protect_first_n: int = 3,
        protect_last_n: int = 4,
        summary_target_tokens: int = 500,
        quiet_mode: bool = False,
    ):
        """
        Initialize the context compressor.

        Args:
            model: The main model being used (determines the context limit).
            threshold_percent: Fraction of the context at which to compress.
            summary_model: Model used to generate summaries.
            protect_first_n: Number of initial turns that are always kept.
            protect_last_n: Number of most recent turns that are always kept.
            summary_target_tokens: Target token count for a summary.
            quiet_mode: Suppress the progress output printed by ``compress``.
        """
        self.model = model
        self.threshold_percent = threshold_percent
        self.summary_model = summary_model
        self.protect_first_n = protect_first_n
        self.protect_last_n = protect_last_n
        self.summary_target_tokens = summary_target_tokens
        self.quiet_mode = quiet_mode

        self.context_length = get_model_context_length(model)
        self.threshold_tokens = int(self.context_length * threshold_percent)
        self.compression_count = 0

        # Actual token usage as reported by the most recent API response.
        self.last_prompt_tokens = 0
        self.last_completion_tokens = 0
        self.last_total_tokens = 0

        # Summaries go through OpenRouter; without a key there is no client
        # and _generate_summary falls back to a canned summary.
        api_key = os.getenv("OPENROUTER_API_KEY", "")
        self.client = OpenAI(
            api_key=api_key,
            base_url=OPENROUTER_BASE_URL
        ) if api_key else None

    def update_from_response(self, usage: Dict[str, Any]):
        """
        Record token usage from an API response.

        Args:
            usage: Usage dict with ``prompt_tokens``, ``completion_tokens`` and
                ``total_tokens``; missing keys are recorded as 0.
        """
        self.last_prompt_tokens = usage.get("prompt_tokens", 0)
        self.last_completion_tokens = usage.get("completion_tokens", 0)
        self.last_total_tokens = usage.get("total_tokens", 0)

    def should_compress(self, prompt_tokens: int = None) -> bool:
        """
        Check whether the context has reached the compression threshold.

        Args:
            prompt_tokens: Prompt token count from the last API response.
                When None, the last tracked value is used.

        Returns:
            True if compression should be triggered.
        """
        tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
        return tokens >= self.threshold_tokens

    def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
        """
        Pre-flight check based on a rough token estimate (before an API call).

        Args:
            messages: Current conversation messages.

        Returns:
            True if the estimate has reached the compression threshold.
        """
        rough_estimate = estimate_messages_tokens_rough(messages)
        return rough_estimate >= self.threshold_tokens

    def get_status(self) -> Dict[str, Any]:
        """
        Return current token usage and threshold figures for display/logging.

        ``usage_percent`` is 0 when the context length is falsy, which avoids
        a division by zero.
        """
        return {
            "last_prompt_tokens": self.last_prompt_tokens,
            "threshold_tokens": self.threshold_tokens,
            "context_length": self.context_length,
            "usage_percent": (self.last_prompt_tokens / self.context_length * 100) if self.context_length else 0,
            "compression_count": self.compression_count,
        }

    @staticmethod
    def _content_as_text(content: Any) -> str:
        """Flatten a message ``content`` value (None, str, or a parts list) to text."""
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for part in content:
                if isinstance(part, dict):
                    if part.get("type") == "text":
                        pieces.append(str(part.get("text", "")))
                    else:
                        # Non-text parts are reduced to a placeholder.
                        pieces.append(f"[{part.get('type', 'non-text')}]")
                else:
                    pieces.append(str(part))
            return "\n".join(pieces)
        return str(content)

    @staticmethod
    def _format_turns(turns: List[Dict[str, Any]]) -> str:
        """Render turns as ``[ROLE]: text`` paragraphs for the summary prompt."""
        parts = []
        for msg in turns:
            role = str(msg.get("role", "unknown"))
            # content is None on assistant tool-call messages and a list on
            # multipart messages, so it must be normalised before len()/slicing.
            content = ContextCompressor._content_as_text(msg.get("content"))

            # Keep the head and tail of very long content.
            if len(content) > 2000:
                content = content[:1000] + "\n...[truncated]...\n" + content[-500:]

            tool_calls = msg.get("tool_calls") or []
            if tool_calls:
                tool_names = [
                    str((tc.get("function") or {}).get("name") or "?")
                    for tc in tool_calls
                    if isinstance(tc, dict)
                ]
                content += f"\n[Tool calls: {', '.join(tool_names)}]"

            parts.append(f"[{role.upper()}]: {content}")

        return "\n\n".join(parts)

    def _generate_summary(self, turns_to_summarize: List[Dict[str, Any]]) -> str:
        """
        Summarize conversation turns with the summary model.

        Args:
            turns_to_summarize: Message dicts to summarize.

        Returns:
            A string starting with "[CONTEXT SUMMARY]:". A canned summary is
            returned when no client is configured or the API call fails.
        """
        if not self.client:
            # Fallback if no API key
            return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed to save space. The assistant performed various actions and received responses."

        content_to_summarize = self._format_turns(turns_to_summarize)

        prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.

Write from a neutral perspective describing:
1. What actions were taken (tool calls, searches, file operations)
2. Key information or results obtained
3. Important decisions or findings
4. Relevant data, file names, or outputs

Keep factual and informative. Target ~{self.summary_target_tokens} tokens.

---
TURNS TO SUMMARIZE:
{content_to_summarize}
---

Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""

        try:
            response = self.client.chat.completions.create(
                model=self.summary_model,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                max_tokens=self.summary_target_tokens * 2,
                timeout=30.0,
            )

            summary = response.choices[0].message.content.strip()
            if not summary.startswith("[CONTEXT SUMMARY]:"):
                summary = "[CONTEXT SUMMARY]: " + summary

            return summary

        except Exception as e:
            logging.warning("Failed to generate context summary: %s", e)
            return "[CONTEXT SUMMARY]: Previous conversation turns have been compressed. The assistant performed tool calls and received responses."

    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
        """
        Compress conversation messages by summarizing the middle turns.

        Algorithm:
        1. Keep the first ``protect_first_n`` turns.
        2. Keep the last ``protect_last_n`` turns.
        3. Summarize everything in between.
        4. Insert the summary as a single user message.

        Args:
            messages: Current conversation messages (not mutated).
            current_tokens: Token count from the API, used for the progress
                output only. When falsy, the last tracked value or a rough
                estimate is used.

        Returns:
            The compressed message list, or ``messages`` itself when there are
            too few messages to compress.
        """
        n_messages = len(messages)

        # Not enough messages to compress
        min_required = self.protect_first_n + self.protect_last_n + 1
        if n_messages <= min_required:
            if not self.quiet_mode:
                print(f"⚠️ Cannot compress: only {n_messages} messages (need > {min_required})")
            return messages

        compress_start = self.protect_first_n
        compress_end = n_messages - self.protect_last_n

        # Nothing to compress
        if compress_start >= compress_end:
            return messages

        turns_to_summarize = messages[compress_start:compress_end]

        # Prefer the real token count; fall back to tracked value, then estimate.
        display_tokens = current_tokens if current_tokens else self.last_prompt_tokens or estimate_messages_tokens_rough(messages)

        if not self.quiet_mode:
            print(f"\n📦 Context compression triggered ({display_tokens:,} tokens ≥ {self.threshold_tokens:,} threshold)")
            print(f" 📊 Model context limit: {self.context_length:,} tokens ({self.threshold_percent*100:.0f}% = {self.threshold_tokens:,})")
            print(f" 🗜️ Summarizing turns {compress_start+1}-{compress_end} ({len(turns_to_summarize)} turns)")

        summary = self._generate_summary(turns_to_summarize)

        compressed = []

        # Protected head turns (shallow copies so the caller's dicts stay untouched).
        for i in range(compress_start):
            msg = messages[i].copy()
            # Tell the model about summarization once, on the first compression.
            if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
                msg["content"] = msg.get("content", "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
            compressed.append(msg)

        # The summary replaces the middle turns as a user message.
        compressed.append({
            "role": "user",
            "content": summary
        })

        # Protected tail turns.
        for i in range(compress_end, n_messages):
            compressed.append(messages[i].copy())

        self.compression_count += 1

        if not self.quiet_mode:
            # Estimate only; the real size is known after the next API call.
            new_estimate = estimate_messages_tokens_rough(compressed)
            saved_estimate = display_tokens - new_estimate
            print(f" ✅ Compressed: {n_messages} → {len(compressed)} messages (~{saved_estimate:,} tokens saved)")
            print(f" 💡 Compression #{self.compression_count} complete")

        return compressed
def _apply_cache_marker(msg: dict, cache_marker: dict) -> None:
    """
    Attach ``cache_control`` to one message, in place.

    Placement depends on the message shape:
    - role "tool", or content of None: marker goes on the message itself
    - string content: content becomes a one-element text-part list that
      carries the marker
    - non-empty list content whose last item is a dict: marker goes on that
      last item

    Any other shape is left untouched.
    """
    body = msg.get("content")

    # Tool results and content-less messages take the marker at message level.
    if msg.get("role", "") == "tool" or body is None:
        msg["cache_control"] = cache_marker
        return

    if isinstance(body, str):
        msg["content"] = [{"type": "text", "text": body, "cache_control": cache_marker}]
    elif isinstance(body, list) and body and isinstance(body[-1], dict):
        body[-1]["cache_control"] = cache_marker


def apply_anthropic_cache_control(
    api_messages: List[Dict[str, Any]],
    cache_ttl: str = "5m",
) -> List[Dict[str, Any]]:
    """
    Mark up to four messages with ``cache_control`` breakpoints.

    The first message gets a breakpoint when its role is "system"; the
    remaining budget (three, or four when there is no leading system
    message) goes to the last non-system messages.

    Args:
        api_messages: Fully assembled message list; it is not modified.
        cache_ttl: "1h" adds ``"ttl": "1h"`` to the marker; any other value
            yields the plain ephemeral marker.

    Returns:
        A deep copy of the messages with the breakpoints applied.
    """
    messages = copy.deepcopy(api_messages)
    if not messages:
        return messages

    if cache_ttl == "1h":
        marker = {"type": "ephemeral", "ttl": "1h"}
    else:
        marker = {"type": "ephemeral"}

    budget = 4

    # A leading system prompt takes the first breakpoint.
    leading = messages[0]
    if leading.get("role") == "system":
        _apply_cache_marker(leading, marker)
        budget -= 1

    # Whatever is left of the budget goes to the newest non-system messages.
    rolling = [m for m in messages if m.get("role") != "system"]
    for target in rolling[-budget:]:
        _apply_cache_marker(target, marker)

    return messages
def build_skills_system_prompt() -> str:
    """
    Build the skills section of the system prompt from the skills directory.

    The directory is ``$HERMES_HOME/skills`` (``~/.hermes/skills`` when the
    variable is unset). Every ``SKILL.md`` found below it contributes one
    skill; the first path component is its category. A category's
    description is taken from the ``description:`` front-matter field of its
    ``DESCRIPTION.md``, when that file exists and can be read.

    Returns:
        str: The prompt section listing each category with its skills, or an
        empty string when the directory is missing or holds no skills.
    """
    import os
    from pathlib import Path

    home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
    skills_root = home / "skills"
    if not skills_root.exists():
        return ""

    # category -> skill names, in discovery order
    catalog = {}
    for skill_md in skills_root.rglob("SKILL.md"):
        rel_parts = skill_md.relative_to(skills_root).parts
        if len(rel_parts) < 2:
            # SKILL.md sitting directly in the skills root
            group, skill = "general", skill_md.parent.name
        else:
            group, skill = rel_parts[0], rel_parts[-2]
        catalog.setdefault(group, []).append(skill)

    if not catalog:
        return ""

    # category -> one-line description from DESCRIPTION.md front matter
    blurbs = {}
    for group in catalog:
        desc_md = skills_root / group / "DESCRIPTION.md"
        if not desc_md.exists():
            continue
        try:
            text = desc_md.read_text(encoding="utf-8")
            found = re.search(r"^---\s*\n.*?description:\s*(.+?)\s*\n.*?^---", text, re.MULTILINE | re.DOTALL)
            if found:
                blurbs[group] = found.group(1).strip()
        except Exception as e:
            logger.debug("Could not read skill description %s: %s", desc_md, e)

    index_lines = []
    for group in sorted(catalog):
        blurb = blurbs.get(group, "")
        names = ", ".join(sorted(set(catalog[group])))
        index_lines.append(f" {group}: {blurb}" if blurb else f" {group}:")
        index_lines.append(f" skills: {names}")

    header = (
        "## Skills (mandatory)\n"
        "Before replying, scan the skills below. If one clearly matches your task, "
        "load it with skill_view(name) and follow its instructions. "
        "If a skill has issues, fix it with skill_manage(action='patch').\n"
        "\n"
        "\n"
    )
    footer = (
        "\n"
        "\n"
        "\n"
        "If none match, proceed normally without loading a skill."
    )
    return header + "\n".join(index_lines) + footer
# =============================================================================
# Context File Injection (SOUL.md, AGENTS.md, .cursorrules)
# =============================================================================

# Maximum characters per context file before truncation
CONTEXT_FILE_MAX_CHARS = 20_000
# Truncation strategy: keep 70% from the head, 20% from the tail
CONTEXT_TRUNCATE_HEAD_RATIO = 0.7
CONTEXT_TRUNCATE_TAIL_RATIO = 0.2


def _truncate_content(content: str, filename: str, max_chars: int = CONTEXT_FILE_MAX_CHARS) -> str:
    """
    Truncate content that exceeds max_chars using a head/tail strategy.

    Keeps the first 70% and the last 20% of the budget, joined by a marker
    that names the file and the amounts kept.

    Args:
        content: Text to truncate.
        filename: Name shown in the truncation marker.
        max_chars: Character budget; content at or below it is returned as is.

    Returns:
        The original content, or head + marker + tail.
    """
    if len(content) <= max_chars:
        return content

    head_chars = max(0, int(max_chars * CONTEXT_TRUNCATE_HEAD_RATIO))
    tail_chars = max(0, int(max_chars * CONTEXT_TRUNCATE_TAIL_RATIO))
    head = content[:head_chars]
    # content[-0:] is the WHOLE string, so a zero-length tail needs its own case.
    tail = content[-tail_chars:] if tail_chars > 0 else ""

    marker = f"\n\n[...truncated {filename}: kept {head_chars}+{tail_chars} of {len(content)} chars. Use file tools to read the full file.]\n\n"
    return head + marker + tail


def build_context_files_prompt(cwd: str = None) -> str:
    """
    Discover and load context files (SOUL.md, AGENTS.md, .cursorrules)
    for injection into the system prompt.

    Discovery rules:
    - AGENTS.md: searched recursively from cwd, but only when a top-level
      AGENTS.md/agents.md exists. Hidden directories and node_modules,
      __pycache__, venv are skipped. Each file becomes a ## section titled
      with its path relative to cwd, ordered by depth and then by path.
    - .cursorrules: the cwd .cursorrules file plus .cursor/rules/*.mdc
    - SOUL.md: cwd first, then ~/.hermes/SOUL.md as a global fallback

    Each group is truncated independently to CONTEXT_FILE_MAX_CHARS.
    Files that cannot be read are skipped and logged at debug level.

    Args:
        cwd: Working directory to search from. Defaults to os.getcwd().

    Returns:
        str: The context files prompt section, or empty string if none found.
    """
    import os
    from pathlib import Path

    if cwd is None:
        cwd = os.getcwd()

    cwd_path = Path(cwd).resolve()
    sections = []

    # ----- AGENTS.md (hierarchical, recursive) -----
    has_top_level_agents = any(
        (cwd_path / name).exists() for name in ("AGENTS.md", "agents.md")
    )

    if has_top_level_agents:
        skipped_dirs = ('node_modules', '__pycache__', 'venv', '.venv')
        agents_files = []
        for root, dirs, files in os.walk(cwd_path):
            # Prune in place so os.walk does not descend into these directories.
            dirs[:] = [d for d in dirs if not d.startswith('.') and d not in skipped_dirs]
            for f in files:
                if f.lower() == "agents.md":
                    agents_files.append(Path(root) / f)

        # Top-level first, then deeper; the path tie-break keeps the prompt
        # identical across runs regardless of directory enumeration order.
        agents_files.sort(key=lambda p: (len(p.parts), str(p)))

        agents_parts = []
        for agents_path in agents_files:
            try:
                content = agents_path.read_text(encoding="utf-8").strip()
                if content:
                    rel_path = agents_path.relative_to(cwd_path)
                    agents_parts.append(f"## {rel_path}\n\n{content}\n\n")
            except Exception as e:
                logger.debug("Could not read %s: %s", agents_path, e)

        total_agents_content = "".join(agents_parts)
        if total_agents_content:
            sections.append(_truncate_content(total_agents_content, "AGENTS.md"))

    # ----- .cursorrules -----
    cursor_parts = []

    cursorrules_file = cwd_path / ".cursorrules"
    if cursorrules_file.exists():
        try:
            content = cursorrules_file.read_text(encoding="utf-8").strip()
            if content:
                cursor_parts.append(f"## .cursorrules\n\n{content}\n\n")
        except Exception as e:
            logger.debug("Could not read .cursorrules: %s", e)

    cursor_rules_dir = cwd_path / ".cursor" / "rules"
    if cursor_rules_dir.exists() and cursor_rules_dir.is_dir():
        for mdc_file in sorted(cursor_rules_dir.glob("*.mdc")):
            try:
                content = mdc_file.read_text(encoding="utf-8").strip()
                if content:
                    cursor_parts.append(f"## .cursor/rules/{mdc_file.name}\n\n{content}\n\n")
            except Exception as e:
                logger.debug("Could not read %s: %s", mdc_file, e)

    cursorrules_content = "".join(cursor_parts)
    if cursorrules_content:
        sections.append(_truncate_content(cursorrules_content, ".cursorrules"))

    # ----- SOUL.md (cwd first, then ~/.hermes/ fallback) -----
    soul_path = None
    for name in ["SOUL.md", "soul.md"]:
        candidate = cwd_path / name
        if candidate.exists():
            soul_path = candidate
            break

    if not soul_path:
        # Global fallback
        global_soul = Path.home() / ".hermes" / "SOUL.md"
        if global_soul.exists():
            soul_path = global_soul

    if soul_path:
        try:
            content = soul_path.read_text(encoding="utf-8").strip()
            if content:
                content = _truncate_content(content, "SOUL.md")
                sections.append(
                    f"## SOUL.md\n\nIf SOUL.md is present, embody its persona and tone. Avoid stiff, generic replies; follow its guidance unless higher-priority instructions override it.\n\n{content}"
                )
        except Exception as e:
            logger.debug("Could not read SOUL.md from %s: %s", soul_path, e)

    # ----- Assemble -----
    if not sections:
        return ""

    return "# Project Context\n\nThe following project context files have been loaded and should be followed:\n\n" + "\n".join(sections)
- - Args: - tool_name: Name of the tool being called - args: The tool call arguments dict - max_len: Maximum preview length before truncation - - Returns: - str or None: Short preview string, or None - """ - # Map tool names to their primary argument key(s) - primary_args = { - "terminal": "command", - "web_search": "query", - "web_extract": "urls", - "read_file": "path", - "write_file": "path", - "patch": "path", - "search_files": "pattern", - "browser_navigate": "url", - "browser_click": "ref", - "browser_type": "text", - "image_generate": "prompt", - "text_to_speech": "text", - "vision_analyze": "question", - "mixture_of_agents": "user_prompt", - "skill_view": "name", - "skills_list": "category", - "schedule_cronjob": "name", - } - - # Special handling for tools with composite previews - if tool_name == "process": - action = args.get("action", "") - session_id = args.get("session_id", "") - data = args.get("data", "") - timeout = args.get("timeout") - parts = [action] - if session_id: - parts.append(session_id[:16]) - if data: - parts.append(f'"{data[:20]}"') - if timeout and action == "wait": - parts.append(f"{timeout}s") - return " ".join(parts) if parts else None - - if tool_name == "todo": - todos_arg = args.get("todos") - merge = args.get("merge", False) - if todos_arg is None: - return "reading task list" - elif merge: - return f"updating {len(todos_arg)} task(s)" - else: - return f"planning {len(todos_arg)} task(s)" - - if tool_name == "session_search": - query = args.get("query", "") - return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\"" - - if tool_name == "memory": - action = args.get("action", "") - target = args.get("target", "") - if action == "add": - content = args.get("content", "") - return f"+{target}: \"{content[:25]}{'...' 
if len(content) > 25 else ''}\"" - elif action == "replace": - return f"~{target}: \"{args.get('old_text', '')[:20]}\"" - elif action == "remove": - return f"-{target}: \"{args.get('old_text', '')[:20]}\"" - return action - - if tool_name == "send_message": - target = args.get("target", "?") - msg = args.get("message", "") - if len(msg) > 20: - msg = msg[:17] + "..." - return f"to {target}: \"{msg}\"" - - if tool_name.startswith("rl_"): - rl_previews = { - "rl_list_environments": "listing envs", - "rl_select_environment": args.get("name", ""), - "rl_get_current_config": "reading config", - "rl_edit_config": f"{args.get('field', '')}={args.get('value', '')}", - "rl_start_training": "starting", - "rl_check_status": args.get("run_id", "")[:16], - "rl_stop_training": f"stopping {args.get('run_id', '')[:16]}", - "rl_get_results": args.get("run_id", "")[:16], - "rl_list_runs": "listing runs", - "rl_test_inference": f"{args.get('num_steps', 3)} steps", - } - return rl_previews.get(tool_name) - - key = primary_args.get(tool_name) - if not key: - # Try common arg names as fallback - for fallback_key in ("query", "text", "command", "path", "name", "prompt"): - if fallback_key in args: - key = fallback_key - break - - if not key or key not in args: - return None - - value = args[key] - - # Handle list values (e.g., urls) - if isinstance(value, list): - value = value[0] if value else "" - - preview = str(value).strip() - if not preview: - return None - - # Truncate - if len(preview) > max_len: - preview = preview[:max_len - 3] + "..." - - return preview - - -class KawaiiSpinner: - """ - Animated spinner with kawaii faces for CLI feedback during tool execution. - Runs in a background thread and can be stopped when the operation completes. - - Uses stdout with carriage return to animate in place. 
- """ - - # Different spinner animation sets - SPINNERS = { - 'dots': ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'], - 'bounce': ['⠁', '⠂', '⠄', '⡀', '⢀', '⠠', '⠐', '⠈'], - 'grow': ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█', '▇', '▆', '▅', '▄', '▃', '▂'], - 'arrows': ['←', '↖', '↑', '↗', '→', '↘', '↓', '↙'], - 'star': ['✶', '✷', '✸', '✹', '✺', '✹', '✸', '✷'], - 'moon': ['🌑', '🌒', '🌓', '🌔', '🌕', '🌖', '🌗', '🌘'], - 'pulse': ['◜', '◠', '◝', '◞', '◡', '◟'], - 'brain': ['🧠', '💭', '💡', '✨', '💫', '🌟', '💡', '💭'], - 'sparkle': ['⁺', '˚', '*', '✧', '✦', '✧', '*', '˚'], - } - - # General waiting faces - KAWAII_WAITING = [ - "(。◕‿◕。)", "(◕‿◕✿)", "٩(◕‿◕。)۶", "(✿◠‿◠)", "( ˘▽˘)っ", - "♪(´ε` )", "(◕ᴗ◕✿)", "ヾ(^∇^)", "(≧◡≦)", "(★ω★)", - ] - - # Thinking-specific faces and messages - KAWAII_THINKING = [ - "(。•́︿•̀。)", "(◔_◔)", "(¬‿¬)", "( •_•)>⌐■-■", "(⌐■_■)", - "(´・_・`)", "◉_◉", "(°ロ°)", "( ˘⌣˘)♡", "ヽ(>∀<☆)☆", - "٩(๑❛ᴗ❛๑)۶", "(⊙_⊙)", "(¬_¬)", "( ͡° ͜ʖ ͡°)", "ಠ_ಠ", - ] - - THINKING_VERBS = [ - "pondering", "contemplating", "musing", "cogitating", "ruminating", - "deliberating", "mulling", "reflecting", "processing", "reasoning", - "analyzing", "computing", "synthesizing", "formulating", "brainstorming", - ] - - def __init__(self, message: str = "", spinner_type: str = 'dots'): - self.message = message - self.spinner_frames = self.SPINNERS.get(spinner_type, self.SPINNERS['dots']) - self.running = False - self.thread = None - self.frame_idx = 0 - self.start_time = None - self.last_line_len = 0 - - def _animate(self): - """Animation loop that runs in background thread.""" - while self.running: - # Check for pause signal (e.g., during sudo password prompt) - if os.getenv("HERMES_SPINNER_PAUSE"): - time.sleep(0.1) - continue - - frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)] - elapsed = time.time() - self.start_time - - # Build the spinner line - line = f" {frame} {self.message} ({elapsed:.1f}s)" - - # Clear previous line and write new one - clear = '\r' + ' ' * 
self.last_line_len + '\r' - print(clear + line, end='', flush=True) - self.last_line_len = len(line) - - self.frame_idx += 1 - time.sleep(0.12) # ~8 FPS animation - - def start(self): - """Start the spinner animation.""" - if self.running: - return - self.running = True - self.start_time = time.time() - self.thread = threading.Thread(target=self._animate, daemon=True) - self.thread.start() - - def update_text(self, new_message: str): - """Update the spinner message text while it's running.""" - self.message = new_message - - def stop(self, final_message: str = None): - """Stop the spinner and optionally print a final message.""" - self.running = False - if self.thread: - self.thread.join(timeout=0.5) - - # Clear the spinner line - print('\r' + ' ' * (self.last_line_len + 5) + '\r', end='', flush=True) - - # Print final message if provided - if final_message: - print(f" {final_message}", flush=True) - - def __enter__(self): - self.start() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.stop() - return False - class AIAgent: """ @@ -1238,256 +410,6 @@ class AIAgent: else: print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)") - # Pools of kawaii faces for random selection - KAWAII_SEARCH = [ - "♪(´ε` )", "(。◕‿◕。)", "ヾ(^∇^)", "(◕ᴗ◕✿)", "( ˘▽˘)っ", - "٩(◕‿◕。)۶", "(✿◠‿◠)", "♪~(´ε` )", "(ノ´ヮ`)ノ*:・゚✧", "\(◎o◎)/", - ] - KAWAII_READ = [ - "φ(゜▽゜*)♪", "( ˘▽˘)っ", "(⌐■_■)", "٩(。•́‿•̀。)۶", "(◕‿◕✿)", - "ヾ(@⌒ー⌒@)ノ", "(✧ω✧)", "♪(๑ᴖ◡ᴖ๑)♪", "(≧◡≦)", "( ´ ▽ ` )ノ", - ] - KAWAII_TERMINAL = [ - "ヽ(>∀<☆)ノ", "(ノ°∀°)ノ", "٩(^ᴗ^)۶", "ヾ(⌐■_■)ノ♪", "(•̀ᴗ•́)و", - "┗(^0^)┓", "(`・ω・´)", "\( ̄▽ ̄)/", "(ง •̀_•́)ง", "ヽ(´▽`)/", - ] - KAWAII_BROWSER = [ - "(ノ°∀°)ノ", "(☞゚ヮ゚)☞", "( ͡° ͜ʖ ͡°)", "┌( ಠ_ಠ)┘", "(⊙_⊙)?", - "ヾ(•ω•`)o", "( ̄ω ̄)", "( ˇωˇ )", "(ᵔᴥᵔ)", "\(◎o◎)/", - ] - KAWAII_CREATE = [ - "✧*。٩(ˊᗜˋ*)و✧", "(ノ◕ヮ◕)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "٩(♡ε♡)۶", "(◕‿◕)♡", - "✿◕ ‿ ◕✿", "(*≧▽≦)", "ヾ(^-^)ノ", "(☆▽☆)", "°˖✧◝(⁰▿⁰)◜✧˖°", - ] - 
KAWAII_SKILL = [ - "ヾ(@⌒ー⌒@)ノ", "(๑˃ᴗ˂)ﻭ", "٩(◕‿◕。)۶", "(✿╹◡╹)", "ヽ(・∀・)ノ", - "(ノ´ヮ`)ノ*:・゚✧", "♪(๑ᴖ◡ᴖ๑)♪", "(◠‿◠)", "٩(ˊᗜˋ*)و", "(^▽^)", - "ヾ(^∇^)", "(★ω★)/", "٩(。•́‿•̀。)۶", "(◕ᴗ◕✿)", "\(◎o◎)/", - "(✧ω✧)", "ヽ(>∀<☆)ノ", "( ˘▽˘)っ", "(≧◡≦) ♡", "ヾ( ̄▽ ̄)", - ] - KAWAII_THINK = [ - "(っ°Д°;)っ", "(;′⌒`)", "(・_・ヾ", "( ´_ゝ`)", "( ̄ヘ ̄)", - "(。-`ω´-)", "( ˘︹˘ )", "(¬_¬)", "ヽ(ー_ー )ノ", "(;一_一)", - ] - KAWAII_GENERIC = [ - "♪(´ε` )", "(◕‿◕✿)", "ヾ(^∇^)", "٩(◕‿◕。)۶", "(✿◠‿◠)", - "(ノ´ヮ`)ノ*:・゚✧", "ヽ(>∀<☆)ノ", "(☆▽☆)", "( ˘▽˘)っ", "(≧◡≦)", - ] - - def _get_cute_tool_message(self, tool_name: str, args: dict, duration: float) -> str: - """ - Generate a clean, aligned tool activity line for CLI quiet mode. - - Format: ┊ {emoji} {verb:9} {detail} {duration} - - Kawaii faces live in the animated spinner (while the tool runs). - This completion message replaces the spinner with a permanent log line. - """ - dur = f"{duration:.1f}s" - - def _trunc(s, n=40): - s = str(s) - return (s[:n-3] + "...") if len(s) > n else s - - def _path(p, n=35): - p = str(p) - return ("..." 
+ p[-(n-3):]) if len(p) > n else p - - # ── Web ── - if tool_name == "web_search": - q = _trunc(args.get("query", ""), 42) - return f"┊ 🔍 search {q} {dur}" - - if tool_name == "web_extract": - urls = args.get("urls", []) - if urls: - url = urls[0] if isinstance(urls, list) else str(urls) - domain = url.replace("https://", "").replace("http://", "").split("/")[0] - extra = f" +{len(urls)-1}" if len(urls) > 1 else "" - return f"┊ 📄 fetch {_trunc(domain, 35)}{extra} {dur}" - return f"┊ 📄 fetch pages {dur}" - - if tool_name == "web_crawl": - url = args.get("url", "") - domain = url.replace("https://", "").replace("http://", "").split("/")[0] - return f"┊ 🕸️ crawl {_trunc(domain, 35)} {dur}" - - # ── Terminal & Process ── - if tool_name == "terminal": - cmd = _trunc(args.get("command", ""), 42) - return f"┊ 💻 $ {cmd} {dur}" - - if tool_name == "process": - action = args.get("action", "?") - sid = args.get("session_id", "")[:12] - labels = { - "list": "ls processes", "poll": f"poll {sid}", - "log": f"log {sid}", "wait": f"wait {sid}", - "kill": f"kill {sid}", "write": f"write {sid}", - "submit": f"submit {sid}", - } - detail = labels.get(action, f"{action} {sid}") - return f"┊ ⚙️ proc {detail} {dur}" - - # ── Files ── - if tool_name == "read_file": - return f"┊ 📖 read {_path(args.get('path', ''))} {dur}" - - if tool_name == "write_file": - return f"┊ ✍️ write {_path(args.get('path', ''))} {dur}" - - if tool_name == "patch": - return f"┊ 🔧 patch {_path(args.get('path', ''))} {dur}" - - if tool_name == "search_files": - pattern = _trunc(args.get("pattern", ""), 35) - target = args.get("target", "content") - verb = "find" if target == "files" else "grep" - return f"┊ 🔎 {verb:9} {pattern} {dur}" - - # ── Browser ── - if tool_name == "browser_navigate": - url = args.get("url", "") - domain = url.replace("https://", "").replace("http://", "").split("/")[0] - return f"┊ 🌐 navigate {_trunc(domain, 35)} {dur}" - - if tool_name == "browser_snapshot": - mode = "full" if 
args.get("full") else "compact" - return f"┊ 📸 snapshot {mode} {dur}" - - if tool_name == "browser_click": - return f"┊ 👆 click {args.get('ref', '?')} {dur}" - - if tool_name == "browser_type": - text = _trunc(args.get("text", ""), 30) - return f"┊ ⌨️ type \"{text}\" {dur}" - - if tool_name == "browser_scroll": - d = args.get("direction", "down") - arrow = {"down": "↓", "up": "↑", "right": "→", "left": "←"}.get(d, "↓") - return f"┊ {arrow} scroll {d} {dur}" - - if tool_name == "browser_back": - return f"┊ ◀️ back {dur}" - - if tool_name == "browser_press": - return f"┊ ⌨️ press {args.get('key', '?')} {dur}" - - if tool_name == "browser_close": - return f"┊ 🚪 close browser {dur}" - - if tool_name == "browser_get_images": - return f"┊ 🖼️ images extracting {dur}" - - if tool_name == "browser_vision": - return f"┊ 👁️ vision analyzing page {dur}" - - # ── Planning ── - if tool_name == "todo": - todos_arg = args.get("todos") - merge = args.get("merge", False) - if todos_arg is None: - return f"┊ 📋 plan reading tasks {dur}" - elif merge: - return f"┊ 📋 plan update {len(todos_arg)} task(s) {dur}" - else: - return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}" - - # ── Session Search ── - if tool_name == "session_search": - query = _trunc(args.get("query", ""), 35) - return f"┊ 🔍 recall \"{query}\" {dur}" - - # ── Memory ── - if tool_name == "memory": - action = args.get("action", "?") - target = args.get("target", "") - if action == "add": - preview = _trunc(args.get("content", ""), 30) - return f"┊ 🧠 memory +{target}: \"{preview}\" {dur}" - elif action == "replace": - snippet = _trunc(args.get("old_text", ""), 20) - return f"┊ 🧠 memory ~{target}: \"{snippet}\" {dur}" - elif action == "remove": - snippet = _trunc(args.get("old_text", ""), 20) - return f"┊ 🧠 memory -{target}: \"{snippet}\" {dur}" - elif action == "search_sessions": - query = _trunc(args.get("content", ""), 30) - return f"┊ 🧠 recall \"{query}\" {dur}" - else: - return f"┊ 🧠 memory {action} {dur}" - - # ── Skills 
── - if tool_name == "skills_list": - return f"┊ 📚 skills list {args.get('category', 'all')} {dur}" - - if tool_name == "skill_view": - return f"┊ 📚 skill {_trunc(args.get('name', ''), 30)} {dur}" - - # ── Generation & Media ── - if tool_name == "image_generate": - return f"┊ 🎨 create {_trunc(args.get('prompt', ''), 35)} {dur}" - - if tool_name == "text_to_speech": - return f"┊ 🔊 speak {_trunc(args.get('text', ''), 30)} {dur}" - - if tool_name == "vision_analyze": - return f"┊ 👁️ vision {_trunc(args.get('question', ''), 30)} {dur}" - - if tool_name == "mixture_of_agents": - return f"┊ 🧠 reason {_trunc(args.get('user_prompt', ''), 30)} {dur}" - - # ── Messaging & Scheduling ── - if tool_name == "send_message": - target = args.get("target", "?") - msg = _trunc(args.get("message", ""), 25) - return f"┊ 📨 send {target}: \"{msg}\" {dur}" - - if tool_name == "schedule_cronjob": - name = _trunc(args.get("name", args.get("prompt", "task")), 30) - return f"┊ ⏰ schedule {name} {dur}" - - if tool_name == "list_cronjobs": - return f"┊ ⏰ jobs listing {dur}" - - if tool_name == "remove_cronjob": - return f"┊ ⏰ remove job {args.get('job_id', '?')} {dur}" - - # ── RL Training ── - if tool_name.startswith("rl_"): - rl = { - "rl_list_environments": "list envs", - "rl_select_environment": f"select {args.get('name', '')}", - "rl_get_current_config": "get config", - "rl_edit_config": f"set {args.get('field', '?')}", - "rl_start_training": "start training", - "rl_check_status": f"status {args.get('run_id', '?')[:12]}", - "rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", - "rl_get_results": f"results {args.get('run_id', '?')[:12]}", - "rl_list_runs": "list runs", - "rl_test_inference": "test inference", - } - detail = rl.get(tool_name, tool_name.replace("rl_", "")) - return f"┊ 🧪 rl {detail} {dur}" - - # ── Code Execution Sandbox ── - if tool_name == "execute_code": - code = args.get("code", "") - first_line = code.strip().split("\n")[0] if code.strip() else "" - return f"┊ 🐍 
exec {_trunc(first_line, 35)} {dur}" - - # ── Subagent Delegation ── - if tool_name == "delegate_task": - tasks = args.get("tasks") - if tasks and isinstance(tasks, list): - return f"┊ 🔀 delegate {len(tasks)} parallel tasks {dur}" - goal = _trunc(args.get("goal", ""), 35) - return f"┊ 🔀 delegate {goal} {dur}" - - # ── Fallback ── - preview = _build_tool_preview(tool_name, args) or "" - return f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}" - def _has_content_after_think_block(self, content: str) -> bool: """ Check if content has actual text after any blocks. @@ -2330,7 +1252,7 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self.quiet_mode: - print(f" {self._get_cute_tool_message('todo', function_args, tool_duration)}") + print(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}") elif function_name == "session_search" and self._session_db: from tools.session_search_tool import session_search as _session_search function_result = _session_search( @@ -2341,7 +1263,7 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self.quiet_mode: - print(f" {self._get_cute_tool_message('session_search', function_args, tool_duration)}") + print(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}") elif function_name == "memory": from tools.memory_tool import memory_tool as _memory_tool function_result = _memory_tool( @@ -2353,7 +1275,7 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self.quiet_mode: - print(f" {self._get_cute_tool_message('memory', function_args, tool_duration)}") + print(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}") elif function_name == "clarify": from tools.clarify_tool import clarify_tool as _clarify_tool function_result = _clarify_tool( @@ -2363,7 +1285,7 @@ class AIAgent: ) tool_duration = time.time() - tool_start_time if self.quiet_mode: - print(f" 
{self._get_cute_tool_message('clarify', function_args, tool_duration)}") + print(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") elif function_name == "delegate_task": from tools.delegate_tool import delegate_task as _delegate_task tasks_arg = function_args.get("tasks") @@ -2378,6 +1300,7 @@ class AIAgent: spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots') spinner.start() self._delegate_spinner = spinner + _delegate_result = None try: function_result = _delegate_task( goal=function_args.get("goal"), @@ -2388,10 +1311,11 @@ class AIAgent: max_iterations=function_args.get("max_iterations"), parent_agent=self, ) + _delegate_result = function_result finally: self._delegate_spinner = None tool_duration = time.time() - tool_start_time - cute_msg = self._get_cute_tool_message('delegate_task', function_args, tool_duration) + cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result) if spinner: spinner.stop(cute_msg) elif self.quiet_mode: @@ -2420,11 +1344,13 @@ class AIAgent: preview = preview[:27] + "..." 
spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots') spinner.start() + _spinner_result = None try: function_result = handle_function_call(function_name, function_args, effective_task_id) + _spinner_result = function_result finally: tool_duration = time.time() - tool_start_time - cute_msg = self._get_cute_tool_message(function_name, function_args, tool_duration) + cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result) spinner.stop(cute_msg) else: function_result = handle_function_call(function_name, function_args, effective_task_id) @@ -3187,11 +2113,40 @@ class AIAgent: if self.verbose_logging: logging.exception("Detailed error information:") - # Add error to conversation and try to continue - messages.append({ - "role": "assistant", - "content": f"I encountered an error: {error_msg}. Let me try a different approach." - }) + # If an assistant message with tool_calls was already appended, + # the API expects a role="tool" result for every tool_call_id. + # Fill in error results for any that weren't answered yet. + pending_handled = False + for idx in range(len(messages) - 1, -1, -1): + msg = messages[idx] + if not isinstance(msg, dict): + break + if msg.get("role") == "tool": + continue + if msg.get("role") == "assistant" and msg.get("tool_calls"): + answered_ids = { + m["tool_call_id"] + for m in messages[idx + 1:] + if isinstance(m, dict) and m.get("role") == "tool" + } + for tc in msg["tool_calls"]: + if tc["id"] not in answered_ids: + messages.append({ + "role": "tool", + "tool_call_id": tc["id"], + "content": f"Error executing tool: {error_msg}", + }) + pending_handled = True + break + + if not pending_handled: + # Error happened before tool processing (e.g. response parsing). + # Use a user-role message so the model can see what went wrong + # without confusing the API with a fabricated assistant turn. 
+ messages.append({ + "role": "user", + "content": f"[System error during processing: {error_msg}]", + }) # If we're near the limit, break to avoid infinite loops if api_call_count >= self.max_iterations - 1: diff --git a/tools/approval.py b/tools/approval.py index 2db8424cb7..18f9b67431 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -281,7 +281,12 @@ def check_dangerous_command(command: str, env_type: str, approval_callback=approval_callback) if choice == "deny": - return {"approved": False, "message": "BLOCKED: User denied this potentially dangerous command. Do NOT retry this command - the user has explicitly rejected it."} + return { + "approved": False, + "message": f"BLOCKED: User denied this potentially dangerous command (matched '{description}' pattern). Do NOT retry this command - the user has explicitly rejected it.", + "pattern_key": pattern_key, + "description": description, + } if choice == "session": approve_session(session_key, pattern_key) diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 4467b890d3..b76b886bc8 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -51,25 +51,16 @@ import signal import subprocess import shutil import sys -import asyncio import tempfile import threading import time import requests from typing import Dict, Any, Optional, List from pathlib import Path -from hermes_constants import OPENROUTER_CHAT_URL +from agent.auxiliary_client import get_vision_auxiliary_client logger = logging.getLogger(__name__) -# Try to import httpx for async LLM calls -try: - import httpx - HTTPX_AVAILABLE = True -except ImportError: - HTTPX_AVAILABLE = False - - # ============================================================================ # Configuration # ============================================================================ @@ -83,8 +74,8 @@ DEFAULT_SESSION_TIMEOUT = 300 # Max tokens for snapshot content before summarization SNAPSHOT_SUMMARIZE_THRESHOLD = 8000 -# Model for task-aware extraction 
-EXTRACTION_MODEL = "google/gemini-3-flash-preview" +# Resolve vision auxiliary client for extraction/vision tasks +_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client() # Track active sessions per task # Now stores tuple of (session_name, browserbase_session_id, cdp_url) @@ -782,87 +773,49 @@ def _run_browser_command( return {"success": False, "error": str(e)} -async def _extract_relevant_content( +def _extract_relevant_content( snapshot_text: str, user_task: Optional[str] = None ) -> str: + """Use LLM to extract relevant content from a snapshot based on the user's task. + + Falls back to simple truncation when no auxiliary vision model is configured. """ - Use LLM to extract relevant content from a snapshot based on the user's task. - - This provides task-aware summarization that preserves meaningful text content - (paragraphs, prices, descriptions) relevant to what the user is trying to accomplish. - - Args: - snapshot_text: The full snapshot text - user_task: The user's current task/goal (optional) - - Returns: - Summarized/extracted content - """ - if not HTTPX_AVAILABLE: - # Fall back to simple truncation + if _aux_vision_client is None or EXTRACTION_MODEL is None: return _truncate_snapshot(snapshot_text) - - # Get API key - api_key = os.environ.get("OPENROUTER_API_KEY") - if not api_key: - return _truncate_snapshot(snapshot_text) - - # Build extraction prompt + if user_task: - extraction_prompt = f"""You are a content extractor for a browser automation agent. - -The user's task is: {user_task} - -Given the following page snapshot (accessibility tree representation), extract and summarize the most relevant information for completing this task. Focus on: -1. Interactive elements (buttons, links, inputs) that might be needed -2. Text content relevant to the task (prices, descriptions, headings, important info) -3. Navigation structure if relevant - -Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them. 
- -Page Snapshot: -{snapshot_text} - -Provide a concise summary that preserves actionable information and relevant content.""" + extraction_prompt = ( + f"You are a content extractor for a browser automation agent.\n\n" + f"The user's task is: {user_task}\n\n" + f"Given the following page snapshot (accessibility tree representation), " + f"extract and summarize the most relevant information for completing this task. Focus on:\n" + f"1. Interactive elements (buttons, links, inputs) that might be needed\n" + f"2. Text content relevant to the task (prices, descriptions, headings, important info)\n" + f"3. Navigation structure if relevant\n\n" + f"Keep ref IDs (like [ref=e5]) for interactive elements so the agent can use them.\n\n" + f"Page Snapshot:\n{snapshot_text}\n\n" + f"Provide a concise summary that preserves actionable information and relevant content." + ) else: - extraction_prompt = f"""Summarize this page snapshot, preserving: -1. All interactive elements with their ref IDs (like [ref=e5]) -2. Key text content and headings -3. Important information visible on the page - -Page Snapshot: -{snapshot_text} - -Provide a concise summary focused on interactive elements and key content.""" + extraction_prompt = ( + f"Summarize this page snapshot, preserving:\n" + f"1. All interactive elements with their ref IDs (like [ref=e5])\n" + f"2. Key text content and headings\n" + f"3. Important information visible on the page\n\n" + f"Page Snapshot:\n{snapshot_text}\n\n" + f"Provide a concise summary focused on interactive elements and key content." 
+ ) try: - async with httpx.AsyncClient(timeout=30.0) as client: - response = await client.post( - OPENROUTER_CHAT_URL, - headers={ - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json" - }, - json={ - "model": EXTRACTION_MODEL, - "messages": [ - {"role": "user", "content": extraction_prompt} - ], - "max_tokens": 4000, - "temperature": 0.1 - } - ) - - if response.status_code == 200: - result = response.json() - return result["choices"][0]["message"]["content"] - else: - # Fall back to truncation on API error - return _truncate_snapshot(snapshot_text) - + response = _aux_vision_client.chat.completions.create( + model=EXTRACTION_MODEL, + messages=[{"role": "user", "content": extraction_prompt}], + max_tokens=4000, + temperature=0.1, + ) + return response.choices[0].message.content except Exception: - # Fall back to truncation on any error return _truncate_snapshot(snapshot_text) @@ -991,16 +944,7 @@ def browser_snapshot( # Check if snapshot needs summarization if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD and user_task: - # Run async extraction - try: - loop = asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - snapshot_text = loop.run_until_complete( - _extract_relevant_content(snapshot_text, user_task) - ) + snapshot_text = _extract_relevant_content(snapshot_text, user_task) elif len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD: snapshot_text = _truncate_snapshot(snapshot_text) @@ -1286,12 +1230,12 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: effective_task_id = task_id or "default" - # Check for OpenRouter API key - api_key = os.environ.get("OPENROUTER_API_KEY") - if not api_key: + # Check auxiliary vision client + if _aux_vision_client is None or EXTRACTION_MODEL is None: return json.dumps({ "success": False, - "error": "OPENROUTER_API_KEY not set. Vision analysis requires this API key." 
+ "error": "Browser vision unavailable: no auxiliary vision model configured. " + "Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision." }, ensure_ascii=False) # Create a temporary file for the screenshot @@ -1325,110 +1269,36 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: image_base64 = base64.b64encode(image_data).decode("ascii") data_url = f"data:image/png;base64,{image_base64}" - # Prepare the vision prompt - vision_prompt = f"""You are analyzing a screenshot of a web browser. + vision_prompt = ( + f"You are analyzing a screenshot of a web browser.\n\n" + f"User's question: {question}\n\n" + f"Provide a detailed and helpful answer based on what you see in the screenshot. " + f"If there are interactive elements, describe them. If there are verification challenges " + f"or CAPTCHAs, describe what type they are and what action might be needed. " + f"Focus on answering the user's specific question." + ) -User's question: {question} - -Provide a detailed and helpful answer based on what you see in the screenshot. -If there are interactive elements, describe them. If there are verification challenges -or CAPTCHAs, describe what type they are and what action might be needed. 
-Focus on answering the user's specific question.""" - - # Call OpenRouter/Gemini for vision analysis - if HTTPX_AVAILABLE: - import asyncio - - async def analyze_screenshot(): - async with httpx.AsyncClient(timeout=60.0) as client: - response = await client.post( - OPENROUTER_CHAT_URL, - headers={ - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json" - }, - json={ - "model": "google/gemini-3-flash-preview", - "messages": [ - { - "role": "user", - "content": [ - {"type": "text", "text": vision_prompt}, - { - "type": "image_url", - "image_url": {"url": data_url} - } - ] - } - ], - "max_tokens": 2000, - "temperature": 0.1 - } - ) - - if response.status_code != 200: - return { - "success": False, - "error": f"Vision API error: {response.status_code} - {response.text[:200]}" - } - - result_data = response.json() - analysis = result_data["choices"][0]["message"]["content"] - return { - "success": True, - "analysis": analysis - } - - # Run the async function - try: - loop = asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - vision_result = loop.run_until_complete(analyze_screenshot()) - return json.dumps(vision_result, ensure_ascii=False) - - else: - # Fallback: use synchronous requests - response = requests.post( - OPENROUTER_CHAT_URL, - headers={ - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json" - }, - json={ - "model": "google/gemini-3-flash-preview", - "messages": [ - { - "role": "user", - "content": [ - {"type": "text", "text": vision_prompt}, - { - "type": "image_url", - "image_url": {"url": data_url} - } - ] - } + # Use the sync auxiliary vision client directly + response = _aux_vision_client.chat.completions.create( + model=EXTRACTION_MODEL, + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": vision_prompt}, + {"type": "image_url", "image_url": {"url": data_url}}, ], - "max_tokens": 2000, - "temperature": 0.1 - }, - 
timeout=60 - ) - - if response.status_code != 200: - return json.dumps({ - "success": False, - "error": f"Vision API error: {response.status_code} - {response.text[:200]}" - }, ensure_ascii=False) - - result_data = response.json() - analysis = result_data["choices"][0]["message"]["content"] - return json.dumps({ - "success": True, - "analysis": analysis - }, ensure_ascii=False) + } + ], + max_tokens=2000, + temperature=0.1, + ) + + analysis = response.choices[0].message.content + return json.dumps({ + "success": True, + "analysis": analysis, + }, ensure_ascii=False) except Exception as e: return json.dumps({ diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index 07c39989e8..16508e9762 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -22,9 +22,19 @@ import os import logging from typing import Dict, Any, List, Optional -from tools.openrouter_client import get_async_client as _get_client +from openai import AsyncOpenAI, OpenAI -SUMMARIZER_MODEL = "google/gemini-3-flash-preview" +from agent.auxiliary_client import get_text_auxiliary_client + +# Resolve the auxiliary client at import time so we have the model slug. +# We build an AsyncOpenAI from the same credentials for async summarization. 
+_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client() +_async_aux_client: AsyncOpenAI | None = None +if _aux_client is not None: + _async_aux_client = AsyncOpenAI( + api_key=_aux_client.api_key, + base_url=str(_aux_client.base_url), + ) MAX_SESSION_CHARS = 100_000 MAX_SUMMARY_TOKENS = 2000 @@ -126,11 +136,15 @@ async def _summarize_session( f"Summarize this conversation with focus on: {query}" ) + if _async_aux_client is None or _SUMMARIZER_MODEL is None: + logging.warning("No auxiliary model available for session summarization") + return None + max_retries = 3 for attempt in range(max_retries): try: - response = await _get_client().chat.completions.create( - model=SUMMARIZER_MODEL, + response = await _async_aux_client.chat.completions.create( + model=_SUMMARIZER_MODEL, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, @@ -252,8 +266,8 @@ def session_search( def check_session_search_requirements() -> bool: - """Requires SQLite state database and OpenRouter API key.""" - if not os.getenv("OPENROUTER_API_KEY"): + """Requires SQLite state database and an auxiliary text model.""" + if _async_aux_client is None: return False try: from hermes_state import DEFAULT_DB_PATH @@ -316,5 +330,4 @@ registry.register( limit=args.get("limit", 3), db=kw.get("db")), check_fn=check_session_search_requirements, - requires_env=["OPENROUTER_API_KEY"], ) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 6b95d185db..09d1ff31dc 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -359,7 +359,6 @@ Do NOT use vim/nano/interactive tools without pty=true — they hang without a p # Global state for environment lifecycle management _active_environments: Dict[str, Any] = {} -_task_workdirs: Dict[str, str] = {} # Maps task_id to working directory _last_activity: Dict[str, float] = {} _env_lock = threading.Lock() _creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation @@ -530,7 
+529,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300): if current_time - last_time > lifetime_seconds: env = _active_environments.pop(task_id, None) _last_activity.pop(task_id, None) - _task_workdirs.pop(task_id, None) if env is not None: envs_to_stop.append((task_id, env)) @@ -609,7 +607,7 @@ def get_active_environments_info() -> Dict[str, Any]: info = { "count": len(_active_environments), "task_ids": list(_active_environments.keys()), - "workdirs": dict(_task_workdirs), + "workdirs": {}, } # Calculate total disk usage @@ -632,7 +630,7 @@ def get_active_environments_info() -> Dict[str, Any]: def cleanup_all_environments(): """Clean up ALL active environments. Use with caution.""" - global _active_environments, _last_activity, _task_workdirs + global _active_environments, _last_activity task_ids = list(_active_environments.keys()) cleaned = 0 @@ -661,7 +659,7 @@ def cleanup_all_environments(): def cleanup_vm(task_id: str): """Manually clean up a specific environment by task_id.""" - global _active_environments, _last_activity, _task_workdirs + global _active_environments, _last_activity # Remove from tracking dicts while holding the lock, but defer the # actual (potentially slow) env.cleanup() call to outside the lock @@ -669,7 +667,6 @@ def cleanup_vm(task_id: str): env = None with _env_lock: env = _active_environments.pop(task_id, None) - _task_workdirs.pop(task_id, None) _last_activity.pop(task_id, None) # Clean up per-task creation lock @@ -782,17 +779,6 @@ def terminal_tool( default_timeout = config["timeout"] effective_timeout = timeout or default_timeout - # For local environment in batch mode, create a unique subdirectory per task - # This prevents parallel tasks from overwriting each other's files - # In CLI mode (HERMES_QUIET), use the cwd directly without subdirectories - if env_type == "local" and not os.getenv("HERMES_QUIET"): - with _env_lock: - if effective_task_id not in _task_workdirs: - task_workdir = Path(cwd) / 
f"hermes-{effective_task_id}-{uuid.uuid4().hex[:8]}" - task_workdir.mkdir(parents=True, exist_ok=True) - _task_workdirs[effective_task_id] = str(task_workdir) - cwd = _task_workdirs[effective_task_id] - # Start cleanup thread _start_cleanup_thread() @@ -874,11 +860,16 @@ def terminal_tool( "description": approval.get("description", "dangerous command"), "pattern_key": approval.get("pattern_key", ""), }, ensure_ascii=False) - # Command was blocked - return informative message + # Command was blocked - include the pattern category so the caller knows why + desc = approval.get("description", "potentially dangerous operation") + fallback_msg = ( + f"Command denied: matches '{desc}' pattern. " + "Use the approval prompt to allow it, or rephrase the command." + ) return json.dumps({ "output": "", "exit_code": -1, - "error": approval.get("message", "Command denied - potentially dangerous operation"), + "error": approval.get("message", fallback_msg), "status": "blocked" }, ensure_ascii=False) @@ -996,11 +987,17 @@ def terminal_tool( # Add helpful message for sudo failures in messaging context output = _handle_sudo_failure(output, env_type) - # Truncate output if too long + # Truncate output if too long, keeping both head and tail MAX_OUTPUT_CHARS = 50000 if len(output) > MAX_OUTPUT_CHARS: - truncated_notice = f"\n\n... [OUTPUT TRUNCATED - showing last {MAX_OUTPUT_CHARS} chars of {len(output)} total] ..." - output = truncated_notice + output[-MAX_OUTPUT_CHARS:] + head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early) + tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output) + omitted = len(output) - head_chars - tail_chars + truncated_notice = ( + f"\n\n... 
[OUTPUT TRUNCATED - {omitted} chars omitted " + f"out of {len(output)} total] ...\n\n" + ) + output = output[:head_chars] + truncated_notice + output[-tail_chars:] return json.dumps({ "output": output.strip() if output else "", diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 7750485d5d..90c0d430c2 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -36,13 +36,20 @@ import base64 from pathlib import Path from typing import Dict, Any, Optional import httpx -from tools.openrouter_client import get_async_client as _get_openrouter_client, check_api_key as check_openrouter_api_key +from openai import AsyncOpenAI +from agent.auxiliary_client import get_vision_auxiliary_client from tools.debug_helpers import DebugSession logger = logging.getLogger(__name__) -# Configuration for vision processing -DEFAULT_VISION_MODEL = "google/gemini-3-flash-preview" +# Resolve vision auxiliary client at module level; build an async wrapper. +_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client() +_aux_async_client: AsyncOpenAI | None = None +if _aux_sync_client is not None: + _aux_async_client = AsyncOpenAI( + api_key=_aux_sync_client.api_key, + base_url=str(_aux_sync_client.base_url), + ) _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG") @@ -230,9 +237,13 @@ async def vision_analyze_tool( logger.info("Analyzing image: %s", image_url[:60]) logger.info("User prompt: %s", user_prompt[:100]) - # Check API key availability - if not os.getenv("OPENROUTER_API_KEY"): - raise ValueError("OPENROUTER_API_KEY environment variable not set") + # Check auxiliary vision client availability + if _aux_async_client is None or DEFAULT_VISION_MODEL is None: + return json.dumps({ + "success": False, + "analysis": "Vision analysis unavailable: no auxiliary vision model configured. " + "Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools." 
+ }, indent=2, ensure_ascii=False) # Determine if this is a local file path or a remote URL local_path = Path(image_url) @@ -291,18 +302,12 @@ async def vision_analyze_tool( logger.info("Processing image with %s...", model) - # Call the vision API with reasoning enabled - response = await _get_openrouter_client().chat.completions.create( + # Call the vision API + response = await _aux_async_client.chat.completions.create( model=model, messages=messages, - temperature=0.1, # Low temperature for consistent analysis - max_tokens=2000, # Generous limit for detailed analysis - extra_body={ - "reasoning": { - "enabled": True, - "effort": "xhigh" - } - } + temperature=0.1, + max_tokens=2000, ) # Extract the analysis @@ -353,13 +358,8 @@ async def vision_analyze_tool( def check_vision_requirements() -> bool: - """ - Check if all requirements for vision tools are met. - - Returns: - bool: True if requirements are met, False otherwise - """ - return check_openrouter_api_key() + """Check if an auxiliary vision model is available.""" + return _aux_async_client is not None def get_debug_session_info() -> Dict[str, Any]: @@ -379,16 +379,15 @@ if __name__ == "__main__": print("👁️ Vision Tools Module") print("=" * 40) - # Check if API key is available - api_available = check_openrouter_api_key() + # Check if vision model is available + api_available = check_vision_requirements() if not api_available: - print("❌ OPENROUTER_API_KEY environment variable not set") - print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'") - print("Get API key at: https://openrouter.ai/") + print("❌ No auxiliary vision model available") + print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.") exit(1) else: - print("✅ OpenRouter API key found") + print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}") print("🛠️ Vision tools ready for use!") print(f"🧠 Using model: {DEFAULT_VISION_MODEL}") @@ -455,7 +454,8 @@ def _handle_vision_analyze(args, **kw): image_url 
= args.get("image_url", "") question = args.get("question", "") full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}" - return vision_analyze_tool(image_url, full_prompt, "google/gemini-3-flash-preview") + model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview" + return vision_analyze_tool(image_url, full_prompt, model) registry.register( @@ -464,6 +464,5 @@ registry.register( schema=VISION_ANALYZE_SCHEMA, handler=_handle_vision_analyze, check_fn=check_vision_requirements, - requires_env=["OPENROUTER_API_KEY"], is_async=True, ) diff --git a/tools/web_tools.py b/tools/web_tools.py index 5809a26faf..868abb9420 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -47,7 +47,8 @@ import re import asyncio from typing import List, Dict, Any, Optional from firecrawl import Firecrawl -from tools.openrouter_client import get_async_client as _get_openrouter_client +from openai import AsyncOpenAI +from agent.auxiliary_client import get_text_auxiliary_client from tools.debug_helpers import DebugSession logger = logging.getLogger(__name__) @@ -64,9 +65,17 @@ def _get_firecrawl_client(): _firecrawl_client = Firecrawl(api_key=api_key) return _firecrawl_client -DEFAULT_SUMMARIZER_MODEL = "google/gemini-3-flash-preview" DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 +# Resolve auxiliary text client at module level; build an async wrapper. 
+_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client() +_aux_async_client: AsyncOpenAI | None = None +if _aux_sync_client is not None: + _aux_async_client = AsyncOpenAI( + api_key=_aux_sync_client.api_key, + base_url=str(_aux_sync_client.base_url), + ) + _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") @@ -223,7 +232,10 @@ Create a markdown summary that captures all key information in a well-organized, for attempt in range(max_retries): try: - response = await _get_openrouter_client().chat.completions.create( + if _aux_async_client is None: + logger.warning("No auxiliary model available for web content processing") + return None + response = await _aux_async_client.chat.completions.create( model=model, messages=[ {"role": "system", "content": system_prompt}, @@ -231,12 +243,6 @@ Create a markdown summary that captures all key information in a well-organized, ], temperature=0.1, max_tokens=max_tokens, - extra_body={ - "reasoning": { - "enabled": True, - "effort": "xhigh" - } - } ) return response.choices[0].message.content.strip() except Exception as api_error: @@ -342,7 +348,14 @@ Synthesize these into ONE cohesive, comprehensive summary that: Create a single, unified markdown summary.""" try: - response = await _get_openrouter_client().chat.completions.create( + if _aux_async_client is None: + logger.warning("No auxiliary model for synthesis, concatenating summaries") + fallback = "\n\n".join(summaries) + if len(fallback) > max_output_size: + fallback = fallback[:max_output_size] + "\n\n[... truncated ...]" + return fallback + + response = await _aux_async_client.chat.completions.create( model=model, messages=[ {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. 
Be thorough but concise."}, @@ -350,12 +363,6 @@ Create a single, unified markdown summary.""" ], temperature=0.1, max_tokens=4000, - extra_body={ - "reasoning": { - "enabled": True, - "effort": "xhigh" - } - } ) final_summary = response.choices[0].message.content.strip() @@ -677,8 +684,8 @@ async def web_extract_tool( debug_call_data["pages_extracted"] = pages_extracted debug_call_data["original_response_size"] = len(json.dumps(response)) - # Process each result with LLM if enabled - if use_llm_processing and os.getenv("OPENROUTER_API_KEY"): + # Process each result with LLM if enabled and auxiliary client is available + if use_llm_processing and _aux_async_client is not None: logger.info("Processing extracted content with LLM (parallel)...") debug_call_data["processing_applied"].append("llm_processing") @@ -744,8 +751,8 @@ async def web_extract_tool( else: logger.warning("%s (no content to process)", url) else: - if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"): - logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content") + if use_llm_processing and _aux_async_client is None: + logger.warning("LLM processing requested but no auxiliary model available, returning raw content") debug_call_data["processing_applied"].append("llm_processing_unavailable") # Print summary of extracted pages for debugging (original behavior) @@ -973,8 +980,8 @@ async def web_crawl_tool( debug_call_data["pages_crawled"] = pages_crawled debug_call_data["original_response_size"] = len(json.dumps(response)) - # Process each result with LLM if enabled - if use_llm_processing and os.getenv("OPENROUTER_API_KEY"): + # Process each result with LLM if enabled and auxiliary client is available + if use_llm_processing and _aux_async_client is not None: logger.info("Processing crawled content with LLM (parallel)...") debug_call_data["processing_applied"].append("llm_processing") @@ -1040,8 +1047,8 @@ async def web_crawl_tool( else: logger.warning("%s 
(no content to process)", page_url) else: - if use_llm_processing and not os.getenv("OPENROUTER_API_KEY"): - logger.warning("LLM processing requested but OPENROUTER_API_KEY not set, returning raw content") + if use_llm_processing and _aux_async_client is None: + logger.warning("LLM processing requested but no auxiliary model available, returning raw content") debug_call_data["processing_applied"].append("llm_processing_unavailable") # Print summary of crawled pages for debugging (original behavior) @@ -1096,14 +1103,9 @@ def check_firecrawl_api_key() -> bool: return bool(os.getenv("FIRECRAWL_API_KEY")) -def check_nous_api_key() -> bool: - """ - Check if the Nous Research API key is available in environment variables. - - Returns: - bool: True if API key is set, False otherwise - """ - return bool(os.getenv("OPENROUTER_API_KEY")) +def check_auxiliary_model() -> bool: + """Check if an auxiliary text model is available for LLM content processing.""" + return _aux_async_client is not None def get_debug_session_info() -> Dict[str, Any]: @@ -1120,7 +1122,7 @@ if __name__ == "__main__": # Check if API keys are available firecrawl_available = check_firecrawl_api_key() - nous_available = check_nous_api_key() + nous_available = check_auxiliary_model() if not firecrawl_available: print("❌ FIRECRAWL_API_KEY environment variable not set") @@ -1130,12 +1132,11 @@ if __name__ == "__main__": print("✅ Firecrawl API key found") if not nous_available: - print("❌ OPENROUTER_API_KEY environment variable not set") - print("Please set your API key: export OPENROUTER_API_KEY='your-key-here'") - print("Get API key at: https://inference-api.nousresearch.com/") - print("⚠️ Without Nous API key, LLM content processing will be disabled") + print("❌ No auxiliary model available for LLM content processing") + print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY") + print("⚠️ Without an auxiliary model, LLM content processing will be disabled") else: - 
print("✅ Nous Research API key found") + print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}") if not firecrawl_available: exit(1) @@ -1143,7 +1144,7 @@ if __name__ == "__main__": print("🛠️ Web tools ready for use!") if nous_available: - print("🧠 LLM content processing available with Gemini 3 Flash Preview via OpenRouter") + print(f"🧠 LLM content processing available with {DEFAULT_SUMMARIZER_MODEL}") print(f" Default min length for processing: {DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION} chars") # Show debug mode status