diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000..1a71cb0776
Binary files /dev/null and b/.DS_Store differ
diff --git a/.env.example b/.env.example
index 38804aa120..4c13afec63 100644
--- a/.env.example
+++ b/.env.example
@@ -1,12 +1,42 @@
 # Hermes Agent Environment Configuration
 # Copy this file to .env and fill in your API keys
 
+# =============================================================================
+# CORE SETTINGS
+# =============================================================================
+# Agent backend:
+#   - openai  : default Hermes-Agent loop (OpenAI function calling via the OpenAI SDK)
+#   - atropos : atroposlib ServerManager/ManagedServer-backed loop (training/env integration)
+HERMES_BACKEND=openai
+
+# Debugging (prints to stdout; use with care)
+# HERMES_DEBUG_ATROPOS_REQUEST=1
+# HERMES_DEBUG_ATROPOS_RESPONSE=1
+# HERMES_DEBUG_OPENAI_REQUEST=1
+# HERMES_DEBUG_OPENAI_RESPONSE=1
+
+# =============================================================================
+# LOCAL / SELF-HOSTED OPENAI-COMPATIBLE ENDPOINTS (vLLM, SGLang, llama.cpp, etc.)
+# =============================================================================
+# If you set ATROPOS_SERVER_BASE_URL or OPENAI_BASE_URL, Hermes will use it instead
+# of OpenRouter.
+#
+# Local server convenience (base URL without /v1):
+# ATROPOS_SERVER_BASE_URL=http://localhost:11434
+# ATROPOS_SERVER_MODEL=glm-4.7-flash
+# ATROPOS_SERVER_API_KEY=local
+#
+# Generic OpenAI-compatible (base URL should include /v1):
+# OPENAI_BASE_URL=http://localhost:11434/v1
+# OPENAI_API_KEY=local
+
 # =============================================================================
 # LLM PROVIDER (OpenRouter)
 # =============================================================================
 # OpenRouter provides access to many models through one API
 # All LLM calls go through OpenRouter - no direct provider keys needed
 # Get your key at: https://openrouter.ai/keys
+OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
 OPENROUTER_API_KEY=
 
 # Default model to use (OpenRouter format: provider/model)
diff --git a/atropos_compatible_agent.py b/atropos_compatible_agent.py
index e0ba3402cb..a9102a03c4 100644
--- a/atropos_compatible_agent.py
+++ b/atropos_compatible_agent.py
@@ -19,6 +19,8 @@ import asyncio
 import json
 import re
 import time
+import warnings
+import os
 from contextlib import asynccontextmanager
 from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
 
@@ -75,8 +77,8 @@ class AtroposAIAgent(AIAgent):
         log_prefix_chars: int = 100,
         log_prefix: str = "",
         session_id: Optional[str] = None,
-        temperature: float = 0.7,
-        max_tokens: int = 4096,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
     ):
         # Call parent init mainly to reuse tool selection + trajectory saving utilities.
         super().__init__(
@@ -104,8 +106,14 @@ class AtroposAIAgent(AIAgent):
     @asynccontextmanager
     async def _managed(self) -> AsyncGenerator[Any, None]:
         if hasattr(self.server, "managed_server"):
-            async with self.server.managed_server(tokenizer=self.tokenizer) as managed:
-                yield managed
+            with warnings.catch_warnings():
+                warnings.filterwarnings(
+                    "ignore",
+                    message=r"Using OpenAIServer with managed_server does not allow for state tracking",
+                    category=UserWarning,
+                )
+                async with self.server.managed_server(tokenizer=self.tokenizer) as managed:
+                    yield managed
             return
         # Fall back to directly wrapping a single server object.
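
For reference, the warning filter in `_managed` above can be exercised in isolation. A minimal sketch, assuming a hypothetical `noisy()` stand-in for entering `managed_server()`; only the atroposlib state-tracking message is matched:

    import warnings

    def noisy() -> None:
        # Hypothetical stand-in for the warning emitted inside managed_server().
        warnings.warn(
            "Using OpenAIServer with managed_server does not allow for state tracking",
            UserWarning,
        )
        warnings.warn("an unrelated warning", UserWarning)

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message=r"Using OpenAIServer with managed_server does not allow for state tracking",
            category=UserWarning,
        )
        noisy()  # only "an unrelated warning" is emitted

The `message` argument is a regex matched against the start of the warning text, so the filter stays scoped to this one known-noisy warning instead of silencing all UserWarnings.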
@@ -206,20 +214,106 @@ class AtroposAIAgent(AIAgent):
             if active_system_prompt:
                 api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
 
-            response = await managed.chat_completion(
-                messages=api_messages,
-                n=1,
-                max_tokens=self.max_tokens,
-                temperature=self.temperature,
-            )
+            chat_kwargs: Dict[str, Any] = {"messages": api_messages, "n": 1}
+            if self.max_tokens is not None:
+                chat_kwargs["max_tokens"] = self.max_tokens
+            if self.temperature is not None:
+                chat_kwargs["temperature"] = self.temperature
+
+            # Prefer OpenAI tool calling when the backend supports it:
+            # - Many providers normalize Hermes-style tags into tool_calls when `tools` is provided.
+            # - ManagedServer (atroposlib) does prompt->completion conversion and does not support `tools`.
+            # Only pass `tools` when we're calling an OpenAI-compatible chat endpoint directly.
+            tool_schemas = self.tools if self.tools else None
+            managed_cls = type(managed).__name__
+            if tool_schemas and managed_cls != "ManagedServer":
+                chat_kwargs["tools"] = tool_schemas
+
+            if os.getenv("HERMES_DEBUG_ATROPOS_REQUEST") == "1":
+                meta = {
+                    "managed_type": managed_cls,
+                    "model": getattr(getattr(managed, "config", None), "model_name", self.model),
+                    "base_url": getattr(getattr(managed, "config", None), "base_url", None),
+                    "kwargs": chat_kwargs,
+                }
+                # Avoid dumping megabytes of data accidentally.
+                # (Messages can be large; this is still "full" but bounded.)
+                print("\n=== HERMES_DEBUG_ATROPOS_REQUEST ===", flush=True)
+                print(json.dumps(meta, ensure_ascii=False, indent=2)[:200_000], flush=True)
+
+            response = await managed.chat_completion(**chat_kwargs)
+
+            if os.getenv("HERMES_DEBUG_ATROPOS_RESPONSE") == "1":
+                try:
+                    dumped = response.model_dump()  # openai pydantic model
+                except Exception:
+                    dumped = getattr(response, "__dict__", {"repr": repr(response)})
+                print("\n=== HERMES_DEBUG_ATROPOS_RESPONSE: ChatCompletion (raw) ===", flush=True)
+                print(json.dumps(dumped, ensure_ascii=False, indent=2), flush=True)
 
             if hasattr(managed, "get_state"):
                 managed_state = managed.get_state()
 
-            assistant_content = response.choices[0].message.content or ""
-            messages.append({"role": "assistant", "content": assistant_content})
+            msg = response.choices[0].message
+            assistant_content = (msg.content or "")
+            msg_reasoning = getattr(msg, "reasoning", None)
 
-            tool_calls, parse_errors = self._parse_tool_calls(assistant_content)
+            # Use tool_calls if the backend provides them (preferred).
+            structured_tool_calls = getattr(msg, "tool_calls", None)
+
+            # If the backend emits content="" but includes useful text in reasoning,
+            # use it for parsing *only if needed* (e.g. tool tags).
+            if assistant_content == "" and isinstance(msg_reasoning, str) and msg_reasoning:
+                if os.getenv("HERMES_DEBUG_ATROPOS_RESPONSE") == "1":
+                    print("\n=== HERMES_DEBUG_ATROPOS_RESPONSE: message.reasoning present (content empty) ===", flush=True)
+                    print(msg_reasoning, flush=True)
+
+            assistant_msg: Dict[str, Any] = {"role": "assistant", "content": assistant_content}
+            if structured_tool_calls:
+                # Preserve tool_calls so the next request is consistent with the OpenAI protocol.
+                try:
+                    assistant_msg["tool_calls"] = [
+                        {
+                            "id": tc.id,
+                            "type": tc.type,
+                            "function": {"name": tc.function.name, "arguments": tc.function.arguments},
+                        }
+                        for tc in structured_tool_calls
+                    ]
+                except Exception:
+                    # Best-effort; keep the conversation moving.
+                    pass
+            messages.append(assistant_msg)
+
+            # Mode A: OpenAI tool calling (preferred when supported).
+            if structured_tool_calls:
+                for tc in structured_tool_calls:
+                    tool_start = time.time()
+                    try:
+                        tool_args = json.loads(tc.function.arguments or "{}")
+                    except Exception:
+                        tool_args = {}
+                    tool_result = handle_function_call(tc.function.name, tool_args, effective_task_id)
+                    tool_duration = time.time() - tool_start
+
+                    # Keep the raw tool result as tool content (the OpenAI protocol expects role=tool).
+                    messages.append(
+                        {
+                            "role": "tool",
+                            "tool_call_id": tc.id,
+                            "content": tool_result,
+                        }
+                    )
+
+                    if self.tool_delay and self.tool_delay > 0:
+                        await asyncio.sleep(self.tool_delay)
+
+                # Continue the outer loop after tool execution.
+                continue
+
+            # Mode B: Hermes XML tool tags in assistant text (fallback).
+            parse_source = assistant_content or (msg_reasoning or "")
+            tool_calls, parse_errors = self._parse_tool_calls(parse_source)
 
             if parse_errors and not tool_calls:
                 # Ask the model to retry with valid tool JSON.
@@ -237,7 +331,7 @@ class AtroposAIAgent(AIAgent):
 
             if not tool_calls:
                 # No tool calls: treat as final answer.
-                final_response = assistant_content
+                final_response = (assistant_content or "").strip()
                 completed = True
                 break
diff --git a/cli.py b/cli.py
index c4f436b0e8..c1d0b9f880 100755
--- a/cli.py
+++ b/cli.py
@@ -24,6 +24,8 @@ from typing import List, Dict, Any, Optional
 # Suppress startup messages for clean CLI experience
 os.environ["MSWEA_SILENT_STARTUP"] = "1"  # mini-swe-agent
 os.environ["HERMES_QUIET"] = "1"  # Our own modules
+os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
+os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
 
 import yaml
 
@@ -451,6 +453,7 @@ class HermesCLI:
         max_turns: int = 20,
         verbose: bool = False,
         compact: bool = False,
+        interactive: bool = True,
     ):
         """
         Initialize the Hermes CLI.
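
The next hunk resolves the model, base URL, and API key with `or`-chains, so the first non-empty source wins. A small sketch of that precedence behavior (env var names are from the diff; the values are hypothetical):

    import os

    os.environ["LLM_MODEL"] = ""                  # empty -> falls through
    os.environ["ATROPOS_SERVER_MODEL"] = "my-local-model"

    model = (
        None                                      # CLI arg not given
        or os.getenv("LLM_MODEL")                 # "" is falsy -> skipped
        or os.getenv("ATROPOS_SERVER_MODEL")      # first truthy value wins
        or "default-model"                        # config-file fallback
    )
    assert model == "my-local-model"

One subtlety the `or`-chain inherits: an environment variable set to the empty string is treated the same as unset, which is usually the desired behavior for .env templates with blank placeholder values.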
@@ -471,9 +474,38 @@ class HermesCLI:
         self.verbose = verbose if verbose is not None else CLI_CONFIG["agent"].get("verbose", False)
 
         # Configuration - priority: CLI args > env vars > config file
-        self.model = model or os.getenv("LLM_MODEL", CLI_CONFIG["model"]["default"])
-        self.base_url = base_url or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
-        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
+        self.model = (
+            model
+            or os.getenv("LLM_MODEL")
+            or os.getenv("ATROPOS_SERVER_MODEL")
+            or CLI_CONFIG["model"]["default"]
+        )
+
+        env_openai_base_url = os.getenv("OPENAI_BASE_URL")
+        if env_openai_base_url:
+            env_openai_base_url = env_openai_base_url.rstrip("/")
+            if not env_openai_base_url.endswith("/v1"):
+                env_openai_base_url = f"{env_openai_base_url}/v1"
+
+        env_atropos_base_url = os.getenv("ATROPOS_SERVER_BASE_URL")
+        if env_atropos_base_url:
+            env_atropos_base_url = env_atropos_base_url.rstrip("/")
+            if not env_atropos_base_url.endswith("/v1"):
+                env_atropos_base_url = f"{env_atropos_base_url}/v1"
+
+        self.base_url = (
+            base_url
+            or env_atropos_base_url
+            or env_openai_base_url
+            or os.getenv("OPENROUTER_BASE_URL")
+            or CLI_CONFIG["model"]["base_url"]
+        )
+        self.api_key = (
+            api_key
+            or os.getenv("ATROPOS_SERVER_API_KEY")
+            or os.getenv("OPENAI_API_KEY")
+            or os.getenv("OPENROUTER_API_KEY")
+        )
 
         self.max_turns = max_turns if max_turns != 20 else CLI_CONFIG["agent"].get("max_turns", 20)
         self.backend = (backend or os.getenv("HERMES_BACKEND") or "openai").strip().lower()
@@ -507,9 +539,11 @@ class HermesCLI:
         timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S")
         short_uuid = uuid.uuid4().hex[:6]
         self.session_id = f"{timestamp_str}_{short_uuid}"
-
-        # Setup prompt_toolkit session with history
-        self._setup_prompt_session()
+
+        self.interactive = interactive
+        if self.interactive:
+            # Setup prompt_toolkit session with history
+            self._setup_prompt_session()
 
     def _setup_prompt_session(self):
         """Setup prompt_toolkit session with history and styling."""
@@ -574,6 +608,9 @@ class HermesCLI:
                 quiet_mode=True,  # Suppress verbose output for clean CLI
                 ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
                 session_id=self.session_id,
+                # Do not force max_tokens/temperature by default; let the backend decide.
+                max_tokens=None,
+                temperature=None,
             )
         else:
             self.agent = AIAgent(
@@ -967,7 +1004,7 @@ class HermesCLI:
 
         return True
 
-    def chat(self, message: str) -> Optional[str]:
+    def chat(self, message: str, *, render: bool = True) -> Optional[str]:
         """
         Send a message to the agent and get a response.
 
@@ -984,8 +1021,9 @@ class HermesCLI:
         # Add user message to history
         self.conversation_history.append({"role": "user", "content": message})
 
-        # Visual separator after user input
-        print("─" * 60, flush=True)
+        if render:
+            # Visual separator after user input
+            print("─" * 60, flush=True)
 
         try:
             # Run the conversation
@@ -1000,7 +1038,7 @@ class HermesCLI:
 
         # Get the final response
         response = result.get("final_response", "")
-        if response:
+        if response and render:
             # Use simple print for compatibility with prompt_toolkit's patch_stdout
             print()
             print("╭" + "─" * 58 + "╮")
@@ -1130,12 +1168,21 @@ def main(
         python cli.py -p "What is Python?"    # Single query mode (alias)
         python cli.py --list-tools            # List tools and exit
     """
-    # Signal to terminal_tool that we're in interactive mode
-    # This enables interactive sudo password prompts with timeout
-    os.environ["HERMES_INTERACTIVE"] = "1"
-
-    # Handle query shorthand
-    query = query or q or prompt or p
+    # Resolve prompt modes:
+    # - query/-q : single-shot, but keep the normal banner UX
+    # - prompt/-p: single-shot, NO TUI/banner (for wrapper scripts)
+    query_text = query or q
+    prompt_text = prompt or p
+
+    # Signal to terminal_tool whether we can prompt the user (e.g. for a sudo password).
+    os.environ["HERMES_INTERACTIVE"] = "0" if prompt_text else "1"
+    if prompt_text:
+        # Wrapper mode should not emit spinners / interactive UX noise.
+        os.environ["HERMES_DISABLE_SPINNER"] = "1"
+
+    # Optional debug dump of the full model response objects:
+    # - HERMES_DEBUG_ATROPOS_RESPONSE=1 dumps the Atropos backend ChatCompletion
+    # - HERMES_DEBUG_OPENAI_RESPONSE=1 dumps the OpenAI backend ChatCompletion
 
     # Parse toolsets - handle both string and tuple/list inputs
     toolsets_list = None
@@ -1161,6 +1208,7 @@ def main(
         max_turns=max_turns,
         verbose=verbose,
         compact=compact,
+        interactive=not bool(prompt_text),
     )
 
     # Handle list commands (don't init agent for these)
@@ -1174,11 +1222,18 @@ def main(
         cli.show_toolsets()
         sys.exit(0)
 
+    # Handle wrapper-friendly prompt mode (no banner/TUI)
+    if prompt_text:
+        response = cli.chat(prompt_text, render=False)
+        if response is not None:
+            print(response)
+        return
+
     # Handle single query mode
-    if query:
+    if query_text:
         cli.show_banner()
-        cli.console.print(f"[bold blue]Query:[/] {query}")
-        cli.chat(query)
+        cli.console.print(f"[bold blue]Query:[/] {query_text}")
+        cli.chat(query_text)
         return
 
     # Run interactive mode
diff --git a/hermes b/hermes
index f0feeb2bad..58ef5cc483 100755
--- a/hermes
+++ b/hermes
@@ -7,6 +7,40 @@ Usage: ./hermes [options]
 """
 
 if __name__ == "__main__":
+    """
+    Fire (google/python-fire) does not support POSIX-style short flags like `-p`.
+    We translate the most common shorthands to their long equivalents so wrapper
+    scripts can reliably use:
+      - `-p "..."` -> `--prompt "..."` (no TUI/banner; print result and exit)
+      - `-q "..."` -> `--query "..."`  (single-shot with banner UX)
+    """
+
+    import sys
+
+    def _rewrite_short_flags(argv: list[str]) -> list[str]:
+        rewritten: list[str] = []
+        i = 0
+        while i < len(argv):
+            arg = argv[i]
+            if arg == "-p":
+                rewritten.append("--prompt")
+                if i + 1 < len(argv):
+                    rewritten.append(argv[i + 1])
+                i += 2
+                continue
+            if arg == "-q":
+                rewritten.append("--query")
+                if i + 1 < len(argv):
+                    rewritten.append(argv[i + 1])
+                i += 2
+                continue
+            rewritten.append(arg)
+            i += 1
+        return rewritten
+
+    sys.argv = [sys.argv[0]] + _rewrite_short_flags(sys.argv[1:])
+
     from cli import main
     import fire
+
     fire.Fire(main)
diff --git a/local_server.py b/local_server.py
index d2467a8bed..71ea093313 100644
--- a/local_server.py
+++ b/local_server.py
@@ -1,7 +1,7 @@
 """
 Local OpenAI-compatible server implementation for Hermes-Agent (Atropos integration).
 
-Extends the Atropos APIServer to work with local OpenAI-compatible APIs (e.g. Ollama),
+Extends the Atropos APIServer to work with local OpenAI-compatible APIs (e.g. vLLM, SGLang),
 providing tokens_and_logprobs_completion support via client-side tokenization.
 """
 
@@ -104,27 +104,35 @@ class LocalServer(APIServer):
         Create a LocalServer from environment variables (or explicit overrides).
 
         Env vars (checked in order):
-        - base URL: LOCAL_LLM_BASE_URL, LLM_BASE_URL, OLLAMA_BASE_URL
-        - model:    LOCAL_LLM_MODEL, LLM_MODEL, OLLAMA_MODEL
+        - base URL: ATROPOS_SERVER_BASE_URL, OPENAI_BASE_URL, LOCAL_LLM_BASE_URL, LLM_BASE_URL
+        - model:    ATROPOS_SERVER_MODEL, LLM_MODEL, LOCAL_LLM_MODEL
+        - api key:  ATROPOS_SERVER_API_KEY, OPENAI_API_KEY, LOCAL_LLM_API_KEY, LLM_API_KEY
         """
         from dotenv import load_dotenv
 
         load_dotenv()
 
         base_url = (
             base_url
+            or os.getenv("ATROPOS_SERVER_BASE_URL")
+            or os.getenv("OPENAI_BASE_URL")
             or os.getenv("LOCAL_LLM_BASE_URL")
             or os.getenv("LLM_BASE_URL")
-            or os.getenv("OLLAMA_BASE_URL")
             or "http://localhost:11434"
         )
         model = (
             model
-            or os.getenv("LOCAL_LLM_MODEL")
+            or os.getenv("ATROPOS_SERVER_MODEL")
             or os.getenv("LLM_MODEL")
-            or os.getenv("OLLAMA_MODEL")
+            or os.getenv("LOCAL_LLM_MODEL")
             or "hermes3:8b"
         )
-        api_key = api_key or os.getenv("LOCAL_LLM_API_KEY") or os.getenv("LLM_API_KEY") or os.getenv("OLLAMA_API_KEY")
+        api_key = (
+            api_key
+            or os.getenv("ATROPOS_SERVER_API_KEY")
+            or os.getenv("OPENAI_API_KEY")
+            or os.getenv("LOCAL_LLM_API_KEY")
+            or os.getenv("LLM_API_KEY")
+        )
 
         config = APIServerConfig(
             model_name=model,
@@ -173,7 +181,7 @@ class LocalServer(APIServer):
 
         n = kwargs.get("n", 1)
 
-        # Ollama doesn't support n > 1, so we make multiple requests
+        # Some OpenAI-compatible servers don't support n > 1, so we make multiple requests.
         if n > 1:
             completion_list = await asyncio.gather(
                 *[self.openai.chat.completions.create(**{**kwargs, "n": 1}) for _ in range(n)]
@@ -197,7 +205,7 @@ class LocalServer(APIServer):
 
         n = kwargs.get("n", 1)
 
-        # Ollama doesn't support n > 1
+        # Some OpenAI-compatible servers don't support n > 1.
         if n > 1:
             completion_list = await asyncio.gather(
                 *[self.openai.completions.create(**{**kwargs, "n": 1}) for _ in range(n)]
@@ -283,7 +291,7 @@ class LocalServer(APIServer):
 
         # Tokenize output
         output_tokens = self.tokenizer.encode(text, add_special_tokens=False)
 
-        # Placeholder logprobs (Ollama doesn't provide per-token logprobs easily)
+        # Placeholder logprobs (many local servers don't provide per-token logprobs).
         # In production, use vLLM/SGLang which return real logprobs
         output_logprobs = [0.0] * len(output_tokens)
diff --git a/run_agent.py b/run_agent.py
index 0aba84a5f9..fb594d8caa 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -910,6 +910,16 @@ class AIAgent:
         if active_system_prompt:
             # Insert system message at the beginning
             api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
+
+        if os.getenv("HERMES_DEBUG_OPENAI_REQUEST") == "1":
+            meta = {
+                "model": self.model,
+                "base_url": self.base_url,
+                "messages": api_messages,
+                "tools": self.tools if self.tools else None,
+            }
+            print("\n=== HERMES_DEBUG_OPENAI_REQUEST ===", flush=True)
+            print(json.dumps(meta, ensure_ascii=False, indent=2)[:200_000], flush=True)
 
         # Calculate approximate request size for logging
         total_chars = sum(len(str(msg)) for msg in api_messages)
@@ -923,12 +933,13 @@ class AIAgent:
             print(f"{self.log_prefix} 📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
             print(f"{self.log_prefix} 🔧 Available tools: {len(self.tools) if self.tools else 0}")
         else:
-            # Animated thinking spinner in quiet mode
-            face = random.choice(KawaiiSpinner.KAWAII_THINKING)
-            verb = random.choice(KawaiiSpinner.THINKING_VERBS)
-            spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
-            thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
-            thinking_spinner.start()
+            # Animated thinking spinner in quiet mode (disable for wrappers/non-TTY usage)
+            if os.getenv("HERMES_DISABLE_SPINNER") != "1":
+                face = random.choice(KawaiiSpinner.KAWAII_THINKING)
+                verb = random.choice(KawaiiSpinner.THINKING_VERBS)
+                spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
+                thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
+                thinking_spinner.start()
 
         # Log request details if verbose
         if self.verbose_logging:
@@ -979,6 +990,14 @@ class AIAgent:
             api_kwargs["extra_body"] = extra_body
 
         response = self.client.chat.completions.create(**api_kwargs)
+
+        if os.getenv("HERMES_DEBUG_OPENAI_RESPONSE") == "1":
+            try:
+                dumped = response.model_dump()
+            except Exception:
+                dumped = getattr(response, "__dict__", {"repr": repr(response)})
+            print("\n=== HERMES_DEBUG_OPENAI_RESPONSE: ChatCompletion (raw) ===", flush=True)
+            print(json.dumps(dumped, ensure_ascii=False, indent=2), flush=True)
 
         api_duration = time.time() - api_start_time
 
@@ -1294,7 +1313,7 @@ class AIAgent:
         tool_start_time = time.time()
 
         # Execute the tool - with animated spinner in quiet mode
-        if self.quiet_mode:
+        if self.quiet_mode and os.getenv("HERMES_DISABLE_SPINNER") != "1":
             # Tool-specific spinner animations
             tool_spinners = {
                 'web_search': ('arrows', ['🔍', '🌐', '📡', '🔎']),
@@ -1324,6 +1343,9 @@ class AIAgent:
             tool_duration = time.time() - tool_start_time
             cute_msg = self._get_cute_tool_message(function_name, function_args, tool_duration)
             spinner.stop(cute_msg)
+        elif self.quiet_mode:
+            function_result = handle_function_call(function_name, function_args, effective_task_id)
+            tool_duration = time.time() - tool_start_time
         else:
             function_result = handle_function_call(function_name, function_args, effective_task_id)
             tool_duration = time.time() - tool_start_time
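
The debug dumps above rely on `model_dump()` with a `__dict__` fallback. The same pattern can be exercised standalone; a sketch, where `FakeResponse` is a hypothetical stand-in for a non-pydantic SDK object and `default=repr` is an extra safety net beyond what the diff itself does:

    import json

    class FakeResponse:
        # Hypothetical response object without model_dump().
        def __init__(self) -> None:
            self.id = "resp_1"
            self.choices = []

    def dump_response(response) -> str:
        try:
            dumped = response.model_dump()  # pydantic v2 models (openai SDK types)
        except Exception:
            dumped = getattr(response, "__dict__", {"repr": repr(response)})
        return json.dumps(dumped, ensure_ascii=False, indent=2, default=repr)

    print(dump_response(FakeResponse()))

Catching `Exception` rather than `AttributeError` keeps the debug path from ever breaking the agent loop, at the cost of hiding pydantic serialization errors; `default=repr` covers values (timestamps, enums) that `json.dumps` cannot serialize natively.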