#!/usr/bin/env python3 """ Atropos-compatible Hermes agent runner. This is a minimal subclass of Hermes-Agent's `AIAgent` that swaps the OpenAI function-calling backend for Atroposlib's `ManagedServer`/`ServerManager` backend and uses Hermes-style XML tool tags: - {"name": "...", "arguments": {...}} - {...} Tool observations are appended as `role="user"` messages containing one or more `` blocks so they survive common chat templates during tokenization. """ from __future__ import annotations import asyncio import json import re import time import warnings import os from contextlib import asynccontextmanager from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple from model_tools import cleanup_vm, handle_function_call from run_agent import AIAgent _TOOL_CALL_RE = re.compile(r"\\s*(.*?)\\s*", re.DOTALL) ATROPOS_TOOL_SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. ## Available Tools {tool_descriptions} ## How to Use Tools To call a tool, output: {{"name": "tool_name", "arguments": {{"arg1": "value1"}}}} You may include optional reasoning in ... before tool calls. After each tool call, you will receive tool results as: {{...}} Continue until finished, then provide a final response with no blocks. """ class AtroposAIAgent(AIAgent): """ Hermes `AIAgent` variant that uses Atroposlib ServerManager/ManagedServer. Notes: - The default Hermes `AIAgent` remains unchanged; this class is opt-in. - The underlying server must expose `managed_server(tokenizer=...)` OR be a single APIServer-compatible object usable by Atroposlib's `ManagedServer`. """ def __init__( self, *, server: Any, tokenizer: Any = None, model: str = "local", max_iterations: int = 10, tool_delay: float = 0.0, enabled_toolsets: Optional[List[str]] = None, disabled_toolsets: Optional[List[str]] = None, save_trajectories: bool = False, verbose_logging: bool = False, quiet_mode: bool = False, ephemeral_system_prompt: Optional[str] = None, log_prefix_chars: int = 100, log_prefix: str = "", session_id: Optional[str] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, ): # Call parent init mainly to reuse tool selection + trajectory saving utilities. super().__init__( base_url="http://unused", api_key="dummy-key", model=model, max_iterations=max_iterations, tool_delay=tool_delay, enabled_toolsets=enabled_toolsets, disabled_toolsets=disabled_toolsets, save_trajectories=save_trajectories, verbose_logging=verbose_logging, quiet_mode=quiet_mode, ephemeral_system_prompt=ephemeral_system_prompt, log_prefix_chars=log_prefix_chars, log_prefix=log_prefix, session_id=session_id, ) self.server = server self.tokenizer = tokenizer self.temperature = temperature self.max_tokens = max_tokens @asynccontextmanager async def _managed(self) -> AsyncGenerator[Any, None]: if hasattr(self.server, "managed_server"): with warnings.catch_warnings(): warnings.filterwarnings( "ignore", message=r"Using OpenAIServer with managed_server does not allow for state tracking", category=UserWarning, ) async with self.server.managed_server(tokenizer=self.tokenizer) as managed: yield managed return # Fall back to directly wrapping a single server object. from atroposlib.envs.server_handling.managed_server import ManagedServer managed = ManagedServer(server=self.server, tokenizer=self.tokenizer) try: yield managed finally: managed.reset() def _tool_descriptions_text(self) -> str: if not self.tools: return "(no tools available)" parts: List[str] = [] for tool in self.tools: fn = (tool or {}).get("function", {}) name = fn.get("name", "") desc = (fn.get("description") or "").strip() if not name: continue if desc: parts.append(f"- {name}: {desc}") else: parts.append(f"- {name}") return "\n".join(parts) if parts else "(no tools available)" def _build_system_prompt(self, system_message: Optional[str]) -> Optional[str]: tool_prompt = ATROPOS_TOOL_SYSTEM_PROMPT.format( tool_descriptions=self._tool_descriptions_text() ) parts: List[str] = [] if system_message: parts.append(system_message) if self.ephemeral_system_prompt: parts.append(self.ephemeral_system_prompt) parts.append(tool_prompt) return "\n\n".join(parts) def _parse_tool_calls(self, content: str) -> Tuple[List[Tuple[str, Dict[str, Any]]], List[str]]: """ Returns: (calls, errors) """ calls: List[Tuple[str, Dict[str, Any]]] = [] errors: List[str] = [] for raw in _TOOL_CALL_RE.findall(content or ""): try: payload = json.loads(raw) except json.JSONDecodeError as exc: errors.append(f"Invalid JSON inside : {exc}") continue name = payload.get("name") args = payload.get("arguments", {}) if not isinstance(name, str) or not name: errors.append("Tool call missing 'name' string") continue if not isinstance(args, dict): errors.append("Tool call 'arguments' must be an object") continue calls.append((name, args)) return calls, errors async def run_conversation_async( self, user_message: str, system_message: Optional[str] = None, conversation_history: Optional[List[Dict[str, Any]]] = None, task_id: Optional[str] = None, ) -> Dict[str, Any]: import uuid effective_task_id = task_id or str(uuid.uuid4()) messages: List[Dict[str, Any]] = conversation_history.copy() if conversation_history else [] messages.append({"role": "user", "content": user_message}) active_system_prompt = self._build_system_prompt(system_message) api_call_count = 0 final_response: Optional[str] = None managed_state: Optional[Dict[str, Any]] = None completed = False try: async with self._managed() as managed: while api_call_count < self.max_iterations: api_call_count += 1 api_messages = messages.copy() if active_system_prompt: api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages chat_kwargs: Dict[str, Any] = {"messages": api_messages, "n": 1} if self.max_tokens is not None: chat_kwargs["max_tokens"] = self.max_tokens if self.temperature is not None: chat_kwargs["temperature"] = self.temperature # Prefer OpenAI tool calling when supported by the backend: # - Many providers normalize Hermes-style tags into tool_calls when `tools` is provided. # - ManagedServer (atroposlib) does prompt->completion conversion and does not support `tools`. # Only pass `tools` when we're calling an OpenAI-compatible chat endpoint directly. tool_schemas = self.tools if self.tools else None managed_cls = type(managed).__name__ if tool_schemas and managed_cls != "ManagedServer": chat_kwargs["tools"] = tool_schemas if os.getenv("HERMES_DEBUG_ATROPOS_REQUEST") == "1": meta = { "managed_type": managed_cls, "model": getattr(getattr(managed, "config", None), "model_name", self.model), "base_url": getattr(getattr(managed, "config", None), "base_url", None), "kwargs": chat_kwargs, } # Avoid dumping megabytes of data accidentally. # (Messages can be large; this is still "full" but bounded.) print("\n=== HERMES_DEBUG_ATROPOS_REQUEST ===", flush=True) print(json.dumps(meta, ensure_ascii=False, indent=2)[:200_000], flush=True) response = await managed.chat_completion(**chat_kwargs) if os.getenv("HERMES_DEBUG_ATROPOS_RESPONSE") == "1": try: dumped = response.model_dump() # openai pydantic model except Exception: dumped = getattr(response, "__dict__", {"repr": repr(response)}) print("\n=== HERMES_DEBUG_ATROPOS_RESPONSE: ChatCompletion (raw) ===", flush=True) print(json.dumps(dumped, ensure_ascii=False, indent=2), flush=True) if hasattr(managed, "get_state"): managed_state = managed.get_state() msg = response.choices[0].message assistant_content = (msg.content or "") msg_reasoning = getattr(msg, "reasoning", None) # Use tool_calls if the backend provides them (preferred). structured_tool_calls = getattr(msg, "tool_calls", None) # If the backend emits content="" but includes useful text in reasoning, # use it for parsing *only if needed* (e.g. tool tags). if assistant_content == "" and isinstance(msg_reasoning, str) and msg_reasoning: if os.getenv("HERMES_DEBUG_ATROPOS_RESPONSE") == "1": print("\n=== HERMES_DEBUG_ATROPOS_RESPONSE: message.reasoning present (content empty) ===", flush=True) print(msg_reasoning, flush=True) assistant_msg: Dict[str, Any] = {"role": "assistant", "content": assistant_content} if structured_tool_calls: # Preserve tool_calls so the next request is consistent with OpenAI protocol. try: assistant_msg["tool_calls"] = [ { "id": tc.id, "type": tc.type, "function": {"name": tc.function.name, "arguments": tc.function.arguments}, } for tc in structured_tool_calls ] except Exception: # Best-effort; keep conversation moving. pass messages.append(assistant_msg) # Mode A: OpenAI tool calling (preferred when supported) if structured_tool_calls: for tc in structured_tool_calls: tool_start = time.time() try: tool_args = json.loads(tc.function.arguments or "{}") except Exception: tool_args = {} tool_result = handle_function_call(tc.function.name, tool_args, effective_task_id) tool_duration = time.time() - tool_start # Keep the raw tool result as tool content (OpenAI protocol expects role=tool). messages.append( { "role": "tool", "tool_call_id": tc.id, "content": tool_result, } ) if self.tool_delay and self.tool_delay > 0: await asyncio.sleep(self.tool_delay) # Continue loop after tool execution. continue # Mode B: Hermes XML tool tags in assistant text (fallback). parse_source = assistant_content or (msg_reasoning or "") tool_calls, parse_errors = self._parse_tool_calls(parse_source) if parse_errors and not tool_calls: # Ask the model to retry with valid tool JSON. err_text = "; ".join(parse_errors[:3]) messages.append( { "role": "user", "content": ( f"{json.dumps({'error': err_text}, ensure_ascii=False)}\n" "The previous blocks were invalid. Please output valid JSON inside ." ), } ) continue if not tool_calls: # No tool calls: treat as final answer. final_response = (assistant_content or "").strip() completed = True break tool_responses: List[str] = [] for tool_name, tool_args in tool_calls: tool_start = time.time() tool_result = handle_function_call(tool_name, tool_args, effective_task_id) tool_duration = time.time() - tool_start try: parsed = json.loads(tool_result) payload: Any = parsed except Exception: payload = tool_result tool_payload = { "name": tool_name, "duration_s": round(tool_duration, 3), "result": payload, } tool_responses.append( f"{json.dumps(tool_payload, ensure_ascii=False)}" ) if self.tool_delay and self.tool_delay > 0: await asyncio.sleep(self.tool_delay) messages.append({"role": "user", "content": "\n".join(tool_responses)}) if final_response is None: final_response = "I've reached the maximum number of iterations." finally: try: cleanup_vm(effective_task_id) except Exception: pass # Save trajectory using Hermes formatting (optional). self._save_trajectory(messages, user_message, completed=completed) return { "final_response": final_response, "messages": messages, "api_calls": api_call_count, "completed": completed, "managed_state": managed_state, "system_prompt": active_system_prompt, "task_id": effective_task_id, } def run_conversation(self, *args: Any, **kwargs: Any) -> Dict[str, Any]: """ Sync wrapper for convenience. If called from within a running event loop (e.g. prompt_toolkit), this runs the async conversation in a dedicated thread to avoid nested loops. """ try: asyncio.get_running_loop() except RuntimeError: return asyncio.run(self.run_conversation_async(*args, **kwargs)) import queue import threading out: "queue.Queue[object]" = queue.Queue(maxsize=1) def runner() -> None: try: out.put(asyncio.run(self.run_conversation_async(*args, **kwargs))) except BaseException as exc: # noqa: BLE001 out.put(exc) thread = threading.Thread(target=runner, daemon=True) thread.start() result = out.get() if isinstance(result, BaseException): raise result return result # type: ignore[return-value]