added atropos as dependency, and extra flag, adding atropos as optional backend to agent

This commit is contained in:
Shannon Sands 2026-02-02 11:56:08 +10:00
parent e38c274f8d
commit 68fb0efe0e
11 changed files with 4990 additions and 19 deletions

View file

@ -70,9 +70,11 @@ class AtroposAIAgent(AIAgent):
disabled_toolsets: Optional[List[str]] = None,
save_trajectories: bool = False,
verbose_logging: bool = False,
quiet_mode: bool = False,
ephemeral_system_prompt: Optional[str] = None,
log_prefix_chars: int = 100,
log_prefix: str = "",
session_id: Optional[str] = None,
temperature: float = 0.7,
max_tokens: int = 4096,
):
@ -87,9 +89,11 @@ class AtroposAIAgent(AIAgent):
disabled_toolsets=disabled_toolsets,
save_trajectories=save_trajectories,
verbose_logging=verbose_logging,
quiet_mode=quiet_mode,
ephemeral_system_prompt=ephemeral_system_prompt,
log_prefix_chars=log_prefix_chars,
log_prefix=log_prefix,
session_id=session_id,
)
self.server = server
@ -131,14 +135,19 @@ class AtroposAIAgent(AIAgent):
return "\n".join(parts) if parts else "(no tools available)"
def _build_system_prompt(self, system_message: Optional[str]) -> Optional[str]:
if system_message is not None:
return system_message
if self.ephemeral_system_prompt:
return self.ephemeral_system_prompt
return ATROPOS_TOOL_SYSTEM_PROMPT.format(
tool_prompt = ATROPOS_TOOL_SYSTEM_PROMPT.format(
tool_descriptions=self._tool_descriptions_text()
)
parts: List[str] = []
if system_message:
parts.append(system_message)
if self.ephemeral_system_prompt:
parts.append(self.ephemeral_system_prompt)
parts.append(tool_prompt)
return "\n\n".join(parts)
def _parse_tool_calls(self, content: str) -> Tuple[List[Tuple[str, Dict[str, Any]]], List[str]]:
"""
Returns:
@ -284,10 +293,29 @@ class AtroposAIAgent(AIAgent):
"""
Sync wrapper for convenience.
If already inside an event loop, call `await run_conversation_async(...)` instead.
If called from within a running event loop (e.g. prompt_toolkit), this
runs the async conversation in a dedicated thread to avoid nested loops.
"""
try:
asyncio.get_running_loop()
except RuntimeError:
return asyncio.run(self.run_conversation_async(*args, **kwargs))
raise RuntimeError("AtroposAIAgent.run_conversation() cannot be called from a running event loop; use await run_conversation_async().")
import queue
import threading
out: "queue.Queue[object]" = queue.Queue(maxsize=1)
def runner() -> None:
try:
out.put(asyncio.run(self.run_conversation_async(*args, **kwargs)))
except BaseException as exc: # noqa: BLE001
out.put(exc)
thread = threading.Thread(target=runner, daemon=True)
thread.start()
result = out.get()
if isinstance(result, BaseException):
raise result
return result # type: ignore[return-value]

77
cli.py
View file

@ -447,6 +447,7 @@ class HermesCLI:
toolsets: List[str] = None,
api_key: str = None,
base_url: str = None,
backend: str = None,
max_turns: int = 20,
verbose: bool = False,
compact: bool = False,
@ -459,6 +460,7 @@ class HermesCLI:
toolsets: List of toolsets to enable (default: all)
api_key: API key (default: from environment)
base_url: API base URL (default: OpenRouter)
backend: Agent backend ("openai" or "atropos")
max_turns: Maximum conversation turns
verbose: Enable verbose logging
compact: Use compact display mode
@ -473,6 +475,13 @@ class HermesCLI:
self.base_url = base_url or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
self.max_turns = max_turns if max_turns != 20 else CLI_CONFIG["agent"].get("max_turns", 20)
self.backend = (backend or os.getenv("HERMES_BACKEND") or "openai").strip().lower()
if self.backend not in {"openai", "atropos"}:
self.console.print(
f"[bold yellow]Warning:[/] unknown backend '{self.backend}', falling back to 'openai'"
)
self.backend = "openai"
# Parse and validate toolsets
self.enabled_toolsets = toolsets
@ -531,17 +540,53 @@ class HermesCLI:
return True
try:
self.agent = AIAgent(
model=self.model,
api_key=self.api_key,
base_url=self.base_url,
max_iterations=self.max_turns,
enabled_toolsets=self.enabled_toolsets,
verbose_logging=self.verbose,
quiet_mode=True, # Suppress verbose output for clean CLI
ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
session_id=self.session_id, # Pass CLI's session ID to agent
)
if self.backend == "atropos":
try:
from atroposlib.envs.server_handling.server_baseline import APIServerConfig
from atroposlib.envs.server_handling.server_manager import ServerManager
except ModuleNotFoundError as exc:
raise RuntimeError(
"Atropos backend requires `atroposlib`. Install Hermes-Agent with the extra "
"`.[atropos]` (e.g. `pip install -e '.[atropos]'` or `uv sync --extra atropos`)."
) from exc
from atropos_compatible_agent import AtroposAIAgent
server_cfg = APIServerConfig(
server_type="openai",
model_name=self.model,
base_url=self.base_url,
api_key=self.api_key or "",
timeout=120,
num_max_requests_at_once=1,
num_requests_for_eval=1,
health_check=False,
)
server = ServerManager([server_cfg], slurm=False, testing=False)
self.agent = AtroposAIAgent(
server=server,
tokenizer=None,
model=self.model,
max_iterations=self.max_turns,
enabled_toolsets=self.enabled_toolsets,
verbose_logging=self.verbose,
quiet_mode=True, # Suppress verbose output for clean CLI
ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
session_id=self.session_id,
)
else:
self.agent = AIAgent(
model=self.model,
api_key=self.api_key,
base_url=self.base_url,
max_iterations=self.max_turns,
enabled_toolsets=self.enabled_toolsets,
verbose_logging=self.verbose,
quiet_mode=True, # Suppress verbose output for clean CLI
ephemeral_system_prompt=self.system_prompt if self.system_prompt else None,
session_id=self.session_id, # Pass CLI's session ID to agent
)
return True
except Exception as e:
self.console.print(f"[bold red]Failed to initialize agent: {e}[/]")
@ -1046,10 +1091,13 @@ class HermesCLI:
def main(
query: str = None,
q: str = None,
prompt: str = None,
p: str = None,
toolsets: str = None,
model: str = None,
api_key: str = None,
base_url: str = None,
backend: str = None,
max_turns: int = 20,
verbose: bool = False,
compact: bool = False,
@ -1062,10 +1110,13 @@ def main(
Args:
query: Single query to execute (then exit). Alias: -q
q: Shorthand for --query
prompt: Alias for query (bypass TUI). Shorthand: -p
p: Shorthand for --prompt
toolsets: Comma-separated list of toolsets to enable (e.g., "web,terminal")
model: Model to use (default: anthropic/claude-opus-4-20250514)
api_key: API key for authentication
base_url: Base URL for the API
backend: Agent backend ("openai" default, or "atropos")
max_turns: Maximum conversation turns (default: 20)
verbose: Enable verbose logging
compact: Use compact display mode
@ -1076,6 +1127,7 @@ def main(
python cli.py # Start interactive mode
python cli.py --toolsets web,terminal # Use specific toolsets
python cli.py -q "What is Python?" # Single query mode
python cli.py -p "What is Python?" # Single query mode (alias)
python cli.py --list-tools # List tools and exit
"""
# Signal to terminal_tool that we're in interactive mode
@ -1083,7 +1135,7 @@ def main(
os.environ["HERMES_INTERACTIVE"] = "1"
# Handle query shorthand
query = query or q
query = query or q or prompt or p
# Parse toolsets - handle both string and tuple/list inputs
toolsets_list = None
@ -1105,6 +1157,7 @@ def main(
toolsets=toolsets_list,
api_key=api_key,
base_url=base_url,
backend=backend,
max_turns=max_turns,
verbose=verbose,
compact=compact,

View file

@ -0,0 +1,645 @@
Metadata-Version: 2.4
Name: hermes-agent
Version: 0.1.0
Summary: AI agent with advanced tool-calling and toolsets
Author: Nous Research
License: MIT
Requires-Python: >=3.10
Description-Content-Type: text/markdown
Requires-Dist: openai
Requires-Dist: python-dotenv
Requires-Dist: fire
Requires-Dist: httpx
Requires-Dist: rich
Requires-Dist: tenacity
Requires-Dist: pyyaml
Requires-Dist: prompt_toolkit
Requires-Dist: requests
Requires-Dist: jinja2
Requires-Dist: pydantic>=2.0
Requires-Dist: firecrawl-py
Requires-Dist: fal-client
Requires-Dist: litellm>=1.75.5
Requires-Dist: typer
Requires-Dist: platformdirs
Provides-Extra: modal
Requires-Dist: modal; extra == "modal"
Requires-Dist: boto3; extra == "modal"
Provides-Extra: dev
Requires-Dist: pytest; extra == "dev"
Requires-Dist: pytest-asyncio; extra == "dev"
Provides-Extra: atropos
Requires-Dist: atroposlib @ git+ssh://git@github.com/NousResearch/atropos.git ; extra == "atropos"
# Hermes Agent
An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.
## Features
- **Interactive CLI**: Beautiful terminal interface with animated feedback, personalities, and session management
- **Web Tools**: Search, extract content, and crawl websites
- **Terminal Tools**: Execute commands via local, Docker, Singularity, Modal, or SSH backends
- **Browser Tools**: Automate web browsers to navigate, click, type, and extract content
- **Vision Tools**: Analyze images from URLs
- **Reasoning Tools**: Advanced multi-model reasoning (Mixture of Agents)
- **Creative Tools**: Generate images from text prompts
- **Skills Tools**: On-demand knowledge documents with progressive disclosure
- **Toolsets System**: Organize tools into logical groups for different scenarios
- **Batch Processing**: Process datasets in parallel with checkpointing and statistics tracking
- **Ephemeral System Prompts**: Guide model behavior without polluting training datasets
## Quick Start (CLI)
```bash
# After setup (see below), just run:
./hermes
# Or with options:
./hermes --model "anthropic/claude-sonnet-4" --toolsets "web,terminal"
```
The CLI provides:
- Animated spinners during thinking and tool execution
- Kawaii-style feedback messages
- `/commands` for configuration, history, and session management
- Customizable personalities (`/personality kawaii`, `/personality pirate`, etc.)
- Persistent configuration via `cli-config.yaml`
## Setup
### 1. Clone the Repository
```bash
# Clone with submodules (recommended)
git clone --recurse-submodules https://github.com/NousResearch/Hermes-Agent.git
cd Hermes-Agent
# Or if already cloned without submodules:
git submodule update --init --recursive
```
### 2. Install Dependencies
```bash
# Create and activate virtual environment (recommended)
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install Python packages
pip install -r requirements.txt
# Install mini-swe-agent for terminal tools
pip install -e ./mini-swe-agent
# Install Node.js dependencies for browser tools (requires Node.js)
npm install
```
### 3. Configure Environment Variables
```bash
# Copy the example environment file
cp .env.example .env
# Edit .env and add your API keys
nano .env # or use your preferred editor
```
**Required API Keys:**
- `OPENROUTER_API_KEY` - LLM access via OpenRouter (get at: https://openrouter.ai/keys)
- `FIRECRAWL_API_KEY` - Web tools (get at: https://firecrawl.dev/)
- `NOUS_API_KEY` - Vision & reasoning tools (get at: https://inference-api.nousresearch.com/)
- `FAL_KEY` - Image generation (get at: https://fal.ai/)
**Optional API Keys (for specific features):**
- `BROWSERBASE_API_KEY` - Browser automation (get at: https://browserbase.com/)
- `BROWSERBASE_PROJECT_ID` - From Browserbase dashboard
- `MORPH_API_KEY` - For legacy Hecate terminal backend (get at: https://morph.so/)
### 4. Configure Terminal Backend
The terminal tool uses **mini-swe-agent** environments. Configure in `.env` or `cli-config.yaml`:
```bash
# Backend: "local", "docker", "singularity", "modal", or "ssh"
TERMINAL_ENV=local # Default: runs on host machine (no isolation)
TERMINAL_ENV=ssh # Remote execution via SSH (agent code stays local)
TERMINAL_ENV=singularity # Recommended for HPC: Apptainer/Singularity containers
TERMINAL_ENV=docker # Isolated Docker containers
TERMINAL_ENV=modal # Cloud execution via Modal
# Container image (for docker/singularity/modal backends)
TERMINAL_DOCKER_IMAGE=python:3.11-slim
TERMINAL_SINGULARITY_IMAGE=docker://python:3.11-slim
TERMINAL_TIMEOUT=60
# SSH backend (for ssh)
TERMINAL_SSH_HOST=my-server.example.com
TERMINAL_SSH_USER=myuser
TERMINAL_SSH_KEY=~/.ssh/id_rsa # Optional, uses ssh-agent if not set
```
**Backend Requirements:**
- **local**: No extra setup (runs directly on your machine, no isolation)
- **ssh**: SSH access to remote machine (great for sandboxing - agent can't touch its own code)
- **singularity**: Requires Apptainer or Singularity installed (common on HPC clusters, no root needed)
- **docker**: Requires Docker installed and user in `docker` group
- **modal**: Requires Modal account (see setup below)
### Singularity/Apptainer Setup (Recommended for HPC)
Singularity/Apptainer provides rootless container execution, ideal for HPC clusters:
```bash
# 1. Verify Apptainer is installed
apptainer --version # or: singularity --version
# 2. Set up cache directories (important for parallel workers)
# Use /scratch if available (HPC), otherwise /tmp
export APPTAINER_CACHEDIR=/scratch/$USER/.apptainer
export APPTAINER_TMPDIR=/scratch/$USER/.apptainer/tmp
mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
# 3. Pre-build SIF image (recommended for parallel batch processing)
# This avoids race conditions when multiple workers start simultaneously
apptainer build $APPTAINER_CACHEDIR/python-nodejs.sif docker://nikolaik/python-nodejs:python3.11-nodejs20
# 4. Configure .env to use the local SIF
TERMINAL_ENV=singularity
TERMINAL_SINGULARITY_IMAGE=/scratch/$USER/.apptainer/python-nodejs.sif
```
**Tip:** The batch scripts in `configs/` automatically handle SIF pre-building if `/scratch` is available.
### Modal Cloud Backend Setup
[Modal](https://modal.com) provides serverless cloud compute for running sandboxed environments at scale.
```bash
# 1. Install Modal and dependencies
pip install modal boto3
# 2. Authenticate with Modal (opens browser)
modal setup
# 3. Set terminal backend to modal in .env
TERMINAL_ENV=modal
```
Modal uses CLI-based authentication (stored in `~/.modal/`), so no API key is needed in `.env`. After running `modal setup`, commands will automatically execute in Modal's cloud sandboxes.
### Browser Tools Setup
Browser tools enable the agent to navigate websites, fill forms, click buttons, and extract content. They use [agent-browser](https://github.com/vercel-labs/agent-browser) CLI with [Browserbase](https://browserbase.com) cloud execution.
```bash
# 1. Install Node.js (if not already installed)
# Use nvm (recommended) or your package manager
# 2. Install agent-browser CLI (choose one option):
npm install -g agent-browser # Option A: Global install (recommended)
npm install # Option B: Local install (uses npx fallback)
# 3. Get Browserbase credentials
# Sign up at https://browserbase.com/ and get your:
# - API Key (from Settings → API Keys)
# - Project ID (from your project dashboard)
# 4. Add to your .env file:
BROWSERBASE_API_KEY=your_api_key_here
BROWSERBASE_PROJECT_ID=your_project_id_here
```
**Available Browser Tools:**
| Tool | Description |
|------|-------------|
| `browser_navigate` | Navigate to a URL |
| `browser_snapshot` | Get text-based page snapshot with element refs |
| `browser_click` | Click an element by ref (e.g., `@e5`) |
| `browser_type` | Type text into an input field |
| `browser_scroll` | Scroll up or down |
| `browser_back` | Go back in browser history |
| `browser_press` | Press a keyboard key (Enter, Tab, etc.) |
| `browser_close` | Close the browser session |
| `browser_get_images` | Get list of images on the page |
**Example Usage:**
```bash
# Use browser tools with web search and vision
python run_agent.py \
--query "Go to amazon.com and find the price of the latest Kindle" \
--enabled_toolsets=browser,web,vision
# Use browser-focused distribution
python batch_runner.py \
--dataset_file=browser_tasks.jsonl \
--distribution=browser_use \
--run_name=browser_run
```
See `.env.example` for all available configuration options including debug settings.
### Skills Tools
Skills are on-demand knowledge documents the agent can load when needed. They follow a **progressive disclosure** pattern to minimize token usage:
```
skills/
├── mlops/ # Category folder
│ ├── axolotl/ # Skill folder
│ │ ├── SKILL.md # Main instructions (required)
│ │ ├── references/ # Additional docs, API specs
│ │ └── templates/ # Output formats, configs
│ └── vllm/
│ └── SKILL.md
```
**Available Skills Tools:**
| Tool | Description |
|------|-------------|
| `skills_categories` | List available skill categories (~50 tokens) |
| `skills_list` | List skills with name + description (~3k tokens for 40 skills) |
| `skill_view` | Load full skill content, tags, and linked files |
**Example Usage:**
```bash
# Use skills tools
python run_agent.py \
--query "What skills do you have for fine-tuning? Show me the axolotl skill." \
--enabled_toolsets=skills
```
**Creating Skills:**
Skills use YAML frontmatter for metadata:
```yaml
---
name: my-skill
description: Brief description shown in skills_list
tags: [tag1, tag2]
related_skills: [other-skill]
version: 1.0.0
---
# Skill Content
Instructions, examples, and guidelines here...
```
Skills can include:
- `references/` - Additional documentation, API specs, examples
- `templates/` - Output formats, config files, boilerplate code
- `scripts/` - Executable helpers (Python, shell scripts)
## Session Logging
Every conversation is automatically logged to `logs/` for debugging and inspection:
```
logs/
├── session_20260201_143052_a1b2c3.json
├── session_20260201_150217_d4e5f6.json
└── ...
```
**Log Format:**
```json
{
"session_id": "20260201_143052_a1b2c3",
"model": "anthropic/claude-sonnet-4",
"session_start": "2026-02-01T14:30:52.123456",
"last_updated": "2026-02-01T14:35:12.789012",
"message_count": 8,
"conversations": [
{"from": "system", "value": "..."},
{"from": "human", "value": "..."},
{"from": "gpt", "value": "..."},
{"from": "tool", "value": "..."}
]
}
```
- **Automatic**: Logs are created and updated automatically after each conversation turn
- **Session ID in Banner**: The CLI displays the session ID in the welcome banner
- **Trajectory Format**: Uses the same format as batch processing for consistency
- **Git Ignored**: `logs/` is in `.gitignore` so logs aren't committed
## Interactive CLI
The CLI provides a rich interactive experience for working with the agent.
### Running the CLI
```bash
# Basic usage
./hermes
# With specific model
./hermes --model "anthropic/claude-sonnet-4"
# With specific toolsets
./hermes --toolsets "web,terminal,skills"
```
### CLI Commands
| Command | Description |
|---------|-------------|
| `/help` | Show available commands |
| `/tools` | List available tools by toolset |
| `/toolsets` | List available toolsets |
| `/model [name]` | Show or change the current model |
| `/prompt [text]` | View/set custom system prompt |
| `/personality [name]` | Set a predefined personality |
| `/clear` | Clear screen and reset conversation |
| `/reset` | Reset conversation only |
| `/history` | Show conversation history |
| `/save` | Save current conversation to file |
| `/config` | Show current configuration |
| `/quit` | Exit the CLI |
### Configuration
Copy `cli-config.yaml.example` to `cli-config.yaml` and customize:
```yaml
# Model settings
model:
default: "anthropic/claude-sonnet-4"
# Terminal backend (local, docker, singularity, modal, or ssh)
terminal:
env_type: "local"
cwd: "." # Use current directory
# Or use SSH for remote execution (keeps agent code isolated)
# terminal:
# env_type: "ssh"
# ssh_host: "my-server.example.com"
# ssh_user: "myuser"
# ssh_key: "~/.ssh/id_rsa"
# cwd: "/home/myuser/project"
# Enable specific toolsets
toolsets:
- all # or: web, terminal, browser, vision, etc.
# Custom personalities (use with /personality command)
agent:
personalities:
helpful: "You are a helpful assistant."
kawaii: "You are a kawaii assistant! Use cute expressions..."
```
### Personalities
Built-in personalities available via `/personality`:
- `helpful`, `concise`, `technical`, `creative`, `teacher`
- `kawaii`, `catgirl`, `pirate`, `shakespeare`, `surfer`
- `noir`, `uwu`, `philosopher`, `hype`
## Toolsets System
The agent uses a toolsets system for organizing and managing tools. All tools must be part of a toolset to be accessible - individual tool selection is not supported. This ensures consistent and logical grouping of capabilities.
### Key Concepts
- **Toolsets**: Logical groups of tools for specific use cases (e.g., "research", "development", "debugging")
- **Composition**: Toolsets can include other toolsets for powerful combinations
- **Custom Toolsets**: Create your own toolsets at runtime or by editing `toolsets.py`
- **Toolset-Only Access**: Tools are only accessible through toolsets, not individually
### Available Toolsets
See `toolsets.py` for the complete list of predefined toolsets including:
- Basic toolsets (web, terminal, vision, creative, reasoning)
- Composite toolsets (research, development, analysis, etc.)
- Scenario-specific toolsets (debugging, documentation, API testing, etc.)
- Special toolsets (safe mode without terminal, minimal, offline)
### Using Toolsets
```bash
# Use a predefined toolset
python run_agent.py --enabled_toolsets=research --query "Find latest AI papers"
# Combine multiple toolsets
python run_agent.py --enabled_toolsets=web,vision --query "Analyze this website"
# Enable all toolsets explicitly (same as omitting the flag)
python run_agent.py --enabled_toolsets=all --query "Do web research and run commands if helpful"
# Safe mode (no terminal access)
python run_agent.py --enabled_toolsets=safe --query "Help without running commands"
# List all available toolsets and tools
python run_agent.py --list_tools
```
See `toolsets.py` for the complete list of available toolsets and how to create custom ones.
## Basic Usage
### Default (all tools enabled)
```bash
# Uses OpenRouter by default - just set OPENROUTER_API_KEY in .env
python run_agent.py \
--query "search up the latest docs on jit in python 3.13 and write me basic example that's not in their docs. profile its perf" \
--max_turns 20 \
--model anthropic/claude-sonnet-4-20250514
```
### With specific toolset
```bash
python run_agent.py \
--query "Debug this Python error" \
--enabled_toolsets=debugging \
--model anthropic/claude-sonnet-4-20250514
```
### Python API
```python
from run_agent import AIAgent
# Uses OpenRouter by default (reads OPENROUTER_API_KEY from .env)
agent = AIAgent(
model="anthropic/claude-sonnet-4-20250514",
enabled_toolsets=["research"]
)
response = agent.chat("Find information about quantum computing")
# Create custom toolset at runtime
from toolsets import create_custom_toolset
create_custom_toolset(
name="my_tools",
description="My custom toolkit",
tools=["web_search"],
includes=["terminal", "vision"]
)
agent = AIAgent(enabled_toolsets=["my_tools"])
```
## Batch Processing
Process multiple prompts from a dataset in parallel with automatic checkpointing and statistics tracking:
```bash
# Basic batch processing
python batch_runner.py \
--dataset_file=prompts.jsonl \
--batch_size=20 \
--run_name=my_run
# With specific distribution
python batch_runner.py \
--dataset_file=prompts.jsonl \
--batch_size=20 \
--run_name=image_run \
--distribution=image_gen \
--num_workers=4
```
**Key Features:**
- Parallel processing with configurable workers
- Toolset distributions for varied data generation
- Automatic checkpointing and resume capability
- Combined output in `data/<run_name>/trajectories.jsonl`
- Tool usage statistics and success rates
Use `--list_distributions` to see available toolset distributions for varied data generation.
### Trajectory Compression
Post-process trajectories to fit within token budgets for training:
```bash
# Compress a directory of JSONL files
python trajectory_compressor.py --input=data/my_run
# Compress a single JSONL file
python trajectory_compressor.py --input=data/trajectories.jsonl
# Compress a 15% sample (useful for creating smaller training sets)
python trajectory_compressor.py --input=data/trajectories.jsonl --sample_percent=15
# Custom output and token target
python trajectory_compressor.py \
--input=data/trajectories.jsonl \
--output=data/compressed.jsonl \
--target_max_tokens=16000
```
**Features:**
- Protects first turns (system, human, first GPT response, first tool call)
- Protects last N turns (configurable)
- Summarizes middle turns using LLM to fit target token budget
- Supports both directory and single file input
- Optional random sampling with `--sample_percent`
- Configurable via `configs/trajectory_compression.yaml`
### Ephemeral System Prompts
The ephemeral system prompt feature allows you to guide the model's behavior during batch processing **without** saving that prompt to the training dataset trajectories. This is useful for:
- Guiding model behavior during data collection
- Adding task-specific instructions
- Keeping saved trajectories clean and focused on tool-calling format
**Example:**
```bash
python batch_runner.py \
--dataset_file=prompts.jsonl \
--batch_size=10 \
--run_name=my_run \
--ephemeral_system_prompt="You are a helpful assistant focused on image generation."
```
The ephemeral prompt influences the model's behavior during execution, but **only the standard tool-calling system prompt** is saved in the trajectory files.
## Command Line Arguments
**Single Agent (`run_agent.py`):**
- `--query`: The question or task for the agent
- `--model`: Model to use (default: claude-opus-4-20250514)
- `--api_key`: API key for authentication
- `--base_url`: API endpoint URL
- `--max_turns`: Maximum number of tool-calling iterations
- `--enabled_toolsets`: Comma-separated list of toolsets to enable. Use `all` (or `*`) to enable everything. If omitted, all toolsets are enabled by default.
- `--disabled_toolsets`: Comma-separated list of toolsets to disable
- `--list_tools`: List all available toolsets and tools
- `--save_trajectories`: Save conversation trajectories to JSONL files
**Batch Processing (`batch_runner.py`):**
- `--dataset_file`: Path to JSONL file with prompts
- `--batch_size`: Number of prompts per batch
- `--run_name`: Name for this run (for output/checkpointing)
- `--distribution`: Toolset distribution to use (default: "default")
- `--num_workers`: Number of parallel workers (default: 4)
- `--resume`: Resume from checkpoint if interrupted
- `--ephemeral_system_prompt`: System prompt used during execution but NOT saved to trajectories
- `--list_distributions`: List available toolset distributions
## Environment Variables
All environment variables can be configured in the `.env` file (copy from `.env.example`).
**LLM Provider (OpenRouter):**
- `OPENROUTER_API_KEY`: Primary LLM access via OpenRouter (supports Claude, GPT-4, Gemini, etc.)
- `LLM_MODEL`: Default model (e.g., `anthropic/claude-sonnet-4`, `openai/gpt-4o`)
**Tool API Keys:**
- `FIRECRAWL_API_KEY`: Web tools (search, extract, crawl)
- `NOUS_API_KEY`: Vision and reasoning tools
- `FAL_KEY`: Image generation tools
**Terminal Tool Configuration (mini-swe-agent backend):**
- `TERMINAL_ENV`: Backend type - `local`, `docker`, `singularity`, `modal`, or `ssh` (default: `local`)
- `TERMINAL_DOCKER_IMAGE`: Docker image for docker backend (default: `python:3.11-slim`)
- `TERMINAL_SINGULARITY_IMAGE`: Singularity/Apptainer image (can be `docker://...` URL or local `.sif` path)
- `TERMINAL_TIMEOUT`: Command timeout in seconds (default: `60`)
- `TERMINAL_LIFETIME_SECONDS`: Cleanup inactive environments after this time (default: `300`)
- `TERMINAL_CWD`: Working directory inside containers (default: `/tmp`)
- `TERMINAL_SCRATCH_DIR`: Custom scratch directory for sandbox storage (optional, auto-detects `/scratch`)
- `SUDO_PASSWORD`: Enable sudo commands by piping password via `sudo -S` (works with all backends)
- If unset in CLI mode, you'll be prompted interactively when sudo is needed (45s timeout)
**SSH Backend Configuration (for remote execution):**
- `TERMINAL_SSH_HOST`: Remote server hostname or IP
- `TERMINAL_SSH_USER`: SSH username
- `TERMINAL_SSH_PORT`: SSH port (default: `22`)
- `TERMINAL_SSH_KEY`: Path to SSH private key (optional, uses ssh-agent if not set)
**Browser Tool Configuration (agent-browser + Browserbase):**
- `BROWSERBASE_API_KEY`: Browserbase API key for cloud browser execution
- `BROWSERBASE_PROJECT_ID`: Browserbase project ID
- `BROWSER_SESSION_TIMEOUT`: Session timeout in seconds (default: `300`)
**Legacy Hecate Terminal Backend (optional):**
- `MORPH_API_KEY`: For Hecate/MorphCloud terminal backend
- `HECATE_VM_LIFETIME_SECONDS`: VM lifetime (default: 300)
- `HECATE_DEFAULT_SNAPSHOT_ID`: Default snapshot (default: snapshot_p5294qxt)
**Debug Options:**
- `WEB_TOOLS_DEBUG`, `VISION_TOOLS_DEBUG`, `MOA_TOOLS_DEBUG`, `IMAGE_TOOLS_DEBUG`: Enable debug logging
## Key Files
| File | Purpose |
|------|---------|
| `hermes` | CLI launcher script (run with `./hermes`) |
| `cli.py` | Interactive CLI implementation |
| `cli-config.yaml` | CLI configuration (copy from `.example`) |
| `run_agent.py` | Main agent runner - single query execution |
| `batch_runner.py` | Parallel batch processing with checkpointing |
| `model_tools.py` | Core tool definitions and handlers |
| `toolsets.py` | Toolset definitions and composition |
| `toolset_distributions.py` | Probability distributions for data generation |
| `trajectory_compressor.py` | Post-process trajectories for training |
| `tools/` | Individual tool implementations |
| `tools/skills_tool.py` | Skills system with progressive disclosure |
| `skills/` | On-demand knowledge documents |
| `docs/` | Documentation |
| `configs/` | Example batch run scripts |

View file

@ -0,0 +1,32 @@
README.md
atropos_compatible_agent.py
batch_runner.py
local_server.py
model_tools.py
pyproject.toml
run_agent.py
toolset_distributions.py
toolsets.py
trajectory_compressor.py
hermes_agent.egg-info/PKG-INFO
hermes_agent.egg-info/SOURCES.txt
hermes_agent.egg-info/dependency_links.txt
hermes_agent.egg-info/entry_points.txt
hermes_agent.egg-info/requires.txt
hermes_agent.egg-info/top_level.txt
tests/test_batch_runner.py
tests/test_checkpoint_resumption.py
tests/test_modal_terminal.py
tests/test_nous_api_limits.py
tests/test_nous_api_pattern.py
tests/test_temperature_fix.py
tests/test_web_tools.py
tools/__init__.py
tools/browser_tool.py
tools/image_generation_tool.py
tools/mixture_of_agents_tool.py
tools/skills_tool.py
tools/terminal_hecate.py
tools/terminal_tool.py
tools/vision_tools.py
tools/web_tools.py

View file

@ -0,0 +1 @@

View file

@ -0,0 +1,2 @@
[console_scripts]
hermes-agent = run_agent:main

View file

@ -0,0 +1,27 @@
openai
python-dotenv
fire
httpx
rich
tenacity
pyyaml
prompt_toolkit
requests
jinja2
pydantic>=2.0
firecrawl-py
fal-client
litellm>=1.75.5
typer
platformdirs
[atropos]
atroposlib @ git+ssh://git@github.com/NousResearch/atropos.git
[dev]
pytest
pytest-asyncio
[modal]
modal
boto3

View file

@ -0,0 +1,9 @@
atropos_compatible_agent
batch_runner
local_server
model_tools
run_agent
tools
toolset_distributions
toolsets
trajectory_compressor

345
local_server.py Normal file
View file

@ -0,0 +1,345 @@
"""
Local OpenAI-compatible server implementation for Hermes-Agent (Atropos integration).
Extends the Atropos APIServer to work with local OpenAI-compatible APIs (e.g. Ollama),
providing tokens_and_logprobs_completion support via client-side tokenization.
"""
import asyncio
import os
import warnings
from typing import Any, List, Optional
import openai
from openai.types.chat.chat_completion import ChatCompletion
from openai.types.completion import Completion
from atroposlib.envs.server_handling.server_baseline import (
APIServer,
APIServerConfig,
ReasoningConfig,
)
class LocalServer(APIServer):
    """
    OpenAI-compatible local server with tokens_and_logprobs support.

    Talks to an OpenAI-compatible API (typically served under ``/v1``, e.g.
    Ollama) and recovers token ids by tokenizing text on the client side.

    Note: Many local servers don't return per-token logprobs in the standard
    API, so this implementation uses placeholder logprobs (0.0) for PoC
    purposes. For production training, use vLLM/SGLang servers that return
    real logprobs.
    """

    # Simple ChatML-style template installed when the tokenizer has none.
    # ManagedServer's chat_completion needs a chat template to work.
    _DEFAULT_CHAT_TEMPLATE = (
        "{% for message in messages %}"
        "{% if message['role'] == 'system' %}<|im_start|>system\n{{ message['content'] }}<|im_end|>\n"
        "{% elif message['role'] == 'user' %}<|im_start|>user\n{{ message['content'] }}<|im_end|>\n"
        "{% elif message['role'] == 'assistant' %}<|im_start|>assistant\n{{ message['content'] }}<|im_end|>\n"
        "{% endif %}"
        "{% endfor %}"
        "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
    )

    def __init__(
        self,
        config: APIServerConfig,
        tokenizer: Optional[Any] = None,
        tokenizer_name: str = "gpt2",
        reasoning_config: Optional[ReasoningConfig] = None,
    ):
        """
        Initialize the local server.

        Args:
            config: Server configuration (base URL, API key, timeout, ...).
            tokenizer: Pre-initialized tokenizer (optional).
            tokenizer_name: Name of tokenizer to load if ``tokenizer`` is not
                provided (requires the optional ``transformers`` package).
            reasoning_config: Optional reasoning configuration.

        Raises:
            ModuleNotFoundError: If no tokenizer is given and ``transformers``
                is not installed.
        """
        # Build the OpenAI client pointing to the server's /v1 endpoint.
        self.openai = openai.AsyncClient(
            api_key=config.api_key or "local",  # Local servers often ignore auth
            base_url=self._normalize_base_url(config.base_url),
            timeout=config.timeout,
        )

        # Initialize tokenizer.
        if tokenizer is not None:
            self.tokenizer = tokenizer
        else:
            try:
                from transformers import AutoTokenizer  # type: ignore
            except ModuleNotFoundError as exc:
                raise ModuleNotFoundError(
                    "Missing optional dependency 'transformers'. Pass a tokenizer instance to LocalServer, "
                    "or install transformers to enable `tokenizer_name` auto-loading."
                ) from exc
            self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

        # Add a simple chat template if the tokenizer doesn't have one.
        if getattr(self.tokenizer, "chat_template", None) is None:
            self.tokenizer.chat_template = self._DEFAULT_CHAT_TEMPLATE

        super().__init__(config, reasoning_config=reasoning_config)

        # Local servers are treated as always-healthy unless a status task is enabled.
        self.server_healthy = True

    @staticmethod
    def _normalize_base_url(base_url: Optional[str]) -> Optional[str]:
        """
        Return ``base_url`` with exactly one trailing ``/v1`` path segment.

        Trailing slashes are stripped *before* the suffix check so that
        ``http://host/v1/`` is not turned into ``http://host/v1/v1``.
        Empty/None values are returned unchanged.
        """
        if not base_url:
            return base_url
        trimmed = base_url.rstrip("/")
        if trimmed.endswith("/v1"):
            return trimmed
        return f"{trimmed}/v1"

    @classmethod
    def from_env(
        cls,
        base_url: Optional[str] = None,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        tokenizer_name: str = "gpt2",
        **kwargs,
    ) -> "LocalServer":
        """
        Create a LocalServer from environment variables (or explicit overrides).

        Env vars (checked in order, after loading ``.env`` via python-dotenv):
            - base URL: LOCAL_LLM_BASE_URL, LLM_BASE_URL, OLLAMA_BASE_URL
              (falls back to ``http://localhost:11434``)
            - model: LOCAL_LLM_MODEL, LLM_MODEL, OLLAMA_MODEL
              (falls back to ``hermes3:8b``)
            - API key: LOCAL_LLM_API_KEY, LLM_API_KEY, OLLAMA_API_KEY
              (falls back to ``"local"``)

        Keyword Args:
            timeout: Request timeout passed to the config (default 120).
            num_max_requests_at_once: Passed to the config (default 4).
            num_requests_for_eval: Passed to the config (default 4).
            tokenizer: Optional pre-initialized tokenizer forwarded to the
                constructor, so ``transformers`` is not required.
            reasoning_config: Optional reasoning configuration forwarded to
                the constructor.
        """
        from dotenv import load_dotenv

        load_dotenv()

        base_url = (
            base_url
            or os.getenv("LOCAL_LLM_BASE_URL")
            or os.getenv("LLM_BASE_URL")
            or os.getenv("OLLAMA_BASE_URL")
            or "http://localhost:11434"
        )
        model = (
            model
            or os.getenv("LOCAL_LLM_MODEL")
            or os.getenv("LLM_MODEL")
            or os.getenv("OLLAMA_MODEL")
            or "hermes3:8b"
        )
        api_key = (
            api_key
            or os.getenv("LOCAL_LLM_API_KEY")
            or os.getenv("LLM_API_KEY")
            or os.getenv("OLLAMA_API_KEY")
        )

        config = APIServerConfig(
            model_name=model,
            base_url=base_url,
            api_key=api_key or "local",
            timeout=kwargs.get("timeout", 120),
            num_max_requests_at_once=kwargs.get("num_max_requests_at_once", 4),
            num_requests_for_eval=kwargs.get("num_requests_for_eval", 4),
            health_check=False,  # Local dev servers often lack /health
        )
        # Forward the optional constructor arguments instead of silently
        # dropping them; both default to None exactly as in __init__.
        return cls(
            config,
            tokenizer=kwargs.get("tokenizer"),
            tokenizer_name=tokenizer_name,
            reasoning_config=kwargs.get("reasoning_config"),
        )

    async def check_server_status_task(self, chat_completion: bool = True):
        """
        Poll the server forever and keep ``self.server_healthy`` up to date.

        Every 5 seconds a minimal one-token request is issued (chat or plain
        completion depending on ``chat_completion``); any exception marks the
        server as unhealthy until the next successful probe.
        """
        while True:
            try:
                # Simple health check via a minimal completion.
                if chat_completion:
                    await self.openai.chat.completions.create(
                        model=self.config.model_name,
                        messages=[{"role": "user", "content": "hi"}],
                        max_tokens=1,
                    )
                else:
                    await self.openai.completions.create(
                        model=self.config.model_name,
                        prompt="hi",
                        max_tokens=1,
                    )
                self.server_healthy = True
            except Exception:
                # Best-effort probe: any failure just flips the health flag.
                self.server_healthy = False
            await asyncio.sleep(5)

    @staticmethod
    def _merge_choices(responses):
        """
        Fold the choices of several single-sample responses into the first one.

        Choices are re-indexed so the merged response looks like one ``n > 1``
        response. The first response object is mutated and returned.
        """
        merged = responses[0]
        for extra in responses[1:]:
            for choice in extra.choices:
                choice.index = len(merged.choices)
                merged.choices.append(choice)
        return merged

    async def _chat_completion_wrapper(self, **kwargs) -> ChatCompletion:
        """
        Wrapper for chat completion using an OpenAI-compatible API.

        Requires ``model`` and ``messages``. When ``n > 1`` the request is
        fanned out into ``n`` concurrent single-sample requests whose choices
        are merged into one response.
        """
        assert kwargs.get("model") is not None, "Model is required!"
        assert kwargs.get("messages") is not None, "Messages are required!"

        n = kwargs.get("n", 1)
        if n is None:
            # An explicit n=None means "server default", i.e. a single sample.
            n = 1

        if n <= 1:
            return await self.openai.chat.completions.create(**kwargs)

        # Ollama doesn't support n > 1, so we make multiple requests.
        single = {**kwargs, "n": 1}
        responses = await asyncio.gather(
            *[self.openai.chat.completions.create(**single) for _ in range(n)]
        )
        return self._merge_choices(responses)

    async def _completion_wrapper(self, **kwargs) -> Completion:
        """
        Wrapper for completion using an OpenAI-compatible API.

        Requires ``model`` and ``prompt``. When ``n > 1`` the request is
        fanned out into ``n`` concurrent single-sample requests whose choices
        are merged into one response.
        """
        assert kwargs.get("model") is not None, "Model is required!"
        assert kwargs.get("prompt") is not None, "Prompt is required!"

        n = kwargs.get("n", 1)
        if n is None:
            # An explicit n=None means "server default", i.e. a single sample.
            n = 1

        if n <= 1:
            return await self.openai.completions.create(**kwargs)

        # Ollama doesn't support n > 1.
        single = {**kwargs, "n": 1}
        responses = await asyncio.gather(
            *[self.openai.completions.create(**single) for _ in range(n)]
        )
        return self._merge_choices(responses)

    async def _tokens_and_logprobs_completion_wrapper(
        self, **kwargs
    ) -> tuple[List[int], List[List[int]], List[List[float]], List[str]]:
        """
        Wrapper for tokens and logprobs completion.

        Accepts either ``input_ids`` (from ManagedServer; decoded to text with
        the tokenizer) or a text ``prompt``. Only ``model``, ``n``,
        ``max_tokens``, ``temperature`` and ``stop`` are forwarded to the API;
        other keyword arguments are ignored. Samples are requested one at a
        time; if the completions endpoint raises, that sample is retried via
        the chat endpoint with the prompt as a single user message.

        Returns:
            Tuple of (prompt_tokens, output_tokens_list, output_logprobs_list,
            finish_reasons).

        Note: Many OpenAI-compatible local servers don't return per-token
        logprobs, so we use placeholder logprobs (0.0). For real training, use
        vLLM/SGLang.
        """
        model = kwargs.get("model")
        assert model is not None, "Model is required!"

        # Handle input_ids (from ManagedServer) or prompt.
        if "input_ids" in kwargs:
            prompt_tokens = kwargs.pop("input_ids")
            prompt = self.tokenizer.decode(prompt_tokens)
            kwargs.pop("prompt", None)
        else:
            prompt = kwargs.pop("prompt", "")
            prompt_tokens = self.tokenizer.encode(prompt, add_special_tokens=True)

        n = kwargs.pop("n", 1)
        if n is None:
            # An explicit n=None means a single sample.
            n = 1
        max_tokens = kwargs.pop("max_tokens", 256)
        temperature = kwargs.pop("temperature", 0.7)
        stop = kwargs.pop("stop", None)

        # Make completion requests.
        completions = []
        for _ in range(n):
            try:
                response = await self.openai.completions.create(
                    model=model,
                    prompt=prompt,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    stop=stop,
                )
            except Exception as e:
                # Fallback to chat completion if completion endpoint not supported.
                warnings.warn(f"Completion API failed, trying chat: {e}")
                response = await self.openai.chat.completions.create(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=max_tokens,
                    temperature=temperature,
                    stop=stop,
                )
            completions.append(response)

        output_tokens_list = []
        output_logprobs_list = []
        finish_reasons = []
        for completion in completions:
            choice = completion.choices[0]
            if hasattr(choice, "text"):
                # Completion API response.
                text = choice.text or ""
            else:
                # Chat completion API response.
                text = choice.message.content or ""
            finish_reason = choice.finish_reason or "stop"

            # Tokenize output.
            output_tokens = self.tokenizer.encode(text, add_special_tokens=False)

            # Placeholder logprobs (Ollama doesn't provide per-token logprobs easily).
            # In production, use vLLM/SGLang which return real logprobs.
            output_logprobs = [0.0] * len(output_tokens)

            output_tokens_list.append(output_tokens)
            output_logprobs_list.append(output_logprobs)
            finish_reasons.append(finish_reason)

        return prompt_tokens, output_tokens_list, output_logprobs_list, finish_reasons

    def managed_server(self, tokenizer=None, track_tree: bool = False):
        """
        Create a ManagedServer context manager for this server.

        Args:
            tokenizer: Optional tokenizer override (defaults to this server's
                tokenizer).
            track_tree: Whether to maintain tree structure for multi-turn.

        Returns:
            A ManagedServerContext; use it with ``async with``.
        """
        return ManagedServerContext(
            self,
            tokenizer=tokenizer or self.tokenizer,
            track_tree=track_tree,
        )
class ManagedServerContext:
    """
    Async context manager that builds a ManagedServer around a LocalServer.

    Usage:
        async with server.managed_server(tokenizer=tokenizer) as managed:
            response = await managed.chat_completion(...)
            state = managed.get_state()
    """

    def __init__(self, server: LocalServer, tokenizer, track_tree: bool = False):
        # No ManagedServer exists until the context is entered.
        self.managed = None
        self.server, self.tokenizer, self.track_tree = server, tokenizer, track_tree

    async def __aenter__(self):
        # Imported at entry time rather than at module import time.
        from atroposlib.envs.server_handling.managed_server import ManagedServer

        managed = ManagedServer(
            self.server,
            tokenizer=self.tokenizer,
            track_tree=self.track_tree,
        )
        self.managed = managed
        return managed

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        active = self.managed
        if active:
            active.reset()
        # Returning False lets any exception raised inside the block propagate.
        return False

View file

@ -19,6 +19,7 @@ dependencies = [
"rich",
"tenacity",
"pyyaml",
"prompt_toolkit",
"requests",
"jinja2",
"pydantic>=2.0",
@ -34,6 +35,8 @@ dependencies = [
[project.optional-dependencies]
modal = ["modal", "boto3"]
dev = ["pytest", "pytest-asyncio"]
# Install Atropos from source (PyPI is often stale for this internal dependency).
atropos = ["atroposlib @ git+ssh://git@github.com/NousResearch/atropos.git"]
[project.scripts]
hermes-agent = "run_agent:main"
@ -47,6 +50,7 @@ py-modules = [
"trajectory_compressor",
"toolset_distributions",
"atropos_compatible_agent",
"local_server",
]
[tool.setuptools.packages.find]

3825
uv.lock generated Normal file

File diff suppressed because it is too large Load diff