Update environment configuration and enhance tool definitions

- Modified `.env.example` to set default terminal environment to 'local' and updated Docker, Singularity, and Modal image references to use 'python:3.11-slim'.
- Updated `package.json` to include Node.js engine requirements and modified post-install script for better user guidance.
- Enhanced `pyproject.toml` to reflect new dependencies and optional dependencies for modal and development environments.
- Improved `README.md` with additional setup instructions for Singularity and Node.js dependencies, along with clearer toolset documentation.
- Refactored `model_tools.py` to include new tool definitions and ensure consistency across toolsets.
- Updated architecture documentation to clarify tool structure and registration processes.
2026-04-26 01:01:40 +00:00 · 2026-01-29 22:36:07 +00:00 · 2026-01-29 22:36:07 +00:00 · 7ea17bb957
commit 7ea17bb957
parent f8846f85a1
8 changed files with 535 additions and 257 deletions
--- a/.env.example
+++ b/.env.example
@ -37,12 +37,12 @@ FAL_KEY=
 # - singularity: Runs in Apptainer/Singularity containers (HPC clusters, no root needed)
 # - docker: Runs in Docker containers (isolated, requires Docker + docker group)
 # - modal: Runs in Modal cloud sandboxes (scalable, requires Modal account)
-TERMINAL_ENV=singularity
+TERMINAL_ENV=local
 # Container images (for singularity/docker/modal backends)
-TERMINAL_DOCKER_IMAGE=python:3.11
+TERMINAL_DOCKER_IMAGE=python:3.11-slim
-TERMINAL_SINGULARITY_IMAGE=docker://python:3.11
+TERMINAL_SINGULARITY_IMAGE=docker://python:3.11-slim
-TERMINAL_MODAL_IMAGE=python:3.11
+TERMINAL_MODAL_IMAGE=python:3.11-slim
 # Working directory inside the container
 TERMINAL_CWD=/tmp
@ -53,13 +53,19 @@ TERMINAL_TIMEOUT=60
 # Cleanup inactive environments after this many seconds
 TERMINAL_LIFETIME_SECONDS=300
 # Scratch directory for Singularity sandboxes (optional)
 # If not set, uses /scratch (if available) or /tmp
 # TERMINAL_SCRATCH_DIR=/scratch/myuser
 # Disk usage warning threshold in GB (default: 500)
 TERMINAL_DISK_WARNING_GB=500
 # =============================================================================
 # MODAL CLOUD BACKEND (Optional - for TERMINAL_ENV=modal)
 # =============================================================================
 # Modal uses CLI authentication, not environment variables.
 # Run: pip install modal && modal setup
 # This will authenticate via browser and store credentials locally.
 # No API key needed in .env - Modal handles auth automatically.
 # =============================================================================
 # BROWSER TOOL CONFIGURATION (agent-browser + Browserbase)
@ -79,60 +85,19 @@ BROWSERBASE_API_KEY=
 BROWSERBASE_PROJECT_ID=
 # Enable residential proxies for better CAPTCHA solving (default: true)
 # Routes traffic through residential IPs, significantly improves success rate
 BROWSERBASE_PROXIES=true
 # Enable advanced stealth mode (default: false, requires Scale Plan)
 # Uses custom Chromium build to avoid bot detection altogether
 BROWSERBASE_ADVANCED_STEALTH=false
-# Browser session timeout in seconds (optional, default: 300)
+# Browser session timeout in seconds (default: 300)
 # Sessions are cleaned up after this duration of inactivity
 BROWSER_SESSION_TIMEOUT=300
 # =============================================================================
-# Browser automation requires Browserbase cloud service for remote browser execution.
+# LEGACY/OPTIONAL
 # This allows the agent to navigate websites, fill forms, and extract information.
 # Browserbase API Key - Cloud browser execution
 # Get at: https://browserbase.com/
 BROWSERBASE_API_KEY=
 # Browserbase Project ID - From your Browserbase dashboard
 BROWSERBASE_PROJECT_ID=
 # Enable proxies for better CAPTCHA solving and anti-bot avoidance (default: true)
 # Proxies route traffic through residential IPs for more reliable access
 BROWSERBASE_PROXIES=true
 # Enable advanced stealth mode (default: false, requires Scale Plan)
 # Uses custom Chromium build to avoid bot detection altogether
 BROWSERBASE_ADVANCED_STEALTH=false
 # Browser session timeout in seconds (optional, default: 300)
 # Sessions are cleaned up after this duration of inactivity
 BROWSER_SESSION_TIMEOUT=300
 # =============================================================================
 # Browser automation requires Browserbase cloud service for remote browser execution.
 # This allows the agent to navigate websites, fill forms, and extract information.
 # Browserbase API Key - Cloud browser execution
 # Get at: https://browserbase.com/
 BROWSERBASE_API_KEY=
 # Browserbase Project ID - From your Browserbase dashboard
 BROWSERBASE_PROJECT_ID=
 # Browser session timeout in seconds (optional, default: 300)
 # Sessions are cleaned up after this duration of inactivity
 BROWSER_SESSION_TIMEOUT=300
 # =============================================================================
 # LEGACY/OPTIONAL API KEYS
 # =============================================================================
-# Morph API Key - For legacy Hecate terminal backend (terminal-hecate tool)
+# Morph API Key - For legacy Hecate terminal backend
 # Get at: https://morph.so/
 MORPH_API_KEY=
@ -147,12 +112,3 @@ WEB_TOOLS_DEBUG=false
 VISION_TOOLS_DEBUG=false
 MOA_TOOLS_DEBUG=false
 IMAGE_TOOLS_DEBUG=false
 # Scratch directory for Singularity sandboxes (optional)
 # If not set, uses /scratch (if available) or /tmp
 # Set this to a directory with lots of space for large pip installs
 # TERMINAL_SCRATCH_DIR=/scratch/myuser
 # Disk usage warning threshold in GB (default: 500)
 # Warning is printed when total sandbox disk usage exceeds this
 TERMINAL_DISK_WARNING_GB=500
--- a/README.md
+++ b/README.md
@ -32,11 +32,14 @@ git submodule update --init --recursive
 python3 -m venv venv
 source venv/bin/activate  # On Windows: venv\Scripts\activate
-# Install required packages
+# Install Python packages
 pip install -r requirements.txt
 # Install mini-swe-agent for terminal tools
 pip install -e ./mini-swe-agent
 # Install Node.js dependencies for browser tools (requires Node.js)
 npm install
 ```
 ### 3. Configure Environment Variables
@ -82,6 +85,31 @@ TERMINAL_TIMEOUT=60
 - **docker**: Requires Docker installed and user in `docker` group
 - **modal**: Requires Modal account (see setup below)
 ### Singularity/Apptainer Setup (Recommended for HPC)
 Singularity/Apptainer provides rootless container execution, ideal for HPC clusters:
 ```bash
 # 1. Verify Apptainer is installed
 apptainer --version  # or: singularity --version
 # 2. Set up cache directories (important for parallel workers)
 # Use /scratch if available (HPC), otherwise /tmp
 export APPTAINER_CACHEDIR=/scratch/$USER/.apptainer
 export APPTAINER_TMPDIR=/scratch/$USER/.apptainer/tmp
 mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR"
 # 3. Pre-build SIF image (recommended for parallel batch processing)
 # This avoids race conditions when multiple workers start simultaneously
 apptainer build $APPTAINER_CACHEDIR/python-nodejs.sif docker://nikolaik/python-nodejs:python3.11-nodejs20
 # 4. Configure .env to use the local SIF
 TERMINAL_ENV=singularity
 TERMINAL_SINGULARITY_IMAGE=/scratch/$USER/.apptainer/python-nodejs.sif
 ```
 **Tip:** The batch scripts in `configs/` automatically handle SIF pre-building if `/scratch` is available.
 ### Modal Cloud Backend Setup
 [Modal](https://modal.com) provides serverless cloud compute for running sandboxed environments at scale.
@ -107,8 +135,9 @@ Browser tools enable the agent to navigate websites, fill forms, click buttons,
 # 1. Install Node.js (if not already installed)
 # Use nvm (recommended) or your package manager
-# 2. Install agent-browser CLI globally
+# 2. Install agent-browser CLI (choose one option):
-npm install -g agent-browser
+npm install -g agent-browser     # Option A: Global install (recommended)
 npm install                      # Option B: Local install (uses npx fallback)
 # 3. Get Browserbase credentials
 # Sign up at https://browserbase.com/ and get your:
@ -188,7 +217,7 @@ python run_agent.py --enabled_toolsets=safe --query "Help without running comman
 python run_agent.py --list_tools
 ```
-For detailed documentation on toolsets, see `TOOLSETS_README.md`.
+See `toolsets.py` for the complete list of available toolsets and how to create custom ones.
 ## Basic Usage
@ -260,8 +289,36 @@ python batch_runner.py \
 - Combined output in `data/<run_name>/trajectories.jsonl`
 - Tool usage statistics and success rates
-**Quick Start:** See [QUICKSTART_BATCH.md](QUICKSTART_BATCH.md) for a 5-minute getting started guide.  
+Use `--list_distributions` to see available toolset distributions for varied data generation.
-**Full Documentation:** See [BATCH_PROCESSING.md](BATCH_PROCESSING.md) for comprehensive documentation.
+
 ### Trajectory Compression
 Post-process trajectories to fit within token budgets for training:
 ```bash
 # Compress a directory of JSONL files
 python trajectory_compressor.py --input=data/my_run
 # Compress a single JSONL file
 python trajectory_compressor.py --input=data/trajectories.jsonl
 # Compress a 15% sample (useful for creating smaller training sets)
 python trajectory_compressor.py --input=data/trajectories.jsonl --sample_percent=15
 # Custom output and token target
 python trajectory_compressor.py \
  --input=data/trajectories.jsonl \
  --output=data/compressed.jsonl \
  --target_max_tokens=16000
 ```
 **Features:**
 - Protects first turns (system, human, first GPT response, first tool call)
 - Protects last N turns (configurable)
 - Summarizes middle turns using LLM to fit target token budget
 - Supports both directory and single file input
 - Optional random sampling with `--sample_percent`
 - Configurable via `configs/trajectory_compression.yaml`
 ### Ephemeral System Prompts
@ -282,7 +339,7 @@ python batch_runner.py \
 The ephemeral prompt will influence the model's behavior during execution, but **only the standard tool-calling system prompt** will be saved in the trajectory files.
-**Documentation:** See [docs/ephemeral_system_prompt.md](docs/ephemeral_system_prompt.md) for complete details.
+The ephemeral prompt influences model behavior during execution, but **only the standard tool-calling system prompt** is saved in trajectory files.
 ## Command Line Arguments
@ -321,11 +378,13 @@ All environment variables can be configured in the `.env` file (copy from `.env.
 - `FAL_KEY`: Image generation tools
 **Terminal Tool Configuration (mini-swe-agent backend):**
- `TERMINAL_ENV`: Backend type - `local`, `docker`, or `modal` (default: `local`)
+- `TERMINAL_ENV`: Backend type - `local`, `docker`, `singularity`, or `modal` (default: `local`)
- `TERMINAL_DOCKER_IMAGE`: Docker image to use (default: `python:3.11-slim`)
+- `TERMINAL_DOCKER_IMAGE`: Docker image for docker backend (default: `python:3.11-slim`)
 - `TERMINAL_SINGULARITY_IMAGE`: Singularity/Apptainer image (can be `docker://...` URL or local `.sif` path)
 - `TERMINAL_TIMEOUT`: Command timeout in seconds (default: `60`)
 - `TERMINAL_LIFETIME_SECONDS`: Cleanup inactive environments after this time (default: `300`)
 - `TERMINAL_CWD`: Working directory inside containers (default: `/tmp`)
 - `TERMINAL_SCRATCH_DIR`: Custom scratch directory for sandbox storage (optional, auto-detects `/scratch`)
 **Browser Tool Configuration (agent-browser + Browserbase):**
 - `BROWSERBASE_API_KEY`: Browserbase API key for cloud browser execution
@ -340,18 +399,16 @@ All environment variables can be configured in the `.env` file (copy from `.env.
 **Debug Options:**
 - `WEB_TOOLS_DEBUG`, `VISION_TOOLS_DEBUG`, `MOA_TOOLS_DEBUG`, `IMAGE_TOOLS_DEBUG`: Enable debug logging
-## Documentation
+## Key Files
-**Single Agent Usage:**
+| File | Purpose |
- `TOOLSETS_README.md`: Comprehensive guide to the toolsets system
+|------|---------|
- `toolsets.py`: View and modify available toolsets
+| `run_agent.py` | Main agent runner - single query execution |
- `model_tools.py`: Core tool definitions and handlers
+| `batch_runner.py` | Parallel batch processing with checkpointing |
-
+| `model_tools.py` | Core tool definitions and handlers |
-**Batch Processing:**
+| `toolsets.py` | Toolset definitions and composition |
- `QUICKSTART_BATCH.md`: 5-minute quick start guide
+| `toolset_distributions.py` | Probability distributions for data generation |
- `BATCH_PROCESSING.md`: Complete batch processing documentation
+| `trajectory_compressor.py` | Post-process trajectories for training |
- `toolset_distributions.py`: Toolset distributions for data generation
+| `tools/` | Individual tool implementations |
-
+| `architecture/` | Design documentation |
-## Examples
+| `configs/` | Example batch run scripts |
 See `TOOLSETS_README.md` for extensive examples of using different toolsets for various scenarios.
--- a/architecture/agents.md
+++ b/architecture/agents.md
@ -1,55 +1,104 @@
 # Agents
-Agents can be viewed as an FSM using an LLM to generate inputs into the system that operates over a DAG.
+The agent is the core loop that orchestrates LLM calls and tool execution.
-What this really means is that the agent is just a function without memory that uses text inputs and outputs in a
+## AIAgent Class
-defined order.
+
 The main agent is implemented in `run_agent.py`:
 ```python
-def my_agent(*args, **kwargs) -> str:
+class AIAgent:
-    # do whatever you want!
+    def __init__(
-    return "Hi I'm an agent!"
+        self,
        model: str = "anthropic/claude-sonnet-4",
        api_key: str = None,
        base_url: str = "https://openrouter.ai/api/v1",
        max_turns: int = 20,
        enabled_toolsets: list = None,
        disabled_toolsets: list = None,
        verbose_logging: bool = False,
    ):
        # Initialize OpenAI client, load tools based on toolsets
        ...
    def chat(self, user_message: str, task_id: str = None) -> str:
        # Main entry point - runs the agent loop
        ...
 ```
-Now obviously, that's like saying water's wet, but we're going to be using that definition to inform our design of the
+## Agent Loop
 library, namely, that we should *not* store agent state outside the function call.
-## The Agent Class
+The core loop in `_run_agent_loop()`:
-So we don't have state, why are we using a class?
+```
-
+1. Add user message to conversation
-Well, we want to initialize things, we want to have some configuration, and we want to have some helper functions.
+2. Call LLM with tools
-Preferably all in a single place.
+3. If LLM returns tool calls:
   - Execute each tool
   - Add tool results to conversation
   - Go to step 2
 4. If LLM returns text response:
   - Return response to user
 ```
 ```python
-class BaseAgent:
+while turns < max_turns:
-    def agent_primitives(self) -> list[BaseAgent]:
+    response = client.chat.completions.create(
-        # Returns a list of Agents that are utilized by this agent to generate inputs
+        model=model,
-        # We use agent primitives here instead of subagents because these are going to be part
+        messages=messages,
-        # of the message graph, not a subagent tool call.
+        tools=tool_schemas,
-        raise NotImplementedError
+    )
-    def tools(self) -> list[BaseTool]:
+    if response.tool_calls:
-        # Returns a list of tools that the agent needs to run
+        for tool_call in response.tool_calls:
-        raise NotImplementedError
+            result = await execute_tool(tool_call)
-    
+            messages.append(tool_result_message(result))
-    
+        turns += 1
-    def run(self, config, *args, **kwargs) -> ConversationGraph:
+    else:
-        llm = get_llm(config)
+        return response.content
        tools = self.tools()
        for agent in self.agent_primitives():
            tools.extend(agent.tools())
        tools = remove_duplicates(tools)
        tools = initialize_tools(tools, config)
        return self(llm, tools, config, *args, **kwargs)
    @staticmethod
    def __call__(self, llm, tools, config, *args, **kwargs) -> ConversationGraph:
        # Returns a ConversationGraph that can be parsed to get the output of the agent
        # Use w/e args/kwargs you want, as long as llm/tools/config are satisfied. 
        raise NotImplementedError
 ```
-Doesn't seem too bad (I hope), it is a bit annoying that we don't initialize everything in the constructor, but
+## Conversation Management
 hopefully we all kinda like it :)
 Messages are stored as a list of dicts following OpenAI format:
 ```python
 messages = [
    {"role": "system", "content": "You are a helpful assistant..."},
    {"role": "user", "content": "Search for Python tutorials"},
    {"role": "assistant", "content": None, "tool_calls": [...]},
    {"role": "tool", "tool_call_id": "...", "content": "..."},
    {"role": "assistant", "content": "Here's what I found..."},
 ]
 ```
 ## Reasoning Context
 For models that support reasoning (chain-of-thought), the agent:
 1. Extracts `reasoning_content` from API responses
 2. Stores it in `assistant_msg["reasoning"]` for trajectory export
 3. Passes it back via `reasoning_content` field on subsequent turns
 ## Trajectory Export
 Conversations can be exported for training:
 ```python
 agent = AIAgent(save_trajectories=True)
 agent.chat("Do something")
 # Saves to trajectories/*.jsonl in ShareGPT format
 ```
 ## Batch Processing
 For processing multiple prompts, use `batch_runner.py`:
 ```bash
 python batch_runner.py \
    --dataset_file=prompts.jsonl \
    --batch_size=20 \
    --num_workers=4 \
    --run_name=my_run
 ```
 See `batch_runner.py` for parallel execution with checkpointing.
--- a/architecture/llm_client.md
+++ b/architecture/llm_client.md
@ -1,14 +1,124 @@
 # LLM Client
-A quick wrapper over openai apis
+Hermes Agent uses the OpenAI Python SDK with OpenRouter as the backend, providing access to many models through a single API.
-## Responsibilities
+## Configuration
- Transform "normal" chat/completions requests into graphs
+```python
- Translate graphs into LLM requests
+from openai import OpenAI
 - Keep a history of graphs parsed by it
  - On Policy Data
  - Deduplicating graphs, so we don't keep previous history as separate graphs
-## How to use
+client = OpenAI(
-Exactly the same as the openai api! Just with the additional support of graph inputs and outputs.
+    api_key=os.getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1"
 )
 ```
 ## Supported Models
 Any model available on [OpenRouter](https://openrouter.ai/models):
 ```python
 # Anthropic
 model = "anthropic/claude-sonnet-4"
 model = "anthropic/claude-opus-4"
 # OpenAI
 model = "openai/gpt-4o"
 model = "openai/o1"
 # Google
 model = "google/gemini-2.0-flash"
 # Open models
 model = "meta-llama/llama-3.3-70b-instruct"
 model = "deepseek/deepseek-chat-v3"
 model = "moonshotai/kimi-k2.5"
 ```
 ## Tool Calling
 Standard OpenAI function calling format:
 ```python
 response = client.chat.completions.create(
    model=model,
    messages=messages,
    tools=[
        {
            "type": "function",
            "function": {
                "name": "web_search",
                "description": "Search the web",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {"type": "string"}
                    },
                    "required": ["query"]
                }
            }
        }
    ],
 )
 # Check for tool calls
 if response.choices[0].message.tool_calls:
    for tool_call in response.choices[0].message.tool_calls:
        name = tool_call.function.name
        args = json.loads(tool_call.function.arguments)
        # Execute tool...
 ```
 ## Reasoning Models
 Some models return reasoning/thinking content:
 ```python
 # Access reasoning if available
 message = response.choices[0].message
 if hasattr(message, 'reasoning_content') and message.reasoning_content:
    reasoning = message.reasoning_content
    # Store for trajectory export
 ```
 ## Provider Selection
 OpenRouter allows selecting specific providers:
 ```python
 response = client.chat.completions.create(
    model=model,
    messages=messages,
    extra_body={
        "provider": {
            "order": ["Anthropic", "Google"],  # Preferred providers
            "ignore": ["Novita"],              # Providers to skip
        }
    }
 )
 ```
 ## Error Handling
 Common errors and handling:
 ```python
 try:
    response = client.chat.completions.create(...)
 except openai.RateLimitError:
    # Back off and retry
 except openai.APIError as e:
    # Check e.code for specific errors
    # 400 = bad request (often provider-specific)
    # 502 = bad gateway (retry with different provider)
 ```
 ## Cost Tracking
 OpenRouter returns usage info:
 ```python
 usage = response.usage
 print(f"Tokens: {usage.prompt_tokens} + {usage.completion_tokens}")
 print(f"Cost: ${usage.cost:.6f}")  # If available
 ```
--- a/architecture/message_graph.md
+++ b/architecture/message_graph.md
@ -1,114 +1,121 @@
-# Message Graph
+# Message Format & Trajectories
-```mermaid
+Hermes Agent uses two message formats: the **API format** for LLM calls and the **trajectory format** for training data export.
 graph TD
    %% Message nodes
    SystemMsg["📋 System Message<br/>Role: System<br/>Content: Messages are nodes in a graph"]
    UserMsg["👤 User Message<br/>Role: User<br/>Content: But messages aren't the only thing in the graph"]
    subgraph PrevMessages["Previous Messages"]
        PrevSystemMsg["📋 System Message<br/>Role: System<br/>Content: Edits are kept in the graph as context"]
        PrevUserMsg["👤 User Message<br/>Role: User<br/>Content: So we can ensure they're immutable while keeping them editable"]
    end
-    %% Chat Response as a subgraph
+## API Message Format
    subgraph ChatResponseBox["💬 Chat Response"]
        ChatMetadata["📊 Metadata<br/>Temp: 1.0<br/>..."]
        ChatResponseText["📝 Response<br/>Hello, Here's a subagent call: &lt;tool&gt;subagent&lt;/tool&gt;"]
        ChatContent["Content: Hello, Here's a subagent call..."]
    end
-    %% Tool Response as a subgraph
+Standard OpenAI chat format used during execution:
    subgraph ToolResponseBox["🔧 Tool Response"]
        subgraph ToolMetadata["📊 Tool Metadata"]
            ToolMetadataLength["Length: 3"]
            subgraph ToolChat["💭 Subagent Chat"]
                SubagentSystem["📋 System<br/>Content: Subagent call received"]
                SubagentUser["👤 User<br/>Content: Process this request"]
                SubagentAssistant["🤖 Assistant<br/>Content: Processing..."]
                SubagentSystem --> SubagentUser
                SubagentUser --> SubagentAssistant
            end
        end
        ToolContent["Content: Subagent call output"]
    end
    %% Graph flow connections
    SystemMsg --> UserMsg
    PrevSystemMsg --> PrevUserMsg
    PrevMessages -.-> UserMsg
    UserMsg --> ChatResponseBox
    ChatResponseBox --> ToolResponseBox
    class SystemMsg,UserMsg messageNode
    class ChatResponseBox responseNode
    class ToolResponseBox responseNode
    class ChatMetadata,ChatResponseText,ChatContent,ToolMetadata,ToolChat,ToolContent,ToolMetadataLength metadataNode
 ```
 Messages should be a graph (DAG, specifically) of immutable elements.
 ## Why immutable elements?
 We want to train on policy
 - This means the context cannot change after we call a response.
 ## Why a graph?
 Nodes and connections are a natural way to represent the flow of information in an agent conversation.
 ## Will this be annoying to deal with?
 It shouldn't be! While there will be internal stuff that may look ???, for the interface, it should be as simple as your
 normal context window edits, so `message_history[2]['content'] = my_edit`, but internally we'll deal with the recordkeeping
 and how this ends up parsing into on policy training data, if requested.
 ## Edges
 Edges are the connections between nodes, and there are two types we are concerned with:
 - **Sequential edges**: These represent the flow of conversation, connecting messages in the order they were sent. For example, a user message followed by an assistant response.
 - **Parallel edges**: These represent versioning, e.g. edit history, context squishing, etc.
 We, however, are only concerned about parallel edges when we break the prefix, and ignore any other parallel edges.
 ## So what does this look like in practice?
 ```python
-import copy
+messages = [
    # System prompt
    {"role": "system", "content": "You are a helpful assistant with tools..."},
    # User query
    {"role": "user", "content": "Search for Python tutorials"},
-class MessageGraph:
+    # Assistant with tool call
-    def __init__(self):
+    {
-        self.messages = []
+        "role": "assistant",
-        self.prev_graph = None
+        "content": None,
        "tool_calls": [{
            "id": "call_abc123",
            "type": "function",
            "function": {
                "name": "web_search",
                "arguments": "{\"query\": \"Python tutorials\"}"
            }
        }]
    },
-    def append(self, message):
+    # Tool result
-        self.messages.append(message)
+    {
        "role": "tool",
        "tool_call_id": "call_abc123",
        "content": "{\"results\": [...]}"
    },
-    def __getitem__(self, index):
+    # Final response
-        return self.messages[index]
+    {"role": "assistant", "content": "Here's what I found..."}
-
+]
    def __setitem__(self, key, value):
        # check if an assistant message is after this indx
        needs_new_graph = False
        first_idx = -1
        for i in range(key, len(self.messages)):
            if (i == key) and (value['role'] == 'assistant') and (value['content'] == self.messages[i]['content']):
                # no op
                return
            needs_new_graph = needs_new_graph or (self.messages[i]['role'] == 'assistant')
            if needs_new_graph and first_idx == -1:
                first_idx = i
        if needs_new_graph:
            self.prev_graph = copy.deepcopy(self)
        self.messages[key] = value
    def __len__(self):
        return len(self.messages)
    def __eq__(self, other):
        return "\n\n".join(f"{msg['role']}: {msg['content']}" for msg in self) == "\n\n".join(
            f"{msg['role']}: {msg['content']}" for msg in other)
 # in use
 messages = MessageGraph()
 messages.append({'role': 'system', 'content': 'Hello, I am a system message'})
 messages[0] = {'role': 'user', 'content': 'Hello, I am a user message'}
 ```
 ## Trajectory Format (ShareGPT)
 Exported for training in ShareGPT format:
 ```json
 {
    "conversations": [
        {"from": "system", "value": "You are a helpful assistant..."},
        {"from": "human", "value": "Search for Python tutorials"},
        {"from": "gpt", "value": "<tool_call>\n{\"name\": \"web_search\", \"arguments\": {\"query\": \"Python tutorials\"}}\n</tool_call>"},
        {"from": "tool", "value": "<tool_response>\n{\"results\": [...]}\n</tool_response>"},
        {"from": "gpt", "value": "Here's what I found..."}
    ],
    "tools": "[{\"type\": \"function\", \"function\": {...}}]",
    "source": "hermes-agent"
 }
 ```
 ## Reasoning Content
 For models that output reasoning/chain-of-thought:
 **During execution** (API format):
 ```python
 # Stored internally but not sent back to model in content
 assistant_msg = {
    "role": "assistant",
    "content": "Here's what I found...",
    "reasoning": "Let me think about this step by step..."  # Internal only
 }
 ```
 **In trajectory export** (reasoning wrapped in tags):
 ```json
 {
    "from": "gpt",
    "value": "<think>\nLet me think about this step by step...\n</think>\nHere's what I found..."
 }
 ```
 ## Conversion Flow
 ```
 API Response → Internal Storage → Trajectory Export
     ↓              ↓                    ↓
 tool_calls    reasoning field      <tool_call> tags
 reasoning_content                  <think> tags
 ```
 The conversion happens in `_convert_to_trajectory_format()` in `run_agent.py`.
 ## Ephemeral System Prompts
 Batch processing supports ephemeral system prompts that guide behavior during execution but are NOT saved to trajectories:
 ```python
 # During execution: full system prompt + ephemeral guidance
 messages = [
    {"role": "system", "content": SYSTEM_PROMPT + "\n\n" + ephemeral_prompt},
    ...
 ]
 # In saved trajectory: only the base system prompt
 trajectory = {
    "conversations": [
        {"from": "system", "value": SYSTEM_PROMPT},  # No ephemeral
        ...
    ]
 }
 ```
 ## Trajectory Compression
 Long trajectories can be compressed for training using `trajectory_compressor.py`:
 - Protects first/last N turns
 - Summarizes middle turns with LLM
 - Targets specific token budget
 - See `configs/trajectory_compression.yaml` for settings
--- a/architecture/tools.md
+++ b/architecture/tools.md
@ -1,16 +1,102 @@
 # Tools
-Not much on this, yet. Tools are just a stateful wrapper around a function, so we can do things like:
+Tools are functions that extend the agent's capabilities. Each tool is defined with an OpenAI-compatible JSON schema and an async handler function.
- Keep a docker container running
+
- Keep a game online
+## Tool Structure
 Each tool module in `tools/` exports:
 1. **Schema definitions** - OpenAI function-calling format
 2. **Handler functions** - Async functions that execute the tool
 ```python
-class BaseTool:
+# Example: tools/web_tools.py
    def definitions(self) -> List[Dict[str, Any]]:
        # OpenAI API compatible definitions
        raise NotImplementedError
-    def __call__(self, *args, **kwargs) -> Dict[str, Any]:
+# Schema definition
-        # Returns at minimum {'role': 'tool', 'content': '...'}
+WEB_SEARCH_SCHEMA = {
-        raise NotImplementedError
+    "type": "function",
    "function": {
        "name": "web_search",
        "description": "Search the web for information",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"}
            },
            "required": ["query"]
        }
    }
 }
 # Handler function
 async def web_search(query: str) -> dict:
    """Execute web search and return results."""
    # Implementation...
    return {"results": [...]}
 ```
 ## Tool Categories
 | Category | Module | Tools |
 |----------|--------|-------|
 | **Web** | `web_tools.py` | `web_search`, `web_extract`, `web_crawl` |
 | **Terminal** | `terminal_tool.py` | `terminal` (local/docker/singularity/modal backends) |
 | **Browser** | `browser_tool.py` | `browser_navigate`, `browser_click`, `browser_type`, etc. |
 | **Vision** | `vision_tools.py` | `vision_analyze` |
 | **Image Gen** | `image_generation_tool.py` | `image_generate` |
 | **Reasoning** | `mixture_of_agents_tool.py` | `mixture_of_agents` |
 ## Tool Registration
 Tools are registered in `model_tools.py`:
 ```python
 # model_tools.py
 TOOL_SCHEMAS = [
    *WEB_TOOL_SCHEMAS,
    *TERMINAL_TOOL_SCHEMAS,
    *BROWSER_TOOL_SCHEMAS,
    # ...
 ]
 TOOL_HANDLERS = {
    "web_search": web_search,
    "terminal": terminal_tool,
    "browser_navigate": browser_navigate,
    # ...
 }
 ```
 ## Toolsets
 Tools are grouped into **toolsets** for logical organization (see `toolsets.py`):
 ```python
 TOOLSETS = {
    "web": {
        "description": "Web search and content extraction",
        "tools": ["web_search", "web_extract", "web_crawl"]
    },
    "terminal": {
        "description": "Command execution",
        "tools": ["terminal"]
    },
    # ...
 }
 ```
 ## Adding a New Tool
 1. Create handler function in `tools/your_tool.py`
 2. Define JSON schema following OpenAI format
 3. Register in `model_tools.py` (schemas and handlers)
 4. Add to appropriate toolset in `toolsets.py`
 5. Update `tools/__init__.py` exports
 ## Stateful Tools
 Some tools maintain state across calls within a session:
 - **Terminal**: Keeps container/sandbox running between commands
 - **Browser**: Maintains browser session for multi-step navigation
 State is managed per `task_id` and cleaned up automatically.
--- a/package.json
+++ b/package.json
@ -2,27 +2,23 @@
  "name": "hermes-agent",
  "version": "1.0.0",
  "description": "An AI agent with advanced tool-calling capabilities, featuring a flexible toolsets system for organizing and managing tools.",
-  "main": "index.js",
+  "private": true,
  "directories": {
    "doc": "docs",
    "example": "examples",
    "test": "tests"
  },
  "scripts": {
-    "test": "echo \"Error: no test specified\" && exit 1"
+    "postinstall": "echo '✅ Browser tools ready. Run: python run_agent.py --help'"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/NousResearch/Hermes-Agent.git"
  },
-  "keywords": [],
+  "license": "MIT",
  "author": "",
  "license": "ISC",
  "bugs": {
    "url": "https://github.com/NousResearch/Hermes-Agent/issues"
  },
  "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
  "dependencies": {
    "agent-browser": "^0.7.6"
  },
  "engines": {
    "node": ">=18.0.0"
  }
 }
--- a/pyproject.toml
+++ b/pyproject.toml
@ -8,21 +8,38 @@ version = "0.1.0"
 description = "AI agent with advanced tool-calling and toolsets"
 readme = "README.md"
 requires-python = ">=3.10"
-authors = [{ name = "Hermes Agent" }]
+authors = [{ name = "Nous Research" }]
 license = { text = "MIT" }
 dependencies = [
-  "firecrawl-py",
+  # Core
  "openai",
  "fal-client",
  "python-dotenv",
-  "fire"
+  "fire",
  "httpx",
  "rich",
  "tenacity",
  "pyyaml",
  "requests",
  "jinja2",
  "pydantic>=2.0",
  # Tools
  "firecrawl-py",
  "fal-client",
  # mini-swe-agent deps (terminal tool)
  "litellm>=1.75.5",
  "typer",
  "platformdirs",
 ]
 [project.optional-dependencies]
 modal = ["modal", "boto3"]
 dev = ["pytest", "pytest-asyncio"]
 [project.scripts]
 hermes-agent = "run_agent:main"
 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets"]
+py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions"]
 [tool.setuptools.packages.find]
 include = ["tools"]