mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
Add sandbox pool support to HermesAgentBaseEnv
Added directly to HermesAgentBaseEnv (no subclass needed): Config fields: - tool_pool_mode: 'default' (terminal tool), 'nomad', or 'modal' - Full Nomad settings: nomad_address, sandbox_job_id, slots_per_container, etc. - Full Modal settings: modal_image, modal_gpu, modal_slots_per_sandbox, etc. - Shared: allow_network, require_sandbox, purge_job_on_start/shutdown Methods: - _start_sandbox_backend() / _stop_sandbox_backend() - lifecycle - setup_trajectory_workspace() - optional hook for workspace prep - verify_and_score_trajectory() - optional hook for in-sandbox verification - env_manager() / process_manager() - lifecycle cleanup When tool_pool_mode='default': everything works as before (terminal tool) When tool_pool_mode='nomad'/'modal': activates sandbox pool from atropos/backends/
This commit is contained in:
parent
507b77c4ac
commit
98d945f6de
2 changed files with 197 additions and 88 deletions
|
|
@ -140,6 +140,48 @@ class HermesAgentEnvConfig(BaseEnvConfig):
|
||||||
"Options: hermes, mistral, llama3_json, qwen, deepseek_v3, etc.",
|
"Options: hermes, mistral, llama3_json, qwen, deepseek_v3, etc.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# --- Sandbox pool mode (optional, for scaled environments) ---
# Selects how tool calls are executed; "default" keeps the terminal tool.
tool_pool_mode: str = Field(
    default="default",
    description="Tool execution mode: 'default' (terminal tool per task_id), "
    "'nomad' (slot pool via Nomad/Docker/Singularity), or 'modal' (Modal sandbox pool).",
)

# Sandbox pool: shared settings
allow_network: bool = Field(
    default=True,
    description="Whether sandbox bash commands may access the network.",
)
require_sandbox: bool = Field(
    default=False,
    description="Fail closed if bubblewrap is unavailable.",
)
purge_job_on_start: bool = Field(
    default=False,
    description="Purge existing sandbox job on startup.",
)
purge_job_on_shutdown: bool = Field(
    default=True,
    description="Purge sandbox job on shutdown.",
)
acquire_timeout_s: float = Field(
    default=30.0,
    description="Slot acquisition timeout (seconds).",
)

# Sandbox pool: Nomad settings
nomad_address: str = Field(
    default="http://localhost:4646",
    description="Nomad API address.",
)
sandbox_job_id: str = Field(
    default="atropos-sandbox",
    description="Nomad job id for sandbox containers.",
)
sandbox_image: str = Field(
    default="atropos-sandbox:local",
    description="Docker image for sandbox containers.",
)
slots_per_container: int = Field(
    default=10,
    description="Nomad: slots per container.",
)
min_containers: int = Field(
    default=1,
    description="Nomad: minimum containers.",
)
max_containers: int = Field(
    default=10,
    description="Nomad: maximum containers.",
)
privileged: bool = Field(
    default=False,
    description="Nomad: run container privileged.",
)
driver: str = Field(
    default="docker",
    description="Nomad task driver: 'docker' or 'singularity'.",
)
singularity_image: Optional[str] = Field(
    default=None,
    description="Path to .sif file for Singularity driver.",
)

# Sandbox pool: Modal settings
modal_app_name: str = Field(
    default="atropos-sandbox",
    description="Modal app name prefix.",
)
modal_image: str = Field(
    default="python:3.11",
    description="Modal: container image.",
)
modal_gpu: Optional[str] = Field(
    default=None,
    description="Modal: GPU type (None, 'T4', 'A10G', 'A100', 'H100').",
)
modal_cpu: float = Field(
    default=1.0,
    description="Modal: CPU cores.",
)
modal_memory: int = Field(
    default=2048,
    description="Modal: memory in MB.",
)
modal_slots_per_sandbox: int = Field(
    default=10,
    description="Modal: slots per sandbox.",
)
modal_min_sandboxes: int = Field(
    default=1,
    description="Modal: minimum sandboxes.",
)
modal_max_sandboxes: int = Field(
    default=5,
    description="Modal: maximum sandboxes.",
)
modal_idle_timeout: int = Field(
    default=120,
    description="Modal: idle timeout (seconds).",
)
modal_max_lifetime: int = Field(
    default=3600,
    description="Modal: max sandbox lifetime (seconds).",
)
modal_acquire_timeout: float = Field(
    default=60.0,
    description="Modal: slot acquisition timeout (seconds).",
)
modal_execution_timeout: float = Field(
    default=30.0,
    description="Modal: command execution timeout (seconds).",
)
modal_secrets: str = Field(
    default="",
    description="Modal: comma-separated Modal Secret names.",
)
modal_env_vars: str = Field(
    default="",
    description="Modal: semicolon-separated KEY=VALUE pairs.",
)
modal_workspace_base: str = Field(
    default="/data",
    description="Modal: workspace base directory.",
)
|
||||||
|
|
||||||
|
|
||||||
class HermesAgentBaseEnv(BaseEnv):
|
class HermesAgentBaseEnv(BaseEnv):
|
||||||
"""
|
"""
|
||||||
|
|
@ -186,6 +228,9 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||||
# Tool error tracking for wandb logging
|
# Tool error tracking for wandb logging
|
||||||
self._tool_error_buffer: List[Dict[str, Any]] = []
|
self._tool_error_buffer: List[Dict[str, Any]] = []
|
||||||
|
|
||||||
|
# Sandbox pool backend (only used when tool_pool_mode != "default")
|
||||||
|
self._sandbox_backend = None
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Toolset resolution (per-group)
|
# Toolset resolution (per-group)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
@ -242,6 +287,114 @@ class HermesAgentBaseEnv(BaseEnv):
|
||||||
from atroposlib.envs.server_handling.openai_server import OpenAIServer
|
from atroposlib.envs.server_handling.openai_server import OpenAIServer
|
||||||
return not isinstance(server, OpenAIServer)
|
return not isinstance(server, OpenAIServer)
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Sandbox pool backend (tool_pool_mode != "default")
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
async def _start_sandbox_backend(self) -> None:
    """Start the sandbox pool backend if tool_pool_mode is not 'default'.

    Does nothing when ``tool_pool_mode`` is ``"default"`` or when a backend
    is already held in ``self._sandbox_backend`` (a second start would
    overwrite the reference and leak the running backend).

    Raises:
        Whatever ``create_tool_backend()`` or ``backend.start()`` raises.
        When ``start()`` fails, the backend is stopped on a best-effort
        basis and ``self._sandbox_backend`` is reset to ``None`` before the
        original exception propagates.
    """
    if self.config.tool_pool_mode == "default":
        return

    if self._sandbox_backend is not None:
        logger.warning("Sandbox backend already started; skipping duplicate start")
        return

    # Imported lazily so the default (terminal tool) mode does not need the
    # atropos backends package to be importable.
    from atropos.backends import create_tool_backend

    logger.info("Starting sandbox backend (mode=%s)", self.config.tool_pool_mode)
    backend = create_tool_backend(self.config)
    self._sandbox_backend = backend
    try:
        await backend.start()
    except BaseException:
        # BaseException also covers task cancellation (e.g. Ctrl-C during a
        # slow startup), which would otherwise leave a half-started backend.
        self._sandbox_backend = None
        try:
            await backend.stop(purge=bool(self.config.purge_job_on_shutdown))
        except Exception:
            # Cleanup is best-effort; the start failure is the error to surface.
            logger.exception("Failed to clean up sandbox backend after start failure")
        raise
    logger.info("Sandbox backend started")
|
||||||
|
|
||||||
|
async def _stop_sandbox_backend(self) -> None:
    """Stop the sandbox pool backend, if one was started.

    Safe to call when no backend exists (it returns immediately). The
    ``purge_job_on_shutdown`` config flag is forwarded to the backend as
    ``purge``.

    Raises:
        Whatever ``backend.stop()`` raises. The backend reference is cleared
        even in that case, so a later call does not try to stop the same
        backend again.
    """
    backend = self._sandbox_backend
    if backend is None:
        return

    logger.info("Stopping sandbox backend")
    try:
        await backend.stop(purge=bool(self.config.purge_job_on_shutdown))
    finally:
        # Drop the reference whether or not stop() succeeded; a backend that
        # failed mid-shutdown must not be handed out as if it were running.
        self._sandbox_backend = None
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Optional hooks for sandbox environments
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
async def setup_trajectory_workspace(
    self,
    item: Item,
    *,
    trajectory_id: str,
    exec_tool,
) -> Dict[str, Any]:
    """
    Optional hook: get the sandbox workspace ready before the agent runs.

    The base implementation performs no setup. Subclasses override it when
    an environment needs preparation first (e.g., git clone, worktree
    creation, dependency installation).

    Args:
        item: The dataset item being rolled out
        trajectory_id: Unique ID for this trajectory
        exec_tool: Callable to execute tool calls in the sandbox

    Returns:
        Dict of workspace metadata (passed to verify_and_score_trajectory);
        empty in the base implementation.
    """
    # Nothing is prepared by default, so there is no metadata to report.
    workspace_meta: Dict[str, Any] = {}
    return workspace_meta
|
||||||
|
|
||||||
|
async def verify_and_score_trajectory(
    self,
    item: Item,
    result: AgentResult,
    *,
    trajectory_id: str,
    exec_tool,
    workspace_meta: Optional[Dict[str, Any]] = None,
) -> Tuple[float, Dict[str, Any]]:
    """
    Optional hook: run in-sandbox verification before scoring.

    Override in subclasses for environments that need to verify results
    inside the sandbox (e.g., run pytest, check file contents).

    Default: builds a ToolContext for ``trajectory_id`` and calls
    compute_reward() with it. ``exec_tool`` and ``workspace_meta`` are not
    used by this default implementation; they exist for overrides.

    Args:
        item: The dataset item
        result: The agent's rollout result
        trajectory_id: Unique ID for this trajectory
        exec_tool: Callable to execute tool calls in the sandbox
        workspace_meta: Metadata from setup_trajectory_workspace

    Returns:
        Tuple of (reward, metadata_dict). The reward is 0.0 when
        compute_reward() raises; the metadata dict is always empty here.
    """
    ctx = ToolContext(trajectory_id)
    try:
        reward = await self.compute_reward(item, result, ctx)
    except Exception as e:
        # A failing reward function must not abort the rollout, but keep the
        # traceback in the log so the failure can actually be diagnosed.
        logger.exception("compute_reward failed: %s", e)
        reward = 0.0
    finally:
        # Always release the context, whether scoring succeeded or not.
        ctx.cleanup()
    return reward, {}
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Lifecycle hooks for env_manager/process_manager cleanup
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
async def env_manager(self):
    """Start sandbox backend, run env, then clean up.

    The backend start sits inside the ``try`` so that a failure or
    cancellation part-way through startup still reaches the ``finally``
    cleanup instead of leaving sandbox resources behind.

    Returns:
        Whatever the parent class's ``env_manager()`` returns.
    """
    try:
        await self._start_sandbox_backend()
        return await super().env_manager()
    finally:
        # Returns immediately when no backend was created (default mode).
        await self._stop_sandbox_backend()
|
||||||
|
|
||||||
|
async def process_manager(self):
    """Start sandbox backend, run process, then clean up.

    The backend start sits inside the ``try`` so that a failure or
    cancellation part-way through startup still reaches the ``finally``
    cleanup instead of leaving sandbox resources behind.

    Returns:
        Whatever the parent class's ``process_manager()`` returns.
    """
    try:
        await self._start_sandbox_backend()
        return await super().process_manager()
    finally:
        # Returns immediately when no backend was created (default mode).
        await self._stop_sandbox_backend()
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Core Atropos integration
|
# Core Atropos integration
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
|
|
@ -1,99 +1,55 @@
|
||||||
# Active Context
|
# Active Context
|
||||||
|
|
||||||
## Current Focus
|
## Current Focus
|
||||||
Consolidating the two Atropos environment systems and fixing tool calling to use proper OpenAI-spec approach instead of ICL.
|
Adding sandbox pool support directly to `HermesAgentBaseEnv` so that `tool_pool_mode=modal/nomad` works alongside the default terminal-tool approach.
|
||||||
|
|
||||||
## PR Feedback from Lead Dev (Feb 10, 2026)
|
## Implementation Plan (Feb 10, 2026)
|
||||||
|
|
||||||
The PR was rejected because our approach has three fundamental issues:
|
### Goal
|
||||||
|
The command should work:
|
||||||
### Issue 1: ManagedServer doesn't pass `tools={}` to `apply_chat_template()`
|
```bash
|
||||||
- When using Phase 2 (VLLM/SGLang for RL training), `ManagedServer` needs to pass tools to `tokenizer.apply_chat_template(tools=...)`
|
python environments/swe_smith_oracle_env.py process \
|
||||||
- This makes the system prompt include tool definitions the way models were trained to expect
|
--env.tool_pool_mode modal \
|
||||||
- **Fix**: Atropos PR #366 adds `tool_call_parser` support to ManagedServer (branch: `tool_call_support`)
|
--env.modal_image python:3.11
|
||||||
|
|
||||||
### Issue 2: ICL prompt vs proper tool calling
|
|
||||||
- Our code embeds tools as XML in the system prompt (`<tools>...</tools>`)
|
|
||||||
- Proper approach: pass `tools=` parameter in `chat_completion()` calls and let the tokenizer's chat template handle formatting
|
|
||||||
- All Hermes datasets train on the proper format, not ICL
|
|
||||||
|
|
||||||
### Issue 3: Only Hermes `<tool_call>` parser, no multi-model support
|
|
||||||
- Our code only handles Hermes-style `<tool_call>` XML parsing
|
|
||||||
- Proper approach: parser registry supporting 11+ model families (hermes, qwen, deepseek, llama, mistral, etc.)
|
|
||||||
|
|
||||||
## Architecture: What Exists Now (Two Parallel Systems)
|
|
||||||
|
|
||||||
### `environments/` (Teknium's proper approach) ✅ CORRECT
|
|
||||||
```
|
|
||||||
environments/
|
|
||||||
├── agent_loop.py ← Uses tools= in chat_completion() (OpenAI spec)
|
|
||||||
├── hermes_base_env.py ← Phase 1 (OpenAI) + Phase 2 (ManagedServer + parser)
|
|
||||||
├── tool_context.py ← ToolContext for reward functions
|
|
||||||
├── tool_call_parsers/ ← 11 model parsers (hermes, qwen, deepseek, llama, etc.)
|
|
||||||
│ ├── __init__.py ← Registry with get_parser(), register_parser()
|
|
||||||
│ ├── hermes_parser.py
|
|
||||||
│ ├── qwen_parser.py
|
|
||||||
│ ├── deepseek_v3_parser.py
|
|
||||||
│ ├── llama_parser.py
|
|
||||||
│ ├── mistral_parser.py
|
|
||||||
│ └── ... (11 total)
|
|
||||||
├── terminal_test_env.py ← Working example: file creation tasks
|
|
||||||
├── hermes_swe_env.py ← SWE environment
|
|
||||||
└── patches.py ← Async-safe monkey patches
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**How it works correctly:**
|
### Changes to `environments/hermes_base_env.py`:
|
||||||
1. `HermesAgentLoop.run()` passes `tools=self.tool_schemas` to `chat_completion()`
|
|
||||||
2. ManagedServer passes tools to `tokenizer.apply_chat_template(tools=...)`
|
|
||||||
3. Parser registry reconstructs `tool_calls` from raw model output
|
|
||||||
4. Tool execution uses hermes-agent's `handle_function_call()` from `model_tools.py`
|
|
||||||
|
|
||||||
### `atropos/` (Our sandbox-optimized code) - PARTIALLY REDUNDANT
|
**1. Add config fields to `HermesAgentEnvConfig`:**
|
||||||
```
|
- `tool_pool_mode: str = "default"` — "default" (terminal tool), "nomad", or "modal"
|
||||||
atropos/
|
- Nomad fields: `nomad_address`, `sandbox_job_id`, `sandbox_image`, `slots_per_container`, etc.
|
||||||
├── agent/atropos_agent.py ← ICL-based agent (REDUNDANT with agent_loop.py)
|
- Modal fields: `modal_app_name`, `modal_image`, `modal_gpu`, `modal_slots_per_sandbox`, etc.
|
||||||
├── envs/agent_env.py ← Environment with sandbox backends (PARTIALLY REDUNDANT)
|
- Shared: `allow_network`, `require_sandbox`, `purge_job_on_start`, `purge_job_on_shutdown`
|
||||||
├── envs/swe_smith_oracle_env.py ← SWE env using sandbox (KEEP - port to new base)
|
|
||||||
├── backends/ ← Sandbox backends (KEEP - valuable infrastructure)
|
**2. Add methods to `HermesAgentBaseEnv`:**
|
||||||
│ ├── modal_backend.py ← Modal sandbox pool
|
- `_start_sandbox_backend()` / `_stop_sandbox_backend()` — lifecycle management
|
||||||
│ ├── nomad_backend.py ← Nomad/Docker/Singularity
|
- `setup_trajectory_workspace(item, *, trajectory_id, exec_tool)` → optional hook (no-op default, returns `{}`)
|
||||||
│ └── base.py ← ToolBackend protocol
|
- `verify_and_score_trajectory(item, result, *, trajectory_id, exec_tool, workspace_meta=None)` → optional hook (calls compute_reward by default)
|
||||||
├── slots/ ← Slot multiplexing (KEEP)
|
|
||||||
├── nomad/ ← Nomad client (KEEP)
|
**3. Modify `collect_trajectory()`:**
|
||||||
├── tools/ ← Sandbox tool registry (PARTIALLY REDUNDANT)
|
- When `tool_pool_mode == "default"`: existing behavior (terminal tool handles isolation)
|
||||||
└── sandbox_server.py ← HTTP server in containers (KEEP)
|
- When `tool_pool_mode in ("nomad", "modal")`: acquire slot → run agent with sandbox-backed tools → verify → release
|
||||||
|
|
||||||
|
**4. Port SWE env to `environments/`:**
|
||||||
|
- Move/rewrite `swe_smith_oracle_env.py` to subclass `HermesAgentBaseEnv`
|
||||||
|
- Override `setup_trajectory_workspace()` (git clone/worktree)
|
||||||
|
- Override `verify_and_score_trajectory()` (pytest verification)
|
||||||
|
|
||||||
|
### Key Imports
|
||||||
|
```python
|
||||||
|
from atropos.backends import create_tool_backend # Nomad/Modal backends
|
||||||
|
from atropos.backends.base import ToolBackend
|
||||||
|
from atropos.slots.executor import ExecutionResult
|
||||||
```
|
```
|
||||||
|
|
||||||
## Plan: Consolidate into `environments/`
|
### What's Already Working
|
||||||
|
- ✅ atroposlib with tool_call_support (ManagedServer has tool_call_parser)
|
||||||
|
- ✅ GSM8k agent env with HermesAgentBaseEnv (Phase 1 tested, process mode)
|
||||||
|
- ✅ mini-swe-agent installed (terminal tool available)
|
||||||
|
- ✅ Modal backend (tested, working with sandboxes)
|
||||||
|
- ✅ Nomad/Singularity backends (tested, working)
|
||||||
|
- ✅ Tool call parsers (11+ models)
|
||||||
|
|
||||||
### What to KEEP from `atropos/`:
|
### What's Blocked
|
||||||
- `backends/` - Modal, Nomad, Singularity backends (valuable infrastructure for scale)
|
- Tinker billing (402 error) — can't test Phase 2 training yet
|
||||||
- `slots/` - Slot multiplexing
|
- No VLLM on this machine — can't test ManagedServer locally
|
||||||
- `nomad/` - Nomad client
|
|
||||||
- `sandbox_server.py` - Container HTTP server
|
|
||||||
- `Dockerfile` - Sandbox container image
|
|
||||||
|
|
||||||
### What to REMOVE/REPLACE:
|
|
||||||
- `atropos/agent/atropos_agent.py` → replaced by `environments/agent_loop.py`
|
|
||||||
- `atropos/envs/agent_env.py` → functionality merged into `environments/hermes_base_env.py`
|
|
||||||
- `atropos/tools/` → replaced by `model_tools.py` + `tools/` (hermes-agent's standard tools)
|
|
||||||
|
|
||||||
### What to CREATE:
|
|
||||||
- `environments/gsm8k_agent_env.py` → GSM8k with tool calling, subclasses `HermesAgentBaseEnv`
|
|
||||||
- Update `environments/hermes_base_env.py` to optionally use sandbox backends (Nomad/Modal) for terminal isolation when needed for scale
|
|
||||||
|
|
||||||
### Steps:
|
|
||||||
1. Install atropos `tool_call_support` branch (PR #366)
|
|
||||||
2. Create `environments/gsm8k_agent_env.py` using `HermesAgentBaseEnv`
|
|
||||||
3. Port `swe_smith_oracle_env.py` to use `HermesAgentBaseEnv`
|
|
||||||
4. Make sandbox backends accessible from `HermesAgentBaseEnv` (terminal_backend config)
|
|
||||||
5. Remove redundant `atropos/agent/` and `atropos/envs/agent_env.py`
|
|
||||||
6. Clean up `atropos/tools/` (keep only sandbox-specific tools)
|
|
||||||
7. Update tinker-atropos gsm8k env to use proper base class
|
|
||||||
8. Test everything end-to-end
|
|
||||||
|
|
||||||
## Previous Completed Work
|
|
||||||
- Modal backend integration (Feb 8) - KEEP backends, update integration point
|
|
||||||
- Main branch merge (Feb 9) - completed
|
|
||||||
- Singularity/Apptainer (Feb 6) - KEEP
|
|
||||||
- Memory Bank initialized (Feb 5)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue