get tokenizer from .env

This commit is contained in:
Shannon Sands 2026-02-03 14:50:37 +10:00
parent b5b1fef20a
commit 30221d8c20
8 changed files with 97 additions and 6 deletions

View file

@ -25,6 +25,7 @@ HERMES_BACKEND=openai
# llama.cpp example (see `Hermes-Agent/scripts/launch_llama_cpp_hermes_4_36b.sh`): # llama.cpp example (see `Hermes-Agent/scripts/launch_llama_cpp_hermes_4_36b.sh`):
# ATROPOS_SERVER_BASE_URL=http://127.0.0.1:8080 # ATROPOS_SERVER_BASE_URL=http://127.0.0.1:8080
# ATROPOS_SERVER_MODEL=hermes-4-36b # ATROPOS_SERVER_MODEL=hermes-4-36b
# ATROPOS_TOKENIZER_NAME=NousResearch/Hermes-4.3-36B
# ATROPOS_SERVER_API_KEY=local # ATROPOS_SERVER_API_KEY=local
# #
# Generic OpenAI-compatible (base URL should include /v1): # Generic OpenAI-compatible (base URL should include /v1):

View file

@ -23,6 +23,7 @@ from ..slots import SlotPool, SlotPoolConfig
from ..tools import ToolRegistry, build_tool_registry from ..tools import ToolRegistry, build_tool_registry
from ..tools.tool_executor import ToolExecutor, ToolExecutorConfig from ..tools.tool_executor import ToolExecutor, ToolExecutorConfig
# Main BaseEnv child classes. Subclass these to get agent + tooling functionality easily.
class AgentEnvConfig(BaseEnvConfig): class AgentEnvConfig(BaseEnvConfig):
tool_pool_mode: str = Field(default="nomad", description="Tool execution backend (only 'nomad' is supported)") tool_pool_mode: str = Field(default="nomad", description="Tool execution backend (only 'nomad' is supported)")

View file

@ -55,6 +55,7 @@ class HermesCompatTestEnvConfig(AgentEnvConfig):
description="Base URL for an OpenAI-compatible chat server (without /v1).", description="Base URL for an OpenAI-compatible chat server (without /v1).",
) )
server_model: str = Field(default="hermes-4-36b", description="Model name") server_model: str = Field(default="hermes-4-36b", description="Model name")
tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
class HermesCompatTestEnv(AgentEnv[HermesCompatTestEnvConfig]): class HermesCompatTestEnv(AgentEnv[HermesCompatTestEnvConfig]):
@ -83,7 +84,7 @@ class HermesCompatTestEnv(AgentEnv[HermesCompatTestEnvConfig]):
api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local" api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
env_config = HermesCompatTestEnvConfig( env_config = HermesCompatTestEnvConfig(
tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct", # tokenization only tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
group_size=1, group_size=1,
use_wandb=False, use_wandb=False,
include_messages=True, include_messages=True,

View file

@ -56,6 +56,7 @@ class SandboxTerminalSmokeEnvConfig(AgentEnvConfig):
description="Base URL for an OpenAI-compatible chat server (without /v1).", description="Base URL for an OpenAI-compatible chat server (without /v1).",
) )
server_model: str = Field(default="hermes-4-36b", description="Model name") server_model: str = Field(default="hermes-4-36b", description="Model name")
tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
class SandboxTerminalSmokeEnv(AgentEnv[SandboxTerminalSmokeEnvConfig]): class SandboxTerminalSmokeEnv(AgentEnv[SandboxTerminalSmokeEnvConfig]):
@ -84,7 +85,7 @@ class SandboxTerminalSmokeEnv(AgentEnv[SandboxTerminalSmokeEnvConfig]):
api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local" api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
env_config = SandboxTerminalSmokeEnvConfig( env_config = SandboxTerminalSmokeEnvConfig(
tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct", # tokenization only tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
group_size=1, group_size=1,
use_wandb=False, use_wandb=False,
include_messages=True, include_messages=True,

View file

@ -41,6 +41,9 @@ class SweSmithOracleEnvConfig(AgentEnvConfig):
install_timeout_s: float = Field(default=600.0) install_timeout_s: float = Field(default=600.0)
test_timeout_s: float = Field(default=600.0) test_timeout_s: float = Field(default=600.0)
# Tokenization: should match the model used for training.
tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]): class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
""" """
@ -78,7 +81,7 @@ class SweSmithOracleEnv(AgentEnv[SweSmithOracleEnvConfig]):
api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local" api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
env_config = SweSmithOracleEnvConfig( env_config = SweSmithOracleEnvConfig(
tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct", # tokenization only tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
group_size=1, group_size=1,
use_wandb=False, use_wandb=False,
rollout_server_url="http://localhost:8000", rollout_server_url="http://localhost:8000",

View file

@ -68,6 +68,7 @@ class SimpleTestEnvConfig(AgentEnvConfig):
default="hermes-4-36b", default="hermes-4-36b",
description="Model name", description="Model name",
) )
tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
class SimpleTestEnv(AgentEnv[SimpleTestEnvConfig]): class SimpleTestEnv(AgentEnv[SimpleTestEnvConfig]):
@ -108,7 +109,7 @@ class SimpleTestEnv(AgentEnv[SimpleTestEnvConfig]):
api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local" api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
env_config = SimpleTestEnvConfig( env_config = SimpleTestEnvConfig(
tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct", # For tokenization only tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
group_size=4, group_size=4,
use_wandb=False, # Disable wandb for simple testing use_wandb=False, # Disable wandb for simple testing
rollout_server_url="http://localhost:8000", rollout_server_url="http://localhost:8000",

View file

@ -34,6 +34,7 @@ class ToolServerSmokeEnvConfig(AgentEnvConfig):
description="Base URL for an OpenAI-compatible chat server (without /v1).", description="Base URL for an OpenAI-compatible chat server (without /v1).",
) )
server_model: str = Field(default="hermes-4-36b", description="Model name") server_model: str = Field(default="hermes-4-36b", description="Model name")
tokenizer_name: str = Field(default="NousResearch/Hermes-4.3-36B", description="Tokenizer name for RL tokenization")
class ToolServerSmokeEnv(AgentEnv[ToolServerSmokeEnvConfig]): class ToolServerSmokeEnv(AgentEnv[ToolServerSmokeEnvConfig]):
@ -62,7 +63,7 @@ class ToolServerSmokeEnv(AgentEnv[ToolServerSmokeEnvConfig]):
api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local" api_key = os.getenv("ATROPOS_SERVER_API_KEY") or os.getenv("OPENAI_API_KEY") or "local"
env_config = ToolServerSmokeEnvConfig( env_config = ToolServerSmokeEnvConfig(
tokenizer_name="Qwen/Qwen2.5-1.5B-Instruct", # tokenization only tokenizer_name=os.getenv("ATROPOS_TOKENIZER_NAME") or "NousResearch/Hermes-4.3-36B",
group_size=1, group_size=1,
use_wandb=False, use_wandb=False,
include_messages=True, include_messages=True,

View file

@ -10,7 +10,10 @@ The SlotPool is the core abstraction for slot-based multiplexing:
import asyncio import asyncio
import logging import logging
import os
import subprocess
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from ..nomad.client import ( from ..nomad.client import (
@ -53,6 +56,11 @@ class SlotPoolConfig:
# Job lifecycle # Job lifecycle
purge_job_on_start: bool = False # Purge any pre-existing job before starting (local dev/training friendly) purge_job_on_start: bool = False # Purge any pre-existing job before starting (local dev/training friendly)
# Local Docker image convenience (macOS/Nomad dev mode)
auto_build_local_image: bool = True # If image endswith :local and is missing, build it from the bundled Dockerfile.
dockerfile_path: Optional[str] = None # Override Dockerfile path (default: Hermes-Agent/atropos/Dockerfile).
docker_build_context: Optional[str] = None # Override build context (default: Hermes-Agent/atropos).
class SlotPool: class SlotPool:
""" """
@ -108,7 +116,77 @@ class SlotPool:
self._health_task: Optional[asyncio.Task] = None self._health_task: Optional[asyncio.Task] = None
self._scale_task: Optional[asyncio.Task] = None self._scale_task: Optional[asyncio.Task] = None
self._last_scale_time = 0.0 self._last_scale_time = 0.0
def _default_dockerfile_path(self) -> Path:
    """Return the Dockerfile location used when the config gives no override.

    The result is ``Dockerfile`` inside the directory one level above the
    directory that contains this module (resolved to an absolute path).
    """
    module_file = Path(__file__).resolve()
    package_root = module_file.parents[1]
    return package_root.joinpath("Dockerfile")
def _default_build_context(self) -> Path:
    """Return the Docker build context used when the config gives no override.

    This is the directory one level above the directory that contains this
    module (resolved to an absolute path).
    """
    module_dir = Path(__file__).resolve().parent
    return module_dir.parent
def _docker_image_exists(self, image: str) -> bool:
    """Report whether ``docker image inspect`` succeeds for ``image``.

    Returns ``False`` both when the command exits non-zero and when the
    ``docker`` executable cannot be found. Command output is discarded.
    """
    inspect_cmd = ["docker", "image", "inspect", image]
    cli_env = dict(os.environ)
    cli_env["DOCKER_CLI_HINTS"] = "false"
    try:
        completed = subprocess.run(
            inspect_cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
            env=cli_env,
        )
    except FileNotFoundError:
        # No docker CLI available: treat the image as absent.
        return False
    return completed.returncode == 0
def _try_build_local_image(self, image: str) -> None:
    """Build ``image`` from the configured (or default) Dockerfile.

    Tries ``docker buildx build --load`` first and falls back to a plain
    ``docker build`` if that exits non-zero.

    Args:
        image: Tag to assign to the built image.

    Raises:
        RuntimeError: If the Dockerfile or build context does not exist, if
            the ``docker`` CLI is not installed, or if both build commands
            fail.
    """
    dockerfile = Path(self.config.dockerfile_path) if self.config.dockerfile_path else self._default_dockerfile_path()
    context = Path(self.config.docker_build_context) if self.config.docker_build_context else self._default_build_context()

    if not dockerfile.exists():
        # The switch that controls this code path is auto_build_local_image,
        # so point the user at that rather than an unrelated option.
        raise RuntimeError(
            f"Sandbox Dockerfile not found at {dockerfile}. "
            "Build the sandbox image manually, set auto_build_local_image to false, "
            "or provide a non-local image."
        )
    if not context.exists():
        raise RuntimeError(f"Docker build context not found at {context}")

    docker_env = {**os.environ, "DOCKER_CLI_HINTS": "false"}

    # Prefer buildx+--load to ensure the image ends up in the local daemon (required by Nomad's docker driver).
    buildx_cmd = [
        "docker",
        "buildx",
        "build",
        "--load",
        "-t",
        image,
        "-f",
        str(dockerfile),
        str(context),
    ]
    # Fallback to classic docker build if buildx isn't available.
    build_cmd = ["docker", "build", "-t", image, "-f", str(dockerfile), str(context)]

    try:
        if subprocess.run(buildx_cmd, check=False, env=docker_env).returncode == 0:
            return
        fallback = subprocess.run(build_cmd, check=False, env=docker_env)
    except FileNotFoundError as exc:
        # Without this, a missing docker binary surfaces as a bare
        # FileNotFoundError instead of the RuntimeError used everywhere else.
        raise RuntimeError(
            f"Failed to build local sandbox image {image}: the `docker` CLI was not found on PATH."
        ) from exc

    if fallback.returncode != 0:
        raise RuntimeError(
            f"Failed to build local sandbox image {image}. "
            f"Tried: {' '.join(buildx_cmd)} and {' '.join(build_cmd)}"
        )
def _ensure_local_image(self) -> None:
    """Build the configured sandbox image if it is a missing ``:local`` tag.

    Does nothing when the (whitespace-stripped) image name does not end in
    ``:local``, when ``auto_build_local_image`` is disabled, or when the
    image is already present according to ``_docker_image_exists``.
    """
    image = (self.config.image or "").strip()
    wants_auto_build = image.endswith(":local") and self.config.auto_build_local_image
    if not wants_auto_build:
        return

    already_present = self._docker_image_exists(image)
    if already_present:
        return

    logger.info(f"Local sandbox image {image} not found; building it now...")
    self._try_build_local_image(image)
def _slot_key(self, alloc_id: str, slot_id: str) -> str: def _slot_key(self, alloc_id: str, slot_id: str) -> str:
"""Generate unique key for a slot.""" """Generate unique key for a slot."""
return f"{alloc_id}:{slot_id}" return f"{alloc_id}:{slot_id}"
@ -143,6 +221,10 @@ class SlotPool:
logger.info(f"Starting SlotPool (job_id={self.config.job_id})") logger.info(f"Starting SlotPool (job_id={self.config.job_id})")
try: try:
# Make sure local sandbox images exist before Nomad tries to pull them.
# This is a common footgun in macOS dev mode with :local tags.
self._ensure_local_image()
# Check Nomad health # Check Nomad health
if not await self.nomad.is_healthy(): if not await self.nomad.is_healthy():
raise RuntimeError(f"Nomad is not reachable at {self.config.nomad_address}") raise RuntimeError(f"Nomad is not reachable at {self.config.nomad_address}")