mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
- Default Modal behavior unchanged - New pooled mode reuses warm Modal environments across tasks - Per-task working directory (/root/hermes_tasks/<task_id>) with best-effort cleanup - Minimal integration: terminal_tool _create_environment now accepts task_id and selects pooled modal wrapper when configured
128 lines
4.1 KiB
Python
128 lines
4.1 KiB
Python
"""Modal sandbox pooling backend for terminal_tool.
|
|
|
|
This module provides an OPTIONAL pooled Modal backend that is compatible with
|
|
Hermes-Agent's existing terminal_tool interface.
|
|
|
|
Goals:
|
|
- Keep the default Modal path unchanged.
|
|
- Allow switching to pooled behavior with minimal friction:
|
|
TERMINAL_ENV=modal
|
|
TERMINAL_MODAL_MODE=pool
|
|
|
|
Design:
|
|
- Pool stores warm `_ModalEnvironment` instances (each wraps a live ModalDeployment).
|
|
- Each task acquires one environment exclusively, uses a task-specific working dir,
|
|
then releases it back to the pool.
|
|
- Release attempts to remove the task working directory to reduce cross-task leakage.
|
|
|
|
NOTE: This is intentionally conservative and self-contained. It does not change
|
|
any tool schemas or model-facing behavior.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
import os
import re
import shlex
import threading
import time
import uuid
from dataclasses import dataclass
from typing import Optional
|
|
|
|
|
|
@dataclass
class _PooledEnv:
    """Idle-list entry: a released environment plus the time it was parked."""

    env: object  # the environment object passed to ``ModalEnvPool.release``
    created_at: float  # ``time.time()`` taken when the entry was created


class ModalEnvPool:
    """Thread-safe pool of warm Modal environments.

    ``acquire`` hands out an idle environment when one exists, otherwise it
    creates a new one while fewer than ``max_size`` environments are accounted
    for, otherwise it waits for a release.  ``release`` parks an environment
    on the idle list and wakes one waiter.

    NOTE: if a waiter times out, ``acquire`` creates an extra environment
    beyond ``max_size``.  That environment is parked on the idle list like any
    other when released, so the pool can remain above ``max_size`` afterwards;
    the pool itself never tears environments down.
    """

    def __init__(self, max_size: int = 4):
        """Create an empty pool.

        Args:
            max_size: Soft cap on the number of environments.  Values below 1
                are clamped to 1: with a cap of 0 every ``acquire`` would wait
                out its whole timeout before it was allowed to create anything.
        """
        self.max_size = max(1, int(max_size))
        self._lock = threading.Lock()
        self._cond = threading.Condition(self._lock)
        self._idle: list[_PooledEnv] = []
        # Number of environments created (or being created) through this pool.
        self._total = 0

    def acquire(self, create_fn, *, wait_s: int = 300):
        """Lease an environment, creating one with ``create_fn`` if needed.

        Args:
            create_fn: Zero-argument callable that builds a new environment.
                It is invoked without the pool lock held because it is slow.
            wait_s: Maximum number of seconds to wait for a release once the
                pool is at capacity.  After that a new environment is created
                anyway (oversubscription).

        Returns:
            A previously released environment, or the result of ``create_fn()``.

        Raises:
            BaseException: Whatever ``create_fn`` raises; the reserved slot is
                given back before the exception propagates.
        """
        # Monotonic clock: the deadline must not move when the wall clock is
        # adjusted (NTP step, manual change) while we are waiting.
        deadline = time.monotonic() + wait_s
        with self._cond:
            while True:
                if self._idle:
                    return self._idle.pop().env
                if self._total < self.max_size:
                    break
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    # Last resort: oversubscribe rather than fail the task.
                    break
                # Wake at least every 5 s so the deadline is re-evaluated.
                self._cond.wait(timeout=min(5, remaining))
            # Reserve the slot before dropping the lock so concurrent callers
            # cannot all conclude that there is room for one more.
            self._total += 1

        try:
            return create_fn()
        except BaseException:
            # BaseException on purpose: a KeyboardInterrupt/SystemExit raised
            # during creation must not leave the slot reserved forever.
            with self._cond:
                self._total -= 1
                self._cond.notify()
            raise

    def release(self, env: object):
        """Return ``env`` to the idle list and wake one waiting ``acquire``.

        Releasing an environment that is already idle is ignored; parking it
        twice would let two tasks lease the same environment at once.
        """
        with self._cond:
            if any(entry.env is env for entry in self._idle):
                return
            self._idle.append(_PooledEnv(env=env, created_at=time.time()))
            self._cond.notify()
|
|
|
|
|
|
# Global pool (process-level), created lazily by get_global_pool().
_global_pool: Optional[ModalEnvPool] = None
# Serializes first-time construction so concurrent callers share one pool.
_global_pool_lock = threading.Lock()


def get_global_pool() -> ModalEnvPool:
    """Return the process-wide ``ModalEnvPool``, creating it on first use.

    The pool size is read once, at creation time, from
    ``TERMINAL_MODAL_POOL_MAX``, then ``TERMINAL_MODAL_POOL_SIZE``, then
    defaults to 4.  An unset or empty variable falls through to the next
    source.

    Raises:
        ValueError: If the configured size is not an integer.
    """
    global _global_pool
    # Fast path without the lock; re-checked under the lock so that two
    # threads racing on first use cannot each build their own pool (which
    # would split the capacity accounting across two objects).
    if _global_pool is None:
        with _global_pool_lock:
            if _global_pool is None:
                raw = (
                    os.getenv("TERMINAL_MODAL_POOL_MAX")
                    or os.getenv("TERMINAL_MODAL_POOL_SIZE")
                    or "4"
                )
                try:
                    max_size = int(raw)
                except ValueError as err:
                    raise ValueError(
                        "TERMINAL_MODAL_POOL_MAX/TERMINAL_MODAL_POOL_SIZE "
                        f"must be an integer, got {raw!r}"
                    ) from err
                _global_pool = ModalEnvPool(max_size=max_size)
    return _global_pool
|
|
|
|
|
|
class ModalPooledTaskEnvironment:
    """Per-task environment wrapper that leases a pooled Modal env.

    Each instance owns a task-specific working directory
    ``<base_cwd>/hermes_tasks/<dir>`` inside the leased environment, runs
    commands there by default, and on ``cleanup``/``stop`` removes that
    directory (best effort) and returns the environment to the pool.
    """

    def __init__(self, *, inner, base_cwd: str, timeout: int, task_id: str, pool=None):
        """Wrap ``inner`` and prepare an empty working directory for the task.

        Args:
            inner: Leased environment; must provide
                ``execute(command, cwd=..., timeout=...)``.
            base_cwd: Directory under which ``hermes_tasks/<dir>`` is created.
                Trailing slashes are dropped; an empty value or ``"/"`` becomes
                ``"/root"``.
            timeout: Default per-command timeout used by ``execute``.
            task_id: Task identifier; a random UUID is used when falsy.
            pool: Pool to release ``inner`` to on cleanup.  Defaults to the
                process-wide pool from ``get_global_pool()``.
        """
        self._inner = inner
        self._pool = pool
        # Set once the lease has been handed back; makes cleanup idempotent.
        self._released = False
        self.timeout = timeout
        self.task_id = task_id or str(uuid.uuid4())
        self.base_cwd = (base_cwd or "").rstrip("/") or "/root"
        self.cwd = f"{self.base_cwd}/hermes_tasks/{self._safe_dir_name(self.task_id)}"

        # Remove and recreate the directory instead of ``rm -rf dir/*``: the
        # glob skips dotfiles, which would leak hidden files between tasks
        # that reuse an id.  The path is quoted because it goes through a shell.
        quoted = shlex.quote(self.cwd)
        self._inner.execute(
            f"rm -rf -- {quoted} && mkdir -p -- {quoted}", cwd="/", timeout=60
        )

    @staticmethod
    def _safe_dir_name(task_id) -> str:
        """Map a task id to a single, shell-safe path component.

        Characters outside ``[A-Za-z0-9._-]`` (including ``/``) become ``_``
        and leading/trailing dots are stripped, so the result can neither
        escape ``hermes_tasks`` (``..``, ``a/../..``) nor carry shell
        metacharacters.  A random hex name is used if nothing is left.
        """
        name = re.sub(r"[^A-Za-z0-9._-]", "_", str(task_id)).strip(".")
        return name or uuid.uuid4().hex

    @classmethod
    def acquire(cls, *, image: str, base_cwd: str, timeout: int, task_id: str, create_modal_env_fn):
        """Lease an environment from the global pool and wrap it for a task.

        Args:
            image: Accepted for interface compatibility; not used here (the
                image is whatever ``create_modal_env_fn`` builds).
            base_cwd: Forwarded to the constructor.
            timeout: Forwarded to the constructor.
            task_id: Forwarded to the constructor.
            create_modal_env_fn: Zero-argument factory handed to the pool for
                when no idle environment is available.

        Returns:
            A ready ``ModalPooledTaskEnvironment``.
        """
        pool = get_global_pool()
        inner = pool.acquire(create_modal_env_fn)
        try:
            return cls(
                inner=inner,
                base_cwd=base_cwd,
                timeout=timeout,
                task_id=task_id,
                pool=pool,
            )
        except BaseException:
            # Preparing the workdir failed: hand the lease back, otherwise
            # the pool slot would be lost for the rest of the process.
            pool.release(inner)
            raise

    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict:
        """Run ``command`` in the leased environment.

        Args:
            command: Command string passed through to the inner environment.
            cwd: Working directory; the task directory is used when empty.
            timeout: Per-call timeout; the instance default is used when
                falsy (``None`` or ``0``).

        Returns:
            Whatever the inner environment's ``execute`` returns.
        """
        # Always execute in the task workdir unless an explicit cwd is given.
        workdir = cwd or self.cwd
        return self._inner.execute(command, cwd=workdir, timeout=timeout or self.timeout)

    def cleanup(self):
        """Remove the task directory (best effort) and release the lease.

        Safe to call more than once: only the first call does anything, so the
        same environment is never returned to the pool twice.
        """
        if self._released:
            return
        self._released = True

        pool = self._pool if self._pool is not None else get_global_pool()
        try:
            self._inner.execute(
                f"rm -rf -- {shlex.quote(self.cwd)}", cwd="/", timeout=60
            )
        except Exception:
            # Best effort by design; keep a trace instead of failing the caller.
            logging.getLogger(__name__).debug(
                "Failed to remove task directory %s", self.cwd, exc_info=True
            )
        finally:
            # Release even if the removal was interrupted, so the slot is
            # not lost.
            pool.release(self._inner)

    def stop(self):
        """Alias for ``cleanup``."""
        self.cleanup()
|