hermes-agent/tools/modal_pool.py
Shannon Sands 6ff573fef2 Add optional Modal sandbox pooling backend (TERMINAL_MODAL_MODE=pool)
- Default Modal behavior unchanged
- New pooled mode reuses warm Modal environments across tasks
- Per-task working directory (/root/hermes_tasks/<task_id>) with best-effort cleanup
- Minimal integration: terminal_tool _create_environment now accepts task_id and
  selects pooled modal wrapper when configured
2026-02-14 13:18:59 +10:00

128 lines
4.1 KiB
Python

"""Modal sandbox pooling backend for terminal_tool.

This module provides an OPTIONAL pooled Modal backend that is compatible with
Hermes-Agent's existing terminal_tool interface.

Goals:
- Keep the default Modal path unchanged.
- Allow switching to pooled behavior with minimal friction:
    TERMINAL_ENV=modal
    TERMINAL_MODAL_MODE=pool

Design:
- Pool stores warm `_ModalEnvironment` instances (each wraps a live ModalDeployment).
- Each task acquires one environment exclusively, uses a task-specific working dir,
  then releases it back to the pool.
- Release attempts to remove the task working directory to reduce cross-task leakage.

NOTE: This is intentionally conservative and self-contained. It does not change
any tool schemas or model-facing behavior.
"""
from __future__ import annotations

import os
import shlex
import threading
import time
import uuid
from dataclasses import dataclass
from typing import Optional
@dataclass
class _PooledEnv:
    """Idle-list entry: a released environment plus a timestamp."""

    env: object  # the environment object handed to ModalEnvPool.release()
    created_at: float  # time.time() when this entry was built (i.e. at release time)


class ModalEnvPool:
    """Thread-safe pool of warm Modal environments.

    The pool never creates environments by itself; callers pass a factory to
    `acquire()`. `_total` counts environments created through the pool that
    have not failed creation, and `_idle` holds the ones currently available.

    NOTE: environments are never retired. Once `acquire()` oversubscribes
    after a timeout, the extra environment is pooled on release like any
    other, so the pool can stay above `max_size`.
    """

    def __init__(self, max_size: int = 4):
        """Create an empty pool that aims for at most *max_size* environments."""
        self.max_size = max_size
        self._lock = threading.Lock()
        # Condition shares the lock so waiters wake up on release/rollback.
        self._cond = threading.Condition(self._lock)
        self._idle: list[_PooledEnv] = []
        self._total = 0

    def acquire(self, create_fn, *, wait_s: int = 300):
        """Return an idle environment, or create a new one with *create_fn*.

        Args:
            create_fn: Zero-argument callable that builds a new environment.
                It is invoked without the pool lock held.
            wait_s: Maximum seconds to wait for an idle environment when the
                pool is already at `max_size`. After that the pool is
                temporarily oversubscribed and a new environment is created.

        Returns:
            An environment previously passed to `release()`, or the result of
            `create_fn()`.

        Raises:
            Whatever `create_fn` raises; the reserved slot is rolled back first.
        """
        # Monotonic clock: the wait must not be stretched or cut short by
        # wall-clock adjustments.
        deadline = time.monotonic() + wait_s
        with self._cond:
            while not self._idle:
                if self._total < self.max_size:
                    break  # room to create a new environment
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    # As a last resort, allow temporary oversubscription.
                    break
                # Wake at least every 5s to re-check the deadline.
                self._cond.wait(timeout=min(5, remaining))
            if self._idle:
                return self._idle.pop().env
            # Reserve the slot before dropping the lock so concurrent callers
            # see the updated count.
            self._total += 1

        # Create outside the lock (slow).
        try:
            return create_fn()
        except BaseException:
            # Roll back the reserved slot on any failure (including
            # KeyboardInterrupt) and let one waiter retry.
            with self._cond:
                self._total -= 1
                self._cond.notify()
            raise

    def release(self, env: object):
        """Return *env* to the idle list and wake one waiter.

        Releasing an environment that is already idle is a no-op; otherwise
        the same environment could be handed to two tasks at once.
        """
        with self._cond:
            if any(entry.env is env for entry in self._idle):
                return
            self._idle.append(_PooledEnv(env=env, created_at=time.time()))
            self._cond.notify()
# Global pool (process-level), created lazily by get_global_pool().
_global_pool: Optional[ModalEnvPool] = None
# Serializes first-time creation so concurrent callers share one pool.
_global_pool_lock = threading.Lock()


def get_global_pool() -> ModalEnvPool:
    """Return the process-wide `ModalEnvPool`, creating it on first use.

    The size is read from `TERMINAL_MODAL_POOL_MAX`, falling back to
    `TERMINAL_MODAL_POOL_SIZE`, then to 4. Empty values are treated as unset
    and the size is clamped to at least 1 (a non-positive size would make
    every acquire wait for its full timeout before creating anything).

    Raises:
        ValueError: If the configured size is not an integer.
    """
    global _global_pool
    if _global_pool is None:
        with _global_pool_lock:
            # Re-check under the lock: another thread may have won the race.
            if _global_pool is None:
                raw_size = (
                    os.getenv("TERMINAL_MODAL_POOL_MAX")
                    or os.getenv("TERMINAL_MODAL_POOL_SIZE")
                    or "4"
                )
                _global_pool = ModalEnvPool(max_size=max(1, int(raw_size)))
    return _global_pool
class ModalPooledTaskEnvironment:
    """Per-task environment wrapper that leases a pooled Modal env.

    Commands run in a task-specific directory,
    `<base_cwd>/hermes_tasks/<task_id>`, inside the leased environment.
    `cleanup()` removes that directory (best effort) and returns the
    environment to the pool.
    """

    def __init__(
        self,
        *,
        inner,
        base_cwd: str,
        timeout: int,
        task_id: str,
        pool: "Optional[ModalEnvPool]" = None,
    ):
        """Wrap *inner* and prepare the task working directory.

        Args:
            inner: Leased environment; must provide
                `execute(command, cwd=..., timeout=...)`.
            base_cwd: Parent directory for task dirs; trailing slashes are
                stripped and an empty result falls back to "/root".
            timeout: Default per-command timeout passed to `inner.execute`.
            task_id: Task identifier; a random UUID is used when falsy.
            pool: Pool to return *inner* to on cleanup. When omitted, the
                global pool is looked up at cleanup time.
        """
        self._inner = inner
        self._pool = pool
        self._released = False  # guards against releasing the lease twice
        self.timeout = timeout
        self.task_id = task_id or str(uuid.uuid4())
        self.base_cwd = base_cwd.rstrip("/") or "/root"
        self.cwd = f"{self.base_cwd}/hermes_tasks/{self.task_id}"
        # Ensure workdir exists and is empty-ish. The path is shell-quoted so
        # task ids with spaces or metacharacters cannot alter the command;
        # the glob stays outside the quotes so it still expands.
        quoted_cwd = shlex.quote(self.cwd)
        self._inner.execute(
            f"mkdir -p {quoted_cwd} && rm -rf {quoted_cwd}/*", cwd="/", timeout=60
        )

    @classmethod
    def acquire(cls, *, image: str, base_cwd: str, timeout: int, task_id: str, create_modal_env_fn):
        """Lease an environment from the global pool and wrap it for *task_id*.

        `image` is accepted for interface compatibility and is not used here;
        `create_modal_env_fn` is the factory the pool calls when it needs a
        new environment.

        NOTE(review): if construction raises after the lease was obtained, the
        environment is not returned to the pool.
        """
        pool = get_global_pool()
        inner = pool.acquire(create_modal_env_fn)
        # Remember the originating pool so the lease goes back where it came from.
        return cls(inner=inner, base_cwd=base_cwd, timeout=timeout, task_id=task_id, pool=pool)

    def execute(self, command: str, cwd: str = "", *, timeout: Optional[int] = None) -> dict:
        """Run *command* in the leased environment.

        Executes in the task workdir unless an explicit *cwd* is given, and
        uses the instance default when *timeout* is falsy.
        """
        workdir = cwd or self.cwd
        return self._inner.execute(command, cwd=workdir, timeout=timeout or self.timeout)

    def cleanup(self):
        """Remove the task directory (best effort) and release the lease.

        Idempotent: `stop()` delegates here, and releasing the same
        environment twice could hand it to two tasks at once.
        """
        if self._released:
            return
        self._released = True
        try:
            self._inner.execute(f"rm -rf {shlex.quote(self.cwd)}", cwd="/", timeout=60)
        except Exception:
            # Deliberately best-effort: a failed directory removal must not
            # prevent the environment from going back to the pool.
            pass
        pool = self._pool if self._pool is not None else get_global_pool()
        pool.release(self._inner)

    def stop(self):
        """Alias for `cleanup()`."""
        self.cleanup()