Add optional Modal sandbox pooling backend (TERMINAL_MODAL_MODE=pool)

- Default Modal behavior unchanged
- New pooled mode reuses warm Modal environments across tasks
- Per-task working directory (/root/hermes_tasks/<task_id>) with best-effort cleanup
- Minimal integration: terminal_tool _create_environment now accepts task_id and selects pooled modal wrapper when configured
2026-04-27 01:11:40 +00:00 · 2026-02-14 13:18:59 +10:00 · 2026-02-14 13:18:59 +10:00 · 6ff573fef2
commit 6ff573fef2
parent ae6435f787
2 changed files with 151 additions and 2 deletions
--- a/tools/modal_pool.py
+++ b/tools/modal_pool.py
@ -0,0 +1,128 @@
+"""Modal sandbox pooling backend for terminal_tool.
+
+This module provides an OPTIONAL pooled Modal backend that is compatible with
+Hermes-Agent's existing terminal_tool interface.
+
+Goals:
+- Keep the default Modal path unchanged.
+- Allow switching to pooled behavior with minimal friction:
+    TERMINAL_ENV=modal
+    TERMINAL_MODAL_MODE=pool
+
+Design:
+- Pool stores warm `_ModalEnvironment` instances (each wraps a live ModalDeployment).
+- Each task acquires one environment exclusively, uses a task-specific working dir,
+  then releases it back to the pool.
+- Release attempts to remove the task working directory to reduce cross-task leakage.
+
+NOTE: This is intentionally conservative and self-contained. It does not change
+any tool schemas or model-facing behavior.
+"""
+
+from __future__ import annotations
+
+import os
+import shlex
+import threading
+import time
+import uuid
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class _PooledEnv:
+    """Idle-list entry pairing an environment with a timestamp."""
+
+    # The pooled environment object; the pool treats it as opaque.
+    env: object
+    # Wall-clock time (time.time()) recorded when the entry was built, i.e.
+    # when the environment was handed back to the pool.
+    created_at: float
+
+
+class ModalEnvPool:
+    """Thread-safe pool of warm Modal environments.
+
+    ``_total`` counts every environment that is either leased out or idle;
+    ``_idle`` holds the ones currently available for reuse. Both are only
+    touched while holding ``_cond`` (which wraps ``_lock``).
+    """
+
+    def __init__(self, max_size: int = 4):
+        """Create an empty pool.
+
+        Args:
+            max_size: Soft cap on the number of live environments. Values
+                below 1 are clamped to 1, because a pool with no capacity
+                would force every ``acquire`` to wait out its full deadline.
+        """
+        self.max_size = max(1, int(max_size))
+        self._lock = threading.Lock()
+        self._cond = threading.Condition(self._lock)
+        self._idle: list[_PooledEnv] = []
+        self._total = 0
+
+    def acquire(self, create_fn, *, wait_s: float = 300):
+        """Lease an environment, reusing an idle one when possible.
+
+        Args:
+            create_fn: Zero-argument callable that builds a new environment.
+                It is invoked outside the lock because creation is slow.
+            wait_s: Maximum number of seconds to wait for an idle environment
+                or a free slot once the pool is at ``max_size``.
+
+        Returns:
+            An idle environment, or the result of ``create_fn()``.
+
+        Raises:
+            Exception: Whatever ``create_fn`` raises; the reserved slot is
+                given back before the exception propagates.
+        """
+        # Monotonic clock: the deadline must not move when the wall clock
+        # is adjusted while we are waiting.
+        deadline = time.monotonic() + wait_s
+        with self._cond:
+            while True:
+                if self._idle:
+                    # LIFO: hand out the most recently released environment.
+                    return self._idle.pop().env
+
+                if self._total < self.max_size:
+                    break
+
+                remaining = deadline - time.monotonic()
+                if remaining <= 0:
+                    # As a last resort, allow temporary oversubscription;
+                    # release() shrinks the pool back to max_size.
+                    break
+
+                # Wake at least every 5s to re-check the deadline.
+                self._cond.wait(timeout=min(5, remaining))
+
+            # Reserve the slot before leaving the lock so concurrent callers
+            # see the updated count.
+            self._total += 1
+
+        # Create outside lock (slow)
+        try:
+            return create_fn()
+        except Exception:
+            # Roll back count if create fails
+            with self._cond:
+                self._total -= 1
+                self._cond.notify()
+            raise
+
+    def release(self, env: object):
+        """Return a leased environment to the pool.
+
+        Releasing an environment that is already idle is ignored, so a
+        duplicate release cannot cause one environment to be leased to two
+        callers. If the pool is above ``max_size`` (after an oversubscribed
+        ``acquire``), the environment is dropped instead of being kept idle.
+
+        Args:
+            env: An environment previously returned by ``acquire``.
+        """
+        with self._cond:
+            if any(entry.env is env for entry in self._idle):
+                return
+            surplus = self._total > self.max_size
+            if surplus:
+                self._total -= 1
+            else:
+                self._idle.append(_PooledEnv(env=env, created_at=time.time()))
+                self._cond.notify()
+
+        if surplus:
+            # Best-effort teardown outside the lock (may be slow).
+            # NOTE(review): assumes the environment exposes cleanup() or
+            # stop() for teardown -- confirm against _ModalEnvironment.
+            for method_name in ("cleanup", "stop"):
+                closer = getattr(env, method_name, None)
+                if callable(closer):
+                    try:
+                        closer()
+                    except Exception:
+                        # Teardown failure must not break the releasing task.
+                        pass
+                    break
+
+
+# Global pool (process-level), built lazily by get_global_pool().
+_global_pool: Optional[ModalEnvPool] = None
+# Serializes first-time construction so concurrent callers share one pool.
+_global_pool_lock = threading.Lock()
+
+
+def get_global_pool() -> ModalEnvPool:
+    """Return the process-wide pool, creating it on first use.
+
+    The size is read once, at creation, from ``TERMINAL_MODAL_POOL_MAX``,
+    falling back to ``TERMINAL_MODAL_POOL_SIZE`` and then to 4. An empty
+    value is treated as unset.
+
+    Raises:
+        ValueError: If the configured size is not an integer.
+    """
+    global _global_pool
+    if _global_pool is None:
+        with _global_pool_lock:
+            # Re-check under the lock: another thread may have built the
+            # pool while this one was waiting.
+            if _global_pool is None:
+                raw_size = (
+                    os.getenv("TERMINAL_MODAL_POOL_MAX")
+                    or os.getenv("TERMINAL_MODAL_POOL_SIZE")
+                    or "4"
+                )
+                try:
+                    max_size = int(raw_size)
+                except ValueError:
+                    raise ValueError(
+                        "TERMINAL_MODAL_POOL_MAX / TERMINAL_MODAL_POOL_SIZE "
+                        f"must be an integer, got {raw_size!r}"
+                    ) from None
+                _global_pool = ModalEnvPool(max_size=max_size)
+    return _global_pool
+
+
+class ModalPooledTaskEnvironment:
+    """Per-task environment wrapper that leases a pooled Modal env.
+
+    Commands run in a task-specific working directory
+    (``<base_cwd>/hermes_tasks/<task_id>``) on the leased environment. The
+    directory is removed and the environment returned to the global pool on
+    ``cleanup()`` / ``stop()``.
+    """
+
+    def __init__(self, *, inner, base_cwd: str, timeout: int, task_id: str):
+        """Wrap an already-leased environment and prepare its workdir.
+
+        Args:
+            inner: The leased environment; must provide
+                ``execute(command, cwd=..., timeout=...)``.
+            base_cwd: Parent directory for task workdirs. Trailing slashes
+                are stripped; an empty result falls back to ``/root``.
+            timeout: Default per-command timeout passed to ``inner.execute``.
+            task_id: Identifier used as the workdir name; a random UUID is
+                generated when it is empty.
+        """
+        self._inner = inner
+        # Guards against returning the same environment to the pool twice.
+        self._released = False
+        self.timeout = timeout
+        self.task_id = task_id or str(uuid.uuid4())
+        self.base_cwd = base_cwd.rstrip("/") or "/root"
+        self.cwd = f"{self.base_cwd}/hermes_tasks/{self.task_id}"
+
+        # Start from an empty workdir. Removing and recreating the directory
+        # also clears dotfiles, which a `dir/*` glob would leave behind. The
+        # path is shell-quoted because task_id is caller-supplied.
+        quoted_cwd = shlex.quote(self.cwd)
+        self._inner.execute(
+            f"rm -rf -- {quoted_cwd} && mkdir -p -- {quoted_cwd}",
+            cwd="/",
+            timeout=60,
+        )
+
+    @classmethod
+    def acquire(cls, *, image: str, base_cwd: str, timeout: int, task_id: str, create_modal_env_fn):
+        """Lease an environment from the global pool and wrap it for a task.
+
+        Args:
+            image: Accepted for interface compatibility; not used here.
+            base_cwd: Forwarded to ``__init__``.
+            timeout: Forwarded to ``__init__``.
+            task_id: Forwarded to ``__init__``.
+            create_modal_env_fn: Zero-argument factory the pool calls when it
+                has to create a new environment.
+
+        Returns:
+            A ready ``ModalPooledTaskEnvironment``.
+        """
+        pool = get_global_pool()
+        inner = pool.acquire(create_modal_env_fn)
+        try:
+            return cls(inner=inner, base_cwd=base_cwd, timeout=timeout, task_id=task_id)
+        except Exception:
+            # Workdir setup failed: hand the lease back so the pool slot is
+            # not lost for the rest of the process.
+            pool.release(inner)
+            raise
+
+    def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict:
+        """Run ``command`` on the leased environment.
+
+        Args:
+            command: Command string passed through to the inner environment.
+            cwd: Working directory; the task workdir is used when empty.
+            timeout: Per-call timeout; the instance default is used when
+                this is ``None`` or 0.
+
+        Returns:
+            Whatever the inner environment's ``execute`` returns.
+        """
+        # Always execute in the task workdir unless an explicit cwd is given.
+        workdir = cwd or self.cwd
+        return self._inner.execute(command, cwd=workdir, timeout=timeout or self.timeout)
+
+    def cleanup(self):
+        """Remove the task workdir and return the environment to the pool.
+
+        Safe to call more than once: only the first call has any effect, so
+        calling both ``cleanup()`` and ``stop()`` releases the lease once.
+        """
+        if getattr(self, "_released", False):
+            return
+        # Set the flag first so a re-entrant or repeated call is a no-op.
+        self._released = True
+
+        # Best-effort cleanup of task directory, then return to pool
+        try:
+            self._inner.execute(f"rm -rf -- {shlex.quote(self.cwd)}", cwd="/", timeout=60)
+        except Exception:
+            # Deliberately ignored: a failed delete must not prevent the
+            # environment from going back to the pool.
+            pass
+        get_global_pool().release(self._inner)
+
+    def stop(self):
+        """Alias for ``cleanup()``."""
+        self.cleanup()