feat(environments): unified spawn-per-call execution layer

Replace dual execution model (PersistentShellMixin + per-backend oneshot) with spawn-per-call + session snapshot for all backends except ManagedModal. Core changes: - Every command spawns a fresh bash process; session snapshot (env vars, functions, aliases) captured at init and re-sourced before each command - CWD persists via file-based read (local) or in-band stdout markers (remote) - ProcessHandle protocol + _ThreadedProcessHandle adapter for SDK backends - cancel_fn wired for Modal (sandbox.terminate) and Daytona (sandbox.stop) - Shared utilities extracted: _pipe_stdin, _popen_bash, _load_json_store, _save_json_store, _file_mtime_key, _SYNC_INTERVAL_SECONDS - Rate-limited file sync unified in base _before_execute() with _sync_files() hook - execute_oneshot() removed; all 11 call sites in code_execution_tool.py migrated to execute() - Daytona timeout wrapper replaced with SDK-native timeout parameter - persistent_shell.py deleted (291 lines) Backend-specific: - Local: process-group kill via os.killpg, file-based CWD read - Docker: -e env flags only on init_session, not per-command - SSH: shlex.quote transport, ControlMaster connection reuse - Singularity: apptainer exec with instance://, no forced --pwd - Modal: _AsyncWorker + _ThreadedProcessHandle, cancel_fn -> sandbox.terminate - Daytona: SDK-level timeout (not shell wrapper), cancel_fn -> sandbox.stop - ManagedModal: unchanged (gateway owns execution); docstring added explaining why
2026-04-26 01:01:40 +00:00 · 2026-04-08 13:38:04 -07:00 · 2026-04-08 13:38:04 -07:00 · d684d7ee7e
commit d684d7ee7e
parent 7d26feb9a3
17 changed files with 1170 additions and 1686 deletions
--- a/tools/environments/base.py
+++ b/tools/environments/base.py
@ -1,11 +1,27 @@
-"""Base class for all Hermes execution environment backends."""
+"""Base class for all Hermes execution environment backends.

-from abc import ABC, abstractmethod
+Unified spawn-per-call model: every command spawns a fresh ``bash -c`` process.
+A session snapshot (env vars, functions, aliases) is captured once at init and
+re-sourced before each command. CWD persists via in-band stdout markers (remote)
+or a temp file (local).
+"""
+
+import json
+import logging
 import os
+import shlex
 import subprocess
+import threading
+import time
+import uuid
+from abc import ABC, abstractmethod
 from pathlib import Path
+from typing import IO, Callable, Protocol

 from hermes_constants import get_hermes_home
+from tools.interrupt import is_interrupted
+
+logger = logging.getLogger(__name__)


 def get_sandbox_dir() -> Path:
@ -23,30 +39,501 @@ def get_sandbox_dir() -> Path:
    return p


-class BaseEnvironment(ABC):
-    """Common interface for all Hermes execution backends.
+# ---------------------------------------------------------------------------
+# Shared constants and utilities
+# ---------------------------------------------------------------------------

-    Subclasses implement execute() and cleanup(). Shared helpers eliminate
-    duplicated subprocess boilerplate across backends.
+_SYNC_INTERVAL_SECONDS = 5.0
+
+
+def _pipe_stdin(proc: subprocess.Popen, data: str) -> None:
+    """Write *data* to proc.stdin on a daemon thread to avoid pipe-buffer deadlocks."""
+
+    def _write():
+        try:
+            proc.stdin.write(data)
+            proc.stdin.close()
+        except (BrokenPipeError, OSError):
+            pass
+
+    threading.Thread(target=_write, daemon=True).start()
+
+
+def _popen_bash(
+    cmd: list[str], stdin_data: str | None = None, **kwargs
+) -> subprocess.Popen:
+    """Spawn a subprocess with standard stdout/stderr/stdin setup.
+
+    If *stdin_data* is provided, writes it asynchronously via :func:`_pipe_stdin`.
+    Backends with special Popen needs (e.g. local's ``preexec_fn``) can bypass
+    this and call :func:`_pipe_stdin` directly.
    """
+    proc = subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
+        text=True,
+        **kwargs,
+    )
+    if stdin_data is not None:
+        _pipe_stdin(proc, stdin_data)
+    return proc
+
+
+def _load_json_store(path: Path) -> dict:
+    """Load a JSON file as a dict, returning ``{}`` on any error."""
+    if path.exists():
+        try:
+            return json.loads(path.read_text())
+        except Exception:
+            pass
+    return {}
+
+
+def _save_json_store(path: Path, data: dict) -> None:
+    """Write *data* as pretty-printed JSON to *path*."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(data, indent=2))
+
+
+def _file_mtime_key(host_path: str) -> tuple[float, int] | None:
+    """Return ``(mtime, size)`` for cache comparison, or ``None`` if unreadable."""
+    try:
+        st = Path(host_path).stat()
+        return (st.st_mtime, st.st_size)
+    except OSError:
+        return None
+
+
+# ---------------------------------------------------------------------------
+# ProcessHandle protocol
+# ---------------------------------------------------------------------------
+
+
+class ProcessHandle(Protocol):
+    """Duck type that every backend's _run_bash() must return.
+
+    subprocess.Popen satisfies this natively.  SDK backends (Modal, Daytona)
+    return _ThreadedProcessHandle which adapts their blocking calls.
+    """
+
+    def poll(self) -> int | None: ...
+    def kill(self) -> None: ...
+    def wait(self, timeout: float | None = None) -> int: ...
+
+    @property
+    def stdout(self) -> IO[str] | None: ...
+
+    @property
+    def returncode(self) -> int | None: ...
+
+
+class _ThreadedProcessHandle:
+    """Adapter for SDK backends (Modal, Daytona) that have no real subprocess.
+
+    Wraps a blocking ``exec_fn() -> (output_str, exit_code)`` in a background
+    thread and exposes a ProcessHandle-compatible interface.  An optional
+    ``cancel_fn`` is invoked on ``kill()`` for backend-specific cancellation
+    (e.g. Modal sandbox.terminate, Daytona sandbox.stop).
+    """
+
+    def __init__(
+        self,
+        exec_fn: Callable[[], tuple[str, int]],
+        cancel_fn: Callable[[], None] | None = None,
+    ):
+        self._cancel_fn = cancel_fn
+        self._done = threading.Event()
+        self._returncode: int | None = None
+        self._error: Exception | None = None
+
+        # Pipe for stdout — drain thread in _wait_for_process reads the read end.
+        read_fd, write_fd = os.pipe()
+        self._stdout = os.fdopen(read_fd, "r", encoding="utf-8", errors="replace")
+        self._write_fd = write_fd
+
+        def _worker():
+            try:
+                output, exit_code = exec_fn()
+                self._returncode = exit_code
+                # Write output into the pipe so drain thread picks it up.
+                try:
+                    os.write(self._write_fd, output.encode("utf-8", errors="replace"))
+                except OSError:
+                    pass
+            except Exception as exc:
+                self._error = exc
+                self._returncode = 1
+            finally:
+                try:
+                    os.close(self._write_fd)
+                except OSError:
+                    pass
+                self._done.set()
+
+        t = threading.Thread(target=_worker, daemon=True)
+        t.start()
+
+    @property
+    def stdout(self):
+        return self._stdout
+
+    @property
+    def returncode(self) -> int | None:
+        return self._returncode
+
+    def poll(self) -> int | None:
+        return self._returncode if self._done.is_set() else None
+
+    def kill(self):
+        if self._cancel_fn:
+            try:
+                self._cancel_fn()
+            except Exception:
+                pass
+
+    def wait(self, timeout: float | None = None) -> int:
+        self._done.wait(timeout=timeout)
+        return self._returncode
+
+
+# ---------------------------------------------------------------------------
+# CWD marker for remote backends
+# ---------------------------------------------------------------------------
+
+
+def _cwd_marker(session_id: str) -> str:
+    return f"__HERMES_CWD_{session_id}__"
+
+
+# ---------------------------------------------------------------------------
+# BaseEnvironment
+# ---------------------------------------------------------------------------
+
+
+class BaseEnvironment(ABC):
+    """Common interface and unified execution flow for all Hermes backends.
+
+    Subclasses implement ``_run_bash()`` and ``cleanup()``.  The base class
+    provides ``execute()`` with session snapshot sourcing, CWD tracking,
+    interrupt handling, and timeout enforcement.
+    """
+
+    # Subclasses that embed stdin as a heredoc (Modal, Daytona) set this.
+    _stdin_mode: str = "pipe"  # "pipe" or "heredoc"
+
+    # Snapshot creation timeout (override for slow cold-starts).
+    _snapshot_timeout: int = 30

    def __init__(self, cwd: str, timeout: int, env: dict = None):
        self.cwd = cwd
        self.timeout = timeout
        self.env = env or {}

-    @abstractmethod
-    def execute(self, command: str, cwd: str = "", *,
-                timeout: int | None = None,
-                stdin_data: str | None = None) -> dict:
-        """Execute a command, return {"output": str, "returncode": int}."""
-        ...
+        self._session_id = uuid.uuid4().hex[:12]
+        self._snapshot_path = f"/tmp/hermes-snap-{self._session_id}.sh"
+        self._cwd_file = f"/tmp/hermes-cwd-{self._session_id}.txt"
+        self._cwd_marker = _cwd_marker(self._session_id)
+        self._snapshot_ready = False
+        self._last_sync_time: float | None = (
+            None  # set to 0 by backends that need file sync
+        )
+
+    # ------------------------------------------------------------------
+    # Abstract methods
+    # ------------------------------------------------------------------
+
+    def _run_bash(
+        self,
+        cmd_string: str,
+        *,
+        login: bool = False,
+        timeout: int = 120,
+        stdin_data: str | None = None,
+    ) -> ProcessHandle:
+        """Spawn a bash process to run *cmd_string*.
+
+        Returns a ProcessHandle (subprocess.Popen or _ThreadedProcessHandle).
+        Must be overridden by every backend.
+        """
+        raise NotImplementedError(f"{type(self).__name__} must implement _run_bash()")

    @abstractmethod
    def cleanup(self):
        """Release backend resources (container, instance, connection)."""
        ...

+    # ------------------------------------------------------------------
+    # Session snapshot (init_session)
+    # ------------------------------------------------------------------
+
+    def init_session(self):
+        """Capture login shell environment into a snapshot file.
+
+        Called once after backend construction.  On success, sets
+        ``_snapshot_ready = True`` so subsequent commands source the snapshot
+        instead of running with ``bash -l``.
+        """
+        # Full capture: env vars, functions (filtered), aliases, shell options.
+        bootstrap = (
+            f"export -p > {self._snapshot_path}\n"
+            f"declare -f | grep -vE '^_[^_]' >> {self._snapshot_path}\n"
+            f"alias -p >> {self._snapshot_path}\n"
+            f"echo 'shopt -s expand_aliases' >> {self._snapshot_path}\n"
+            f"echo 'set +e' >> {self._snapshot_path}\n"
+            f"echo 'set +u' >> {self._snapshot_path}\n"
+            f"pwd -P > {self._cwd_file} 2>/dev/null || true\n"
+            f"printf '\\n{self._cwd_marker}%s{self._cwd_marker}\\n' \"$(pwd -P)\"\n"
+        )
+        try:
+            proc = self._run_bash(bootstrap, login=True, timeout=self._snapshot_timeout)
+            result = self._wait_for_process(proc, timeout=self._snapshot_timeout)
+            self._snapshot_ready = True
+            self._update_cwd(result)
+            logger.info(
+                "Session snapshot created (session=%s, cwd=%s)",
+                self._session_id,
+                self.cwd,
+            )
+        except Exception as exc:
+            logger.warning(
+                "init_session failed (session=%s): %s — "
+                "falling back to bash -l per command",
+                self._session_id,
+                exc,
+            )
+            self._snapshot_ready = False
+
+    # ------------------------------------------------------------------
+    # Command wrapping
+    # ------------------------------------------------------------------
+
+    def _wrap_command(self, command: str, cwd: str) -> str:
+        """Build the full bash script that sources snapshot, cd's, runs command,
+        re-dumps env vars, and emits CWD markers."""
+        escaped = command.replace("'", "'\\''")
+
+        parts = []
+
+        # Source snapshot (env vars from previous commands)
+        if self._snapshot_ready:
+            parts.append(f"source {self._snapshot_path} 2>/dev/null || true")
+
+        # cd to working directory — let bash expand ~ natively
+        quoted_cwd = (
+            shlex.quote(cwd) if cwd != "~" and not cwd.startswith("~/") else cwd
+        )
+        parts.append(f"cd {quoted_cwd} || exit 126")
+
+        # Run the actual command
+        parts.append(f"eval '{escaped}'")
+        parts.append("__hermes_ec=$?")
+
+        # Re-dump env vars to snapshot (last-writer-wins for concurrent calls)
+        if self._snapshot_ready:
+            parts.append(f"export -p > {self._snapshot_path} 2>/dev/null || true")
+
+        # Write CWD to file (local reads this) and stdout marker (remote parses this)
+        parts.append(f"pwd -P > {self._cwd_file} 2>/dev/null || true")
+        # Use a distinct line for the marker. The leading \n ensures
+        # the marker starts on its own line even if the command doesn't
+        # end with a newline (e.g. printf 'exact'). We'll strip this
+        # injected newline in _extract_cwd_from_output.
+        parts.append(
+            f"printf '\\n{self._cwd_marker}%s{self._cwd_marker}\\n' \"$(pwd -P)\""
+        )
+        parts.append("exit $__hermes_ec")
+
+        return "\n".join(parts)
+
+    # ------------------------------------------------------------------
+    # Stdin heredoc embedding (for SDK backends)
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _embed_stdin_heredoc(command: str, stdin_data: str) -> str:
+        """Append stdin_data as a shell heredoc to the command string."""
+        delimiter = f"HERMES_STDIN_{uuid.uuid4().hex[:12]}"
+        return f"{command} << '{delimiter}'\n{stdin_data}\n{delimiter}"
+
+    # ------------------------------------------------------------------
+    # Process lifecycle
+    # ------------------------------------------------------------------
+
+    def _wait_for_process(self, proc: ProcessHandle, timeout: int = 120) -> dict:
+        """Poll-based wait with interrupt checking and stdout draining.
+
+        Shared across all backends — not overridden.
+        """
+        output_chunks: list[str] = []
+
+        def _drain():
+            try:
+                for line in proc.stdout:
+                    output_chunks.append(line)
+            except UnicodeDecodeError:
+                output_chunks.clear()
+                output_chunks.append(
+                    "[binary output detected — raw bytes not displayable]"
+                )
+            except (ValueError, OSError):
+                pass
+
+        drain_thread = threading.Thread(target=_drain, daemon=True)
+        drain_thread.start()
+        deadline = time.monotonic() + timeout
+
+        while proc.poll() is None:
+            if is_interrupted():
+                self._kill_process(proc)
+                drain_thread.join(timeout=2)
+                return {
+                    "output": "".join(output_chunks) + "\n[Command interrupted]",
+                    "returncode": 130,
+                }
+            if time.monotonic() > deadline:
+                self._kill_process(proc)
+                drain_thread.join(timeout=2)
+                partial = "".join(output_chunks)
+                timeout_msg = f"\n[Command timed out after {timeout}s]"
+                return {
+                    "output": partial + timeout_msg
+                    if partial
+                    else timeout_msg.lstrip(),
+                    "returncode": 124,
+                }
+            time.sleep(0.2)
+
+        drain_thread.join(timeout=5)
+
+        try:
+            proc.stdout.close()
+        except Exception:
+            pass
+
+        return {"output": "".join(output_chunks), "returncode": proc.returncode}
+
+    def _kill_process(self, proc: ProcessHandle):
+        """Terminate a process. Subclasses may override for process-group kill."""
+        try:
+            proc.kill()
+        except (ProcessLookupError, PermissionError, OSError):
+            pass
+
+    # ------------------------------------------------------------------
+    # CWD extraction
+    # ------------------------------------------------------------------
+
+    def _update_cwd(self, result: dict):
+        """Extract CWD from command output. Override for local file-based read."""
+        self._extract_cwd_from_output(result)
+
+    def _extract_cwd_from_output(self, result: dict):
+        """Parse the __HERMES_CWD_{session}__ marker from stdout output.
+
+        Updates self.cwd and strips the marker from result["output"].
+        Used by remote backends (Docker, SSH, Modal, Daytona, Singularity).
+        """
+        output = result.get("output", "")
+        marker = self._cwd_marker
+        last = output.rfind(marker)
+        if last == -1:
+            return
+
+        # Find the opening marker before this closing one
+        search_start = max(0, last - 4096)  # CWD path won't be >4KB
+        first = output.rfind(marker, search_start, last)
+        if first == -1 or first == last:
+            return
+
+        cwd_path = output[first + len(marker) : last].strip()
+        if cwd_path:
+            self.cwd = cwd_path
+
+        # Strip the marker line AND the \n we injected before it.
+        # The wrapper emits: printf '\n__MARKER__%s__MARKER__\n'
+        # So the output looks like: <cmd output>\n__MARKER__path__MARKER__\n
+        # We want to remove everything from the injected \n onwards.
+        line_start = output.rfind("\n", 0, first)
+        if line_start == -1:
+            line_start = first
+        line_end = output.find("\n", last + len(marker))
+        line_end = line_end + 1 if line_end != -1 else len(output)
+
+        result["output"] = output[:line_start] + output[line_end:]
+
+    # ------------------------------------------------------------------
+    # Hooks
+    # ------------------------------------------------------------------
+
+    def _before_execute(self):
+        """Rate-limited file sync before each command.
+
+        Backends that need pre-command sync set ``self._last_sync_time = 0``
+        in ``__init__`` and override :meth:`_sync_files`.  Backends needing
+        extra pre-exec logic (e.g. Daytona sandbox restart check) override
+        this method and call ``super()._before_execute()``.
+        """
+        if self._last_sync_time is not None:
+            now = time.monotonic()
+            if now - self._last_sync_time >= _SYNC_INTERVAL_SECONDS:
+                self._sync_files()
+                self._last_sync_time = now
+
+    def _sync_files(self):
+        """Push files to remote environment. Called rate-limited by _before_execute."""
+        pass
+
+    # ------------------------------------------------------------------
+    # Unified execute()
+    # ------------------------------------------------------------------
+
+    def execute(
+        self,
+        command: str,
+        cwd: str = "",
+        *,
+        timeout: int | None = None,
+        stdin_data: str | None = None,
+    ) -> dict:
+        """Execute a command, return {"output": str, "returncode": int}."""
+        self._before_execute()
+
+        exec_command, sudo_stdin = self._prepare_command(command)
+        effective_timeout = timeout or self.timeout
+        effective_cwd = cwd or self.cwd
+
+        # Merge sudo stdin with caller stdin
+        if sudo_stdin is not None and stdin_data is not None:
+            effective_stdin = sudo_stdin + stdin_data
+        elif sudo_stdin is not None:
+            effective_stdin = sudo_stdin
+        else:
+            effective_stdin = stdin_data
+
+        # Embed stdin as heredoc for backends that need it
+        if effective_stdin and self._stdin_mode == "heredoc":
+            exec_command = self._embed_stdin_heredoc(exec_command, effective_stdin)
+            effective_stdin = None
+
+        wrapped = self._wrap_command(exec_command, effective_cwd)
+
+        # Use login shell if snapshot failed (so user's profile still loads)
+        login = not self._snapshot_ready
+
+        proc = self._run_bash(
+            wrapped, login=login, timeout=effective_timeout, stdin_data=effective_stdin
+        )
+        result = self._wait_for_process(proc, timeout=effective_timeout)
+        self._update_cwd(result)
+
+        return result
+
+    # ------------------------------------------------------------------
+    # Shared helpers
+    # ------------------------------------------------------------------
+
    def stop(self):
        """Alias for cleanup (compat with older callers)."""
        self.cleanup()
@ -57,53 +544,12 @@ class BaseEnvironment(ABC):
        except Exception:
            pass

-    # ------------------------------------------------------------------
-    # Shared helpers (eliminate duplication across backends)
-    # ------------------------------------------------------------------
-
    def _prepare_command(self, command: str) -> tuple[str, str | None]:
-        """Transform sudo commands if SUDO_PASSWORD is available.
-
-        Returns:
-            (transformed_command, sudo_stdin) — see _transform_sudo_command
-            for the full contract.  Callers that drive a subprocess directly
-            should prepend sudo_stdin (when not None) to any stdin_data they
-            pass to Popen.  Callers that embed stdin via heredoc (modal,
-            daytona) handle sudo_stdin in their own execute() method.
-        """
+        """Transform sudo commands if SUDO_PASSWORD is available."""
        from tools.terminal_tool import _transform_sudo_command
+
        return _transform_sudo_command(command)

-    def _build_run_kwargs(self, timeout: int | None,
-                          stdin_data: str | None = None) -> dict:
-        """Build common subprocess.run kwargs for non-interactive execution."""
-        kw = {
-            "text": True,
-            "timeout": timeout or self.timeout,
-            "encoding": "utf-8",
-            "errors": "replace",
-            "stdout": subprocess.PIPE,
-            "stderr": subprocess.STDOUT,
-        }
-        if stdin_data is not None:
-            kw["input"] = stdin_data
-        else:
-            kw["stdin"] = subprocess.DEVNULL
-        return kw
-
-    def execute_oneshot(self, command: str, cwd: str = "", *,
-                        timeout: int | None = None,
-                        stdin_data: str | None = None) -> dict:
-        """Execute a command bypassing any persistent shell.
-
-        Safe for concurrent use alongside a long-running execute() call.
-        Backends that maintain a persistent shell (SSH, Local) override this
-        to route through their oneshot path, avoiding the shell lock.
-        Non-persistent backends delegate to execute().
-        """
-        return self.execute(command, cwd=cwd, timeout=timeout,
-                            stdin_data=stdin_data)
-
    def _timeout_result(self, timeout: int | None) -> dict:
        """Standard return dict when a command times out."""
        return {