feat(environments): unified spawn-per-call execution layer

Replace dual execution model (PersistentShellMixin + per-backend oneshot) with spawn-per-call + session snapshot for all backends except ManagedModal.

Core changes:
- Every command spawns a fresh bash process; session snapshot (env vars, functions, aliases) captured at init and re-sourced before each command
- CWD persists via file-based read (local) or in-band stdout markers (remote)
- ProcessHandle protocol + _ThreadedProcessHandle adapter for SDK backends
- cancel_fn wired for Modal (sandbox.terminate) and Daytona (sandbox.stop)
- Shared utilities extracted: _pipe_stdin, _popen_bash, _load_json_store, _save_json_store, _file_mtime_key, _SYNC_INTERVAL_SECONDS
- Rate-limited file sync unified in base _before_execute() with _sync_files() hook
- execute_oneshot() removed; all 11 call sites in code_execution_tool.py migrated to execute()
- Daytona timeout wrapper replaced with SDK-native timeout parameter
- persistent_shell.py deleted (291 lines)

Backend-specific:
- Local: process-group kill via os.killpg, file-based CWD read
- Docker: -e env flags only on init_session, not per-command
- SSH: shlex.quote transport, ControlMaster connection reuse
- Singularity: apptainer exec with instance://, no forced --pwd
- Modal: _AsyncWorker + _ThreadedProcessHandle, cancel_fn -> sandbox.terminate
- Daytona: SDK-level timeout (not shell wrapper), cancel_fn -> sandbox.stop
- ManagedModal: unchanged (gateway owns execution); docstring added explaining why
2026-04-27 01:11:40 +00:00 · 2026-04-08 13:38:04 -07:00 · 2026-04-08 13:38:04 -07:00 · d684d7ee7e
commit d684d7ee7e
parent 7d26feb9a3
17 changed files with 1170 additions and 1686 deletions
--- a/tests/tools/test_base_environment.py
+++ b/tests/tools/test_base_environment.py
@ -0,0 +1,174 @@
+"""Tests for BaseEnvironment unified execution model.
+
+Tests _wrap_command(), _extract_cwd_from_output(), _embed_stdin_heredoc(),
+init_session() failure handling, and the CWD marker contract.
+"""
+
+import uuid
+from unittest.mock import MagicMock
+
+from tools.environments.base import BaseEnvironment, _cwd_marker
+
+
+class _TestableEnv(BaseEnvironment):
+    """Bare-bones concrete BaseEnvironment used to exercise base-class helpers."""
+
+    def __init__(self, cwd="/tmp", timeout=10):
+        super().__init__(cwd=cwd, timeout=timeout)
+
+    def _run_bash(self, cmd_string, *, login=False, timeout=120, stdin_data=None):
+        raise NotImplementedError("Use mock")  # tests assign their own stub instead
+
+    def cleanup(self):
+        """No-op: this stub defines no resources of its own to release."""
+
+
+class TestWrapCommand:
+    """Checks the bash script text that ``_wrap_command()`` generates."""
+
+    def test_basic_shape(self):
+        sut = _TestableEnv()
+        sut._snapshot_ready = True
+        script = sut._wrap_command("echo hello", "/tmp")
+
+        assert "source" in script
+        # The working directory may be emitted bare or shell-quoted.
+        assert any(form in script for form in ("cd /tmp", "cd '/tmp'"))
+        tail = ("eval 'echo hello'", "__hermes_ec=$?", "export -p >",
+                "pwd -P >", sut._cwd_marker, "exit $__hermes_ec")
+        for fragment in tail:
+            assert fragment in script
+
+    def test_no_snapshot_skips_source(self):
+        sut = _TestableEnv()
+        sut._snapshot_ready = False
+
+        assert "source" not in sut._wrap_command("echo hello", "/tmp")
+
+    def test_single_quote_escaping(self):
+        sut = _TestableEnv()
+        sut._snapshot_ready = True
+        # Each embedded ' is expected to be rewritten as '\'' inside the eval string.
+        escaped = "eval 'echo '\\''hello world'\\'''"
+
+        assert escaped in sut._wrap_command("echo 'hello world'", "/tmp")
+
+    def test_tilde_not_quoted(self):
+        sut = _TestableEnv()
+        sut._snapshot_ready = True
+        script = sut._wrap_command("ls", "~")
+
+        # "~" is expected to appear unquoted in the generated cd.
+        assert "cd ~" in script and "cd '~'" not in script
+
+    def test_cd_failure_exit_126(self):
+        sut = _TestableEnv()
+        sut._snapshot_ready = True
+
+        assert "exit 126" in sut._wrap_command("ls", "/nonexistent")
+
+
+class TestExtractCwdFromOutput:
+    """Expected contract of ``_extract_cwd_from_output()``: set cwd, strip the marker."""
+
+    def test_happy_path(self):
+        sut = _TestableEnv()
+        tag = sut._cwd_marker
+        payload = {"output": f"hello\n{tag}/home/user{tag}\n"}
+        sut._extract_cwd_from_output(payload)
+
+        assert sut.cwd == "/home/user"
+        assert tag not in payload["output"]
+
+    def test_missing_marker(self):
+        sut = _TestableEnv()
+        payload = {"output": "hello world\n"}
+        sut._extract_cwd_from_output(payload)
+
+        # No marker pair in the output, so cwd must keep its constructor value.
+        assert sut.cwd == "/tmp"
+
+    def test_marker_in_command_output(self):
+        """A marker echoed by the command itself must not win over the real,
+        trailing marker pair."""
+        sut = _TestableEnv()
+        tag = sut._cwd_marker
+        raw = f"user typed {tag} in their output\nreal output\n{tag}/correct/path{tag}\n"
+        payload = {"output": raw}
+        sut._extract_cwd_from_output(payload)
+
+        assert sut.cwd == "/correct/path"
+
+    def test_output_cleaned(self):
+        sut = _TestableEnv()
+        tag = sut._cwd_marker
+        payload = {"output": f"hello\n{tag}/tmp{tag}\n"}
+        sut._extract_cwd_from_output(payload)
+
+        # The command's own output is kept; only the marker is removed.
+        cleaned = payload["output"]
+        assert "hello" in cleaned
+        assert tag not in cleaned
+
+
+class TestEmbedStdinHeredoc:
+    """Checks the here-document text built by ``_embed_stdin_heredoc()``."""
+
+    def test_heredoc_format(self):
+        script = BaseEnvironment._embed_stdin_heredoc("cat", "hello world")
+
+        assert script.startswith("cat << '")
+        assert "hello world" in script and "HERMES_STDIN_" in script
+
+    def test_unique_delimiter_each_call(self):
+        scripts = [BaseEnvironment._embed_stdin_heredoc("cat", "data") for _ in range(2)]
+        # The delimiter is the text between the first pair of single quotes.
+        first, second = (text.split("'")[1] for text in scripts)
+
+        # Identical inputs must still get distinct delimiters.
+        assert first != second
+
+
+class TestInitSessionFailure:
+    """Behaviour expected when the session snapshot is unavailable."""
+
+    def test_snapshot_ready_false_on_failure(self):
+        sut = _TestableEnv()
+
+        def exploding_run_bash(*args, **kwargs):
+            raise RuntimeError("bash not found")
+
+        sut._run_bash = exploding_run_bash
+        sut.init_session()  # must absorb the error rather than propagate it
+
+        assert sut._snapshot_ready is False
+
+    def test_login_flag_when_snapshot_not_ready(self):
+        """With ``_snapshot_ready`` False, ``execute()`` must call ``_run_bash(login=True)``."""
+        sut = _TestableEnv()
+        sut._snapshot_ready = False
+        recorded = []
+
+        def recording_run_bash(cmd, *, login=False, timeout=120, stdin_data=None):
+            recorded.append({"login": login})
+            # Stand-in process handle: already exited with status 0, no output.
+            handle = MagicMock(returncode=0, stdout=iter([]))
+            handle.poll.return_value = 0
+            return handle
+
+        sut._run_bash = recording_run_bash
+        sut.execute("echo test")
+
+        assert len(recorded) == 1
+        assert recorded[0]["login"] is True
+
+
+class TestCwdMarker:
+    def test_marker_contains_session_id(self):
+        """The marker must embed the owning environment's session id."""
+        sut = _TestableEnv()
+        assert sut._session_id in sut._cwd_marker
+
+    def test_unique_per_instance(self):
+        first, second = _TestableEnv(), _TestableEnv()
+        assert first._cwd_marker != second._cwd_marker  # markers differ per instance