feat(environments): unified spawn-per-call execution layer

Replace dual execution model (PersistentShellMixin + per-backend oneshot) with spawn-per-call + session snapshot for all backends except ManagedModal.

Core changes:
- Every command spawns a fresh bash process; session snapshot (env vars, functions, aliases) captured at init and re-sourced before each command
- CWD persists via file-based read (local) or in-band stdout markers (remote)
- ProcessHandle protocol + _ThreadedProcessHandle adapter for SDK backends
- cancel_fn wired for Modal (sandbox.terminate) and Daytona (sandbox.stop)
- Shared utilities extracted: _pipe_stdin, _popen_bash, _load_json_store, _save_json_store, _file_mtime_key, _SYNC_INTERVAL_SECONDS
- Rate-limited file sync unified in base _before_execute() with _sync_files() hook
- execute_oneshot() removed; all 11 call sites in code_execution_tool.py migrated to execute()
- Daytona timeout wrapper replaced with SDK-native timeout parameter
- persistent_shell.py deleted (291 lines)

Backend-specific:
- Local: process-group kill via os.killpg, file-based CWD read
- Docker: -e env flags only on init_session, not per-command
- SSH: shlex.quote transport, ControlMaster connection reuse
- Singularity: apptainer exec with instance://, no forced --pwd
- Modal: _AsyncWorker + _ThreadedProcessHandle, cancel_fn -> sandbox.terminate
- Daytona: SDK-level timeout (not shell wrapper), cancel_fn -> sandbox.stop
- ManagedModal: unchanged (gateway owns execution); docstring added explaining why
2026-06-09 08:21:50 +00:00 · 2026-04-08 13:38:04 -07:00 · 2026-04-08 13:38:04 -07:00 · d684d7ee7e
commit d684d7ee7e
parent 7d26feb9a3
17 changed files with 1170 additions and 1686 deletions
--- a/tests/tools/test_base_environment.py
+++ b/tests/tools/test_base_environment.py
@ -0,0 +1,174 @@
+"""Tests for BaseEnvironment unified execution model.
+
+Tests _wrap_command(), _extract_cwd_from_output(), _embed_stdin_heredoc(),
+init_session() failure handling, and the CWD marker contract.
+"""
+
+import uuid
+from unittest.mock import MagicMock
+
+from tools.environments.base import BaseEnvironment, _cwd_marker
+
+
+class _TestableEnv(BaseEnvironment):
+    """Concrete subclass for testing base class methods."""
+
+    def __init__(self, cwd="/tmp", timeout=10):
+        super().__init__(cwd=cwd, timeout=timeout)
+
+    def _run_bash(self, cmd_string, *, login=False, timeout=120, stdin_data=None):
+        raise NotImplementedError("Use mock")
+
+    def cleanup(self):
+        pass
+
+
+class TestWrapCommand:
+    def test_basic_shape(self):
+        env = _TestableEnv()
+        env._snapshot_ready = True
+        wrapped = env._wrap_command("echo hello", "/tmp")
+
+        assert "source" in wrapped
+        assert "cd /tmp" in wrapped or "cd '/tmp'" in wrapped
+        assert "eval 'echo hello'" in wrapped
+        assert "__hermes_ec=$?" in wrapped
+        assert "export -p >" in wrapped
+        assert "pwd -P >" in wrapped
+        assert env._cwd_marker in wrapped
+        assert "exit $__hermes_ec" in wrapped
+
+    def test_no_snapshot_skips_source(self):
+        env = _TestableEnv()
+        env._snapshot_ready = False
+        wrapped = env._wrap_command("echo hello", "/tmp")
+
+        assert "source" not in wrapped
+
+    def test_single_quote_escaping(self):
+        env = _TestableEnv()
+        env._snapshot_ready = True
+        wrapped = env._wrap_command("echo 'hello world'", "/tmp")
+
+        assert "eval 'echo '\\''hello world'\\'''" in wrapped
+
+    def test_tilde_not_quoted(self):
+        env = _TestableEnv()
+        env._snapshot_ready = True
+        wrapped = env._wrap_command("ls", "~")
+
+        assert "cd ~" in wrapped
+        assert "cd '~'" not in wrapped
+
+    def test_cd_failure_exit_126(self):
+        env = _TestableEnv()
+        env._snapshot_ready = True
+        wrapped = env._wrap_command("ls", "/nonexistent")
+
+        assert "exit 126" in wrapped
+
+
+class TestExtractCwdFromOutput:
+    def test_happy_path(self):
+        env = _TestableEnv()
+        marker = env._cwd_marker
+        result = {
+            "output": f"hello\n{marker}/home/user{marker}\n",
+        }
+        env._extract_cwd_from_output(result)
+
+        assert env.cwd == "/home/user"
+        assert marker not in result["output"]
+
+    def test_missing_marker(self):
+        env = _TestableEnv()
+        result = {"output": "hello world\n"}
+        env._extract_cwd_from_output(result)
+
+        assert env.cwd == "/tmp"  # unchanged
+
+    def test_marker_in_command_output(self):
+        """If the marker appears in command output AND as the real marker,
+        rfind grabs the last (real) one."""
+        env = _TestableEnv()
+        marker = env._cwd_marker
+        result = {
+            "output": f"user typed {marker} in their output\nreal output\n{marker}/correct/path{marker}\n",
+        }
+        env._extract_cwd_from_output(result)
+
+        assert env.cwd == "/correct/path"
+
+    def test_output_cleaned(self):
+        env = _TestableEnv()
+        marker = env._cwd_marker
+        result = {
+            "output": f"hello\n{marker}/tmp{marker}\n",
+        }
+        env._extract_cwd_from_output(result)
+
+        assert "hello" in result["output"]
+        assert marker not in result["output"]
+
+
+class TestEmbedStdinHeredoc:
+    def test_heredoc_format(self):
+        result = BaseEnvironment._embed_stdin_heredoc("cat", "hello world")
+
+        assert result.startswith("cat << '")
+        assert "hello world" in result
+        assert "HERMES_STDIN_" in result
+
+    def test_unique_delimiter_each_call(self):
+        r1 = BaseEnvironment._embed_stdin_heredoc("cat", "data")
+        r2 = BaseEnvironment._embed_stdin_heredoc("cat", "data")
+
+        # Extract delimiters
+        d1 = r1.split("'")[1]
+        d2 = r2.split("'")[1]
+        assert d1 != d2  # UUID-based, should be unique
+
+
+class TestInitSessionFailure:
+    def test_snapshot_ready_false_on_failure(self):
+        env = _TestableEnv()
+
+        def failing_run_bash(*args, **kwargs):
+            raise RuntimeError("bash not found")
+
+        env._run_bash = failing_run_bash
+        env.init_session()
+
+        assert env._snapshot_ready is False
+
+    def test_login_flag_when_snapshot_not_ready(self):
+        """When _snapshot_ready=False, execute() should pass login=True to _run_bash."""
+        env = _TestableEnv()
+        env._snapshot_ready = False
+
+        calls = []
+        def mock_run_bash(cmd, *, login=False, timeout=120, stdin_data=None):
+            calls.append({"login": login})
+            # Return a mock process handle
+            mock = MagicMock()
+            mock.poll.return_value = 0
+            mock.returncode = 0
+            mock.stdout = iter([])
+            return mock
+
+        env._run_bash = mock_run_bash
+        env.execute("echo test")
+
+        assert len(calls) == 1
+        assert calls[0]["login"] is True
+
+
+class TestCwdMarker:
+    def test_marker_contains_session_id(self):
+        env = _TestableEnv()
+        assert env._session_id in env._cwd_marker
+
+    def test_unique_per_instance(self):
+        env1 = _TestableEnv()
+        env2 = _TestableEnv()
+        assert env1._cwd_marker != env2._cwd_marker
--- a/tests/tools/test_file_tools_live.py
+++ b/tests/tools/test_file_tools_live.py
@ -22,21 +22,19 @@ import pytest

 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

-from tools.environments.local import (
-    LocalEnvironment,
-    _clean_shell_noise,
-    _extract_fenced_output,
-    _OUTPUT_FENCE,
-    _SHELL_NOISE_SUBSTRINGS,
-)
+from tools.environments.local import LocalEnvironment
 from tools.file_operations import ShellFileOperations


 # ── Shared noise detection ───────────────────────────────────────────────
-# Every known shell noise pattern. If ANY of these appear in output that
-# isn't explicitly expected, the test fails with a clear message.
+# Known shell noise patterns that should never appear in command output.

-_ALL_NOISE_PATTERNS = list(_SHELL_NOISE_SUBSTRINGS) + [
+_ALL_NOISE_PATTERNS = [
+    "bash: cannot set terminal process group",
+    "bash: no job control in this shell",
+    "no job control in this shell",
+    "cannot set terminal process group",
+    "tcsetattr: Inappropriate ioctl for device",
    "bash: ",
    "Inappropriate ioctl",
    "Auto-suggestions:",
@ -88,134 +86,6 @@ def populated_dir(tmp_path):
    return tmp_path


-# ── _clean_shell_noise unit tests ────────────────────────────────────────
-
-class TestCleanShellNoise:
-    def test_single_noise_line(self):
-        output = "bash: no job control in this shell\nhello world\n"
-        result = _clean_shell_noise(output)
-        assert result == "hello world\n"
-
-    def test_double_noise_lines(self):
-        output = (
-            "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n"
-            "bash: no job control in this shell\n"
-            "actual output here\n"
-        )
-        result = _clean_shell_noise(output)
-        assert result == "actual output here\n"
-        _assert_clean(result)
-
-    def test_tcsetattr_noise(self):
-        output = (
-            "bash: [12345: 2 (255)] tcsetattr: Inappropriate ioctl for device\n"
-            "real content\n"
-        )
-        result = _clean_shell_noise(output)
-        assert result == "real content\n"
-        _assert_clean(result)
-
-    def test_triple_noise_lines(self):
-        output = (
-            "bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n"
-            "bash: no job control in this shell\n"
-            "bash: [999: 2 (255)] tcsetattr: Inappropriate ioctl for device\n"
-            "clean\n"
-        )
-        result = _clean_shell_noise(output)
-        assert result == "clean\n"
-
-    def test_no_noise_untouched(self):
-        assert _clean_shell_noise("hello\nworld\n") == "hello\nworld\n"
-
-    def test_empty_string(self):
-        assert _clean_shell_noise("") == ""
-
-    def test_only_noise_produces_empty(self):
-        output = "bash: no job control in this shell\n"
-        result = _clean_shell_noise(output)
-        _assert_clean(result)
-
-    def test_noise_in_middle_not_stripped(self):
-        """Noise in the middle is real output and should be preserved."""
-        output = "real\nbash: no job control in this shell\nmore real\n"
-        result = _clean_shell_noise(output)
-        assert result == output
-
-    def test_zsh_restored_session(self):
-        output = "Restored session: Mon Mar  2 22:16:54 +03 2026\nhello\n"
-        result = _clean_shell_noise(output)
-        assert result == "hello\n"
-
-    def test_zsh_saving_session_trailing(self):
-        output = "hello\nSaving session...completed.\n"
-        result = _clean_shell_noise(output)
-        assert result == "hello\n"
-
-    def test_zsh_oh_my_zsh_banner(self):
-        output = "Oh My Zsh on! | Auto-suggestions: press right\nhello\n"
-        result = _clean_shell_noise(output)
-        assert result == "hello\n"
-
-    def test_zsh_full_noise_sandwich(self):
-        """Both leading and trailing zsh noise stripped."""
-        output = (
-            "Restored session: Mon Mar  2\n"
-            "command not found: docker\n"
-            "Oh My Zsh on!\n"
-            "actual output\n"
-            "Saving session...completed.\n"
-        )
-        result = _clean_shell_noise(output)
-        assert result == "actual output\n"
-
-    def test_last_login_stripped(self):
-        output = "Last login: Mon Mar 2 22:00:00 on ttys001\nhello\n"
-        result = _clean_shell_noise(output)
-        assert result == "hello\n"
-
-
-# ── _extract_fenced_output unit tests ────────────────────────────────────
-
-class TestExtractFencedOutput:
-    def test_normal_fenced_output(self):
-        raw = f"noise\n{_OUTPUT_FENCE}hello world\n{_OUTPUT_FENCE}more noise\n"
-        assert _extract_fenced_output(raw) == "hello world\n"
-
-    def test_no_trailing_newline(self):
-        """printf output with no trailing newline is preserved."""
-        raw = f"noise{_OUTPUT_FENCE}exact{_OUTPUT_FENCE}noise"
-        assert _extract_fenced_output(raw) == "exact"
-
-    def test_no_fences_falls_back(self):
-        """Without fences, falls back to pattern-based cleaning."""
-        raw = "bash: no job control in this shell\nhello\n"
-        result = _extract_fenced_output(raw)
-        assert result == "hello\n"
-
-    def test_only_start_fence(self):
-        """Only start fence (e.g. user command called exit)."""
-        raw = f"noise{_OUTPUT_FENCE}hello\nSaving session...\n"
-        result = _extract_fenced_output(raw)
-        assert result == "hello\n"
-
-    def test_user_outputs_fence_string(self):
-        """If user command outputs the fence marker, it is preserved."""
-        raw = f"noise{_OUTPUT_FENCE}{_OUTPUT_FENCE}real\n{_OUTPUT_FENCE}noise"
-        result = _extract_fenced_output(raw)
-        # first fence -> last fence captures the middle including user's fence
-        assert _OUTPUT_FENCE in result
-        assert "real\n" in result
-
-    def test_empty_command_output(self):
-        raw = f"noise{_OUTPUT_FENCE}{_OUTPUT_FENCE}noise"
-        assert _extract_fenced_output(raw) == ""
-
-    def test_multiline_output(self):
-        raw = f"noise\n{_OUTPUT_FENCE}line1\nline2\nline3\n{_OUTPUT_FENCE}noise\n"
-        assert _extract_fenced_output(raw) == "line1\nline2\nline3\n"
-
-
 # ── LocalEnvironment.execute() ───────────────────────────────────────────

 class TestLocalEnvironmentExecute:
--- a/tests/tools/test_local_persistent.py
+++ b/tests/tools/test_local_persistent.py
@ -1,164 +0,0 @@
-"""Tests for the local persistent shell backend."""
-
-import glob as glob_mod
-
-import pytest
-
-from tools.environments.local import LocalEnvironment
-from tools.environments.persistent_shell import PersistentShellMixin
-
-
-class TestLocalConfig:
-    def test_local_persistent_default_false(self, monkeypatch):
-        monkeypatch.delenv("TERMINAL_LOCAL_PERSISTENT", raising=False)
-        from tools.terminal_tool import _get_env_config
-        assert _get_env_config()["local_persistent"] is False
-
-    def test_local_persistent_true(self, monkeypatch):
-        monkeypatch.setenv("TERMINAL_LOCAL_PERSISTENT", "true")
-        from tools.terminal_tool import _get_env_config
-        assert _get_env_config()["local_persistent"] is True
-
-    def test_local_persistent_yes(self, monkeypatch):
-        monkeypatch.setenv("TERMINAL_LOCAL_PERSISTENT", "yes")
-        from tools.terminal_tool import _get_env_config
-        assert _get_env_config()["local_persistent"] is True
-
-
-class TestMergeOutput:
-    def test_stdout_only(self):
-        assert PersistentShellMixin._merge_output("out", "") == "out"
-
-    def test_stderr_only(self):
-        assert PersistentShellMixin._merge_output("", "err") == "err"
-
-    def test_both(self):
-        assert PersistentShellMixin._merge_output("out", "err") == "out\nerr"
-
-    def test_empty(self):
-        assert PersistentShellMixin._merge_output("", "") == ""
-
-    def test_strips_trailing_newlines(self):
-        assert PersistentShellMixin._merge_output("out\n\n", "err\n") == "out\nerr"
-
-
-class TestLocalOneShotRegression:
-    def test_echo(self):
-        env = LocalEnvironment(persistent=False)
-        r = env.execute("echo hello")
-        assert r["returncode"] == 0
-        assert "hello" in r["output"]
-        env.cleanup()
-
-    def test_exit_code(self):
-        env = LocalEnvironment(persistent=False)
-        r = env.execute("exit 42")
-        assert r["returncode"] == 42
-        env.cleanup()
-
-    def test_state_does_not_persist(self):
-        env = LocalEnvironment(persistent=False)
-        env.execute("export HERMES_ONESHOT_LOCAL=yes")
-        r = env.execute("echo $HERMES_ONESHOT_LOCAL")
-        assert r["output"].strip() == ""
-        env.cleanup()
-
-    def test_oneshot_heredoc_does_not_leak_fence_wrapper(self):
-        """Heredoc closing line must not be merged with the fence wrapper tail."""
-        env = LocalEnvironment(persistent=False)
-        cmd = "cat <<'H_EOF'\nheredoc body line\nH_EOF"
-        r = env.execute(cmd)
-        env.cleanup()
-        assert r["returncode"] == 0
-        assert "heredoc body line" in r["output"]
-        assert "__hermes_rc" not in r["output"]
-        assert "printf '" not in r["output"]
-        assert "exit $" not in r["output"]
-
-
-class TestLocalPersistent:
-    @pytest.fixture
-    def env(self):
-        e = LocalEnvironment(persistent=True)
-        yield e
-        e.cleanup()
-
-    def test_echo(self, env):
-        r = env.execute("echo hello-persistent")
-        assert r["returncode"] == 0
-        assert "hello-persistent" in r["output"]
-
-    def test_env_var_persists(self, env):
-        env.execute("export HERMES_LOCAL_PERSIST_TEST=works")
-        r = env.execute("echo $HERMES_LOCAL_PERSIST_TEST")
-        assert r["output"].strip() == "works"
-
-    def test_cwd_persists(self, env):
-        env.execute("cd /tmp")
-        r = env.execute("pwd")
-        assert r["output"].strip() == "/tmp"
-
-    def test_exit_code(self, env):
-        r = env.execute("(exit 42)")
-        assert r["returncode"] == 42
-
-    def test_stderr(self, env):
-        r = env.execute("echo oops >&2")
-        assert r["returncode"] == 0
-        assert "oops" in r["output"]
-
-    def test_multiline_output(self, env):
-        r = env.execute("echo a; echo b; echo c")
-        lines = r["output"].strip().splitlines()
-        assert lines == ["a", "b", "c"]
-
-    def test_timeout_then_recovery(self, env):
-        r = env.execute("sleep 999", timeout=2)
-        assert r["returncode"] in (124, 130)
-        r = env.execute("echo alive")
-        assert r["returncode"] == 0
-        assert "alive" in r["output"]
-
-    def test_large_output(self, env):
-        r = env.execute("seq 1 1000")
-        assert r["returncode"] == 0
-        lines = r["output"].strip().splitlines()
-        assert len(lines) == 1000
-        assert lines[0] == "1"
-        assert lines[-1] == "1000"
-
-    def test_shell_variable_persists(self, env):
-        env.execute("MY_LOCAL_VAR=hello123")
-        r = env.execute("echo $MY_LOCAL_VAR")
-        assert r["output"].strip() == "hello123"
-
-    def test_cleanup_removes_temp_files(self, env):
-        env.execute("echo warmup")
-        prefix = env._temp_prefix
-        assert len(glob_mod.glob(f"{prefix}-*")) > 0
-        env.cleanup()
-        remaining = glob_mod.glob(f"{prefix}-*")
-        assert remaining == []
-
-    def test_state_does_not_leak_between_instances(self):
-        env1 = LocalEnvironment(persistent=True)
-        env2 = LocalEnvironment(persistent=True)
-        try:
-            env1.execute("export LEAK_TEST=from_env1")
-            r = env2.execute("echo $LEAK_TEST")
-            assert r["output"].strip() == ""
-        finally:
-            env1.cleanup()
-            env2.cleanup()
-
-    def test_special_characters_in_command(self, env):
-        r = env.execute("echo 'hello world'")
-        assert r["output"].strip() == "hello world"
-
-    def test_pipe_command(self, env):
-        r = env.execute("echo hello | tr 'h' 'H'")
-        assert r["output"].strip() == "Hello"
-
-    def test_multiple_commands_semicolon(self, env):
-        r = env.execute("X=42; echo $X")
-        assert r["output"].strip() == "42"
--- a/tests/tools/test_managed_modal_environment.py
+++ b/tests/tools/test_managed_modal_environment.py
@ -110,7 +110,7 @@ class _FakeResponse:
 def test_managed_modal_execute_polls_until_completed(monkeypatch):
    _install_fake_tools_package()
    managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py")
-    modal_common = sys.modules["tools.environments.modal_common"]
+    modal_common = sys.modules["tools.environments.modal_utils"]

    calls = []
    poll_count = {"value": 0}
@ -173,7 +173,7 @@ def test_managed_modal_create_sends_a_stable_idempotency_key(monkeypatch):
 def test_managed_modal_execute_cancels_on_interrupt(monkeypatch):
    interrupt_event = _install_fake_tools_package()
    managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py")
-    modal_common = sys.modules["tools.environments.modal_common"]
+    modal_common = sys.modules["tools.environments.modal_utils"]

    calls = []

@ -215,7 +215,7 @@ def test_managed_modal_execute_cancels_on_interrupt(monkeypatch):
 def test_managed_modal_execute_returns_descriptive_error_on_missing_exec(monkeypatch):
    _install_fake_tools_package()
    managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py")
-    modal_common = sys.modules["tools.environments.modal_common"]
+    modal_common = sys.modules["tools.environments.modal_utils"]

    def fake_request(method, url, headers=None, json=None, timeout=None):
        if method == "POST" and url.endswith("/v1/sandboxes"):
@ -293,7 +293,7 @@ def test_managed_modal_rejects_host_credential_passthrough():
 def test_managed_modal_execute_times_out_and_cancels(monkeypatch):
    _install_fake_tools_package()
    managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py")
-    modal_common = sys.modules["tools.environments.modal_common"]
+    modal_common = sys.modules["tools.environments.modal_utils"]

    calls = []
    monotonic_values = iter([0.0, 12.5])
--- a/tests/tools/test_threaded_process_handle.py
+++ b/tests/tools/test_threaded_process_handle.py
@ -0,0 +1,144 @@
+"""Tests for _ThreadedProcessHandle — the adapter for SDK backends."""
+
+import threading
+import time
+
+from tools.environments.base import _ThreadedProcessHandle
+
+
+class TestBasicExecution:
+    def test_successful_execution(self):
+        def exec_fn():
+            return ("hello world", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        handle.wait(timeout=5)
+
+        assert handle.returncode == 0
+        output = handle.stdout.read()
+        assert "hello world" in output
+
+    def test_nonzero_exit_code(self):
+        def exec_fn():
+            return ("error occurred", 42)
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        handle.wait(timeout=5)
+
+        assert handle.returncode == 42
+        output = handle.stdout.read()
+        assert "error occurred" in output
+
+    def test_exception_in_exec_fn(self):
+        def exec_fn():
+            raise RuntimeError("boom")
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        handle.wait(timeout=5)
+
+        assert handle.returncode == 1
+
+    def test_empty_output(self):
+        def exec_fn():
+            return ("", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        handle.wait(timeout=5)
+
+        assert handle.returncode == 0
+        output = handle.stdout.read()
+        assert output == ""
+
+
+class TestPolling:
+    def test_poll_returns_none_while_running(self):
+        event = threading.Event()
+
+        def exec_fn():
+            event.wait(timeout=5)
+            return ("done", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        assert handle.poll() is None
+
+        event.set()
+        handle.wait(timeout=5)
+        assert handle.poll() == 0
+
+    def test_poll_returns_returncode_when_done(self):
+        def exec_fn():
+            return ("ok", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        handle.wait(timeout=5)
+        assert handle.poll() == 0
+
+
+class TestCancelFn:
+    def test_cancel_fn_called_on_kill(self):
+        called = threading.Event()
+
+        def cancel():
+            called.set()
+
+        def exec_fn():
+            time.sleep(10)
+            return ("", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn, cancel_fn=cancel)
+        handle.kill()
+        assert called.is_set()
+
+    def test_cancel_fn_none_is_safe(self):
+        def exec_fn():
+            return ("ok", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn, cancel_fn=None)
+        handle.kill()  # should not raise
+        handle.wait(timeout=5)
+        assert handle.returncode == 0
+
+    def test_cancel_fn_exception_swallowed(self):
+        def cancel():
+            raise RuntimeError("cancel failed")
+
+        def exec_fn():
+            return ("ok", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn, cancel_fn=cancel)
+        handle.kill()  # should not raise despite cancel raising
+        handle.wait(timeout=5)
+
+
+class TestStdoutPipe:
+    def test_stdout_is_readable(self):
+        def exec_fn():
+            return ("line1\nline2\nline3\n", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        handle.wait(timeout=5)
+
+        lines = handle.stdout.readlines()
+        assert len(lines) == 3
+        assert lines[0] == "line1\n"
+
+    def test_stdout_iterable(self):
+        def exec_fn():
+            return ("a\nb\nc\n", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        handle.wait(timeout=5)
+
+        collected = list(handle.stdout)
+        assert len(collected) == 3
+
+    def test_unicode_output(self):
+        def exec_fn():
+            return ("hello 世界 🌍\n", 0)
+
+        handle = _ThreadedProcessHandle(exec_fn)
+        handle.wait(timeout=5)
+
+        output = handle.stdout.read()
+        assert "世界" in output
+        assert "🌍" in output