mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
feat(environments): unified spawn-per-call execution layer
Replace dual execution model (PersistentShellMixin + per-backend oneshot) with spawn-per-call + session snapshot for all backends except ManagedModal.

Core changes:
- Every command spawns a fresh bash process; session snapshot (env vars, functions, aliases) captured at init and re-sourced before each command
- CWD persists via file-based read (local) or in-band stdout markers (remote)
- ProcessHandle protocol + _ThreadedProcessHandle adapter for SDK backends
- cancel_fn wired for Modal (sandbox.terminate) and Daytona (sandbox.stop)
- Shared utilities extracted: _pipe_stdin, _popen_bash, _load_json_store, _save_json_store, _file_mtime_key, _SYNC_INTERVAL_SECONDS
- Rate-limited file sync unified in base _before_execute() with _sync_files() hook
- execute_oneshot() removed; all 11 call sites in code_execution_tool.py migrated to execute()
- Daytona timeout wrapper replaced with SDK-native timeout parameter
- persistent_shell.py deleted (291 lines)

Backend-specific:
- Local: process-group kill via os.killpg, file-based CWD read
- Docker: -e env flags only on init_session, not per-command
- SSH: shlex.quote transport, ControlMaster connection reuse
- Singularity: apptainer exec with instance://, no forced --pwd
- Modal: _AsyncWorker + _ThreadedProcessHandle, cancel_fn -> sandbox.terminate
- Daytona: SDK-level timeout (not shell wrapper), cancel_fn -> sandbox.stop
- ManagedModal: unchanged (gateway owns execution); docstring added explaining why
This commit is contained in:
parent
7d26feb9a3
commit
d684d7ee7e
17 changed files with 1170 additions and 1686 deletions
174
tests/tools/test_base_environment.py
Normal file
174
tests/tools/test_base_environment.py
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
"""Tests for BaseEnvironment unified execution model.
|
||||
|
||||
Tests _wrap_command(), _extract_cwd_from_output(), _embed_stdin_heredoc(),
|
||||
init_session() failure handling, and the CWD marker contract.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from tools.environments.base import BaseEnvironment, _cwd_marker
|
||||
|
||||
|
||||
class _TestableEnv(BaseEnvironment):
    """Minimal concrete ``BaseEnvironment`` for exercising base-class helpers.

    ``_run_bash`` always raises; tests that need command execution replace
    it on the instance with their own callable.
    """

    def __init__(self, cwd="/tmp", timeout=10):
        super().__init__(cwd=cwd, timeout=timeout)

    def _run_bash(self, cmd_string, *, login=False, timeout=120, stdin_data=None):
        # Never meant to run for real: tests swap this attribute for a stub.
        raise NotImplementedError("Use mock")

    def cleanup(self):
        """No resources are held by this test double, so nothing to release."""
        return None
|
||||
|
||||
|
||||
class TestWrapCommand:
    """Shape checks on the shell script text returned by ``_wrap_command()``."""

    def test_basic_shape(self):
        """With the snapshot flagged ready, every expected fragment is present."""
        environment = _TestableEnv()
        environment._snapshot_ready = True
        script = environment._wrap_command("echo hello", "/tmp")

        assert "source" in script
        # The cd target may appear bare or single-quoted; either is accepted.
        assert any(form in script for form in ("cd /tmp", "cd '/tmp'"))
        remaining_fragments = (
            "eval 'echo hello'",
            "__hermes_ec=$?",
            "export -p >",
            "pwd -P >",
            environment._cwd_marker,
            "exit $__hermes_ec",
        )
        for fragment in remaining_fragments:
            assert fragment in script

    def test_no_snapshot_skips_source(self):
        """Without a ready snapshot the script must not contain ``source``."""
        environment = _TestableEnv()
        environment._snapshot_ready = False

        script = environment._wrap_command("echo hello", "/tmp")

        assert "source" not in script

    def test_single_quote_escaping(self):
        """Single quotes inside the command are escaped within the eval string."""
        environment = _TestableEnv()
        environment._snapshot_ready = True

        script = environment._wrap_command("echo 'hello world'", "/tmp")

        expected_eval = "eval 'echo '\\''hello world'\\'''"
        assert expected_eval in script

    def test_tilde_not_quoted(self):
        """A ``~`` working directory is passed to ``cd`` unquoted."""
        environment = _TestableEnv()
        environment._snapshot_ready = True

        script = environment._wrap_command("ls", "~")

        assert "cd ~" in script
        assert "cd '~'" not in script

    def test_cd_failure_exit_126(self):
        """The script contains an ``exit 126`` path.

        Presumably this is the branch taken when ``cd`` fails; the test only
        checks that the text is present.
        """
        environment = _TestableEnv()
        environment._snapshot_ready = True

        script = environment._wrap_command("ls", "/nonexistent")

        assert "exit 126" in script
|
||||
|
||||
|
||||
class TestExtractCwdFromOutput:
    """Checks for ``_extract_cwd_from_output()`` parsing of the CWD marker."""

    def test_happy_path(self):
        """A marker-delimited path updates ``cwd`` and is stripped from output."""
        environment = _TestableEnv()
        tag = environment._cwd_marker
        payload = {"output": f"hello\n{tag}/home/user{tag}\n"}

        environment._extract_cwd_from_output(payload)

        assert environment.cwd == "/home/user"
        assert tag not in payload["output"]

    def test_missing_marker(self):
        """Output without any marker leaves ``cwd`` at its initial value."""
        environment = _TestableEnv()
        payload = {"output": "hello world\n"}

        environment._extract_cwd_from_output(payload)

        assert environment.cwd == "/tmp"  # unchanged

    def test_marker_in_command_output(self):
        """If the marker appears in command output AND as the real marker,
        the last (real) occurrence is the one that determines ``cwd``."""
        environment = _TestableEnv()
        tag = environment._cwd_marker
        noisy_output = (
            f"user typed {tag} in their output\nreal output\n{tag}/correct/path{tag}\n"
        )
        payload = {"output": noisy_output}

        environment._extract_cwd_from_output(payload)

        assert environment.cwd == "/correct/path"

    def test_output_cleaned(self):
        """Regular output survives extraction while the marker is removed."""
        environment = _TestableEnv()
        tag = environment._cwd_marker
        payload = {"output": f"hello\n{tag}/tmp{tag}\n"}

        environment._extract_cwd_from_output(payload)

        cleaned = payload["output"]
        assert "hello" in cleaned
        assert tag not in cleaned
|
||||
|
||||
|
||||
class TestEmbedStdinHeredoc:
    """Checks for the heredoc text built by ``_embed_stdin_heredoc()``."""

    def test_heredoc_format(self):
        """The command is followed by a quoted heredoc carrying the stdin data."""
        heredoc = BaseEnvironment._embed_stdin_heredoc("cat", "hello world")

        assert heredoc.startswith("cat << '")
        for fragment in ("hello world", "HERMES_STDIN_"):
            assert fragment in heredoc

    def test_unique_delimiter_each_call(self):
        """Two calls with identical arguments use different delimiters."""
        # The delimiter is the text between the first pair of single quotes.
        first_delim, second_delim = (
            BaseEnvironment._embed_stdin_heredoc("cat", "data").split("'")[1]
            for _ in range(2)
        )

        assert first_delim != second_delim  # UUID-based, should be unique
|
||||
|
||||
|
||||
class TestInitSessionFailure:
    """Behaviour when the session snapshot cannot be (or has not been) captured."""

    def test_snapshot_ready_false_on_failure(self):
        """``init_session()`` tolerates a raising ``_run_bash`` and leaves the
        snapshot flagged as not ready."""
        environment = _TestableEnv()

        def broken_run_bash(*args, **kwargs):
            raise RuntimeError("bash not found")

        environment._run_bash = broken_run_bash
        environment.init_session()

        assert environment._snapshot_ready is False

    def test_login_flag_when_snapshot_not_ready(self):
        """When _snapshot_ready=False, execute() should pass login=True to _run_bash."""
        environment = _TestableEnv()
        environment._snapshot_ready = False

        login_flags = []

        def recording_run_bash(cmd, *, login=False, timeout=120, stdin_data=None):
            login_flags.append(login)
            # Stand-in process handle: already finished, exit 0, no output.
            process = MagicMock()
            process.poll.return_value = 0
            process.returncode = 0
            process.stdout = iter([])
            return process

        environment._run_bash = recording_run_bash
        environment.execute("echo test")

        assert len(login_flags) == 1
        assert login_flags[0] is True
|
||||
|
||||
|
||||
class TestCwdMarker:
    """Contract checks for the per-environment CWD marker string."""

    def test_marker_contains_session_id(self):
        """The marker embeds the environment's session id."""
        environment = _TestableEnv()
        marker = environment._cwd_marker

        assert environment._session_id in marker

    def test_unique_per_instance(self):
        """Two separately constructed environments get different markers."""
        first_marker = _TestableEnv()._cwd_marker
        second_marker = _TestableEnv()._cwd_marker

        assert first_marker != second_marker
|
||||
|
|
@ -22,21 +22,19 @@ import pytest
|
|||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
|
||||
from tools.environments.local import (
|
||||
LocalEnvironment,
|
||||
_clean_shell_noise,
|
||||
_extract_fenced_output,
|
||||
_OUTPUT_FENCE,
|
||||
_SHELL_NOISE_SUBSTRINGS,
|
||||
)
|
||||
from tools.environments.local import LocalEnvironment
|
||||
from tools.file_operations import ShellFileOperations
|
||||
|
||||
|
||||
# ── Shared noise detection ───────────────────────────────────────────────
|
||||
# Every known shell noise pattern. If ANY of these appear in output that
|
||||
# isn't explicitly expected, the test fails with a clear message.
|
||||
# Known shell noise patterns that should never appear in command output.
|
||||
|
||||
_ALL_NOISE_PATTERNS = list(_SHELL_NOISE_SUBSTRINGS) + [
|
||||
_ALL_NOISE_PATTERNS = [
|
||||
"bash: cannot set terminal process group",
|
||||
"bash: no job control in this shell",
|
||||
"no job control in this shell",
|
||||
"cannot set terminal process group",
|
||||
"tcsetattr: Inappropriate ioctl for device",
|
||||
"bash: ",
|
||||
"Inappropriate ioctl",
|
||||
"Auto-suggestions:",
|
||||
|
|
@ -88,134 +86,6 @@ def populated_dir(tmp_path):
|
|||
return tmp_path
|
||||
|
||||
|
||||
# ── _clean_shell_noise unit tests ────────────────────────────────────────
|
||||
|
||||
class TestCleanShellNoise:
|
||||
def test_single_noise_line(self):
|
||||
output = "bash: no job control in this shell\nhello world\n"
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "hello world\n"
|
||||
|
||||
def test_double_noise_lines(self):
|
||||
output = (
|
||||
"bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n"
|
||||
"bash: no job control in this shell\n"
|
||||
"actual output here\n"
|
||||
)
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "actual output here\n"
|
||||
_assert_clean(result)
|
||||
|
||||
def test_tcsetattr_noise(self):
|
||||
output = (
|
||||
"bash: [12345: 2 (255)] tcsetattr: Inappropriate ioctl for device\n"
|
||||
"real content\n"
|
||||
)
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "real content\n"
|
||||
_assert_clean(result)
|
||||
|
||||
def test_triple_noise_lines(self):
|
||||
output = (
|
||||
"bash: cannot set terminal process group (-1): Inappropriate ioctl for device\n"
|
||||
"bash: no job control in this shell\n"
|
||||
"bash: [999: 2 (255)] tcsetattr: Inappropriate ioctl for device\n"
|
||||
"clean\n"
|
||||
)
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "clean\n"
|
||||
|
||||
def test_no_noise_untouched(self):
|
||||
assert _clean_shell_noise("hello\nworld\n") == "hello\nworld\n"
|
||||
|
||||
def test_empty_string(self):
|
||||
assert _clean_shell_noise("") == ""
|
||||
|
||||
def test_only_noise_produces_empty(self):
|
||||
output = "bash: no job control in this shell\n"
|
||||
result = _clean_shell_noise(output)
|
||||
_assert_clean(result)
|
||||
|
||||
def test_noise_in_middle_not_stripped(self):
|
||||
"""Noise in the middle is real output and should be preserved."""
|
||||
output = "real\nbash: no job control in this shell\nmore real\n"
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == output
|
||||
|
||||
def test_zsh_restored_session(self):
|
||||
output = "Restored session: Mon Mar 2 22:16:54 +03 2026\nhello\n"
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "hello\n"
|
||||
|
||||
def test_zsh_saving_session_trailing(self):
|
||||
output = "hello\nSaving session...completed.\n"
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "hello\n"
|
||||
|
||||
def test_zsh_oh_my_zsh_banner(self):
|
||||
output = "Oh My Zsh on! | Auto-suggestions: press right\nhello\n"
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "hello\n"
|
||||
|
||||
def test_zsh_full_noise_sandwich(self):
|
||||
"""Both leading and trailing zsh noise stripped."""
|
||||
output = (
|
||||
"Restored session: Mon Mar 2\n"
|
||||
"command not found: docker\n"
|
||||
"Oh My Zsh on!\n"
|
||||
"actual output\n"
|
||||
"Saving session...completed.\n"
|
||||
)
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "actual output\n"
|
||||
|
||||
def test_last_login_stripped(self):
|
||||
output = "Last login: Mon Mar 2 22:00:00 on ttys001\nhello\n"
|
||||
result = _clean_shell_noise(output)
|
||||
assert result == "hello\n"
|
||||
|
||||
|
||||
# ── _extract_fenced_output unit tests ────────────────────────────────────
|
||||
|
||||
class TestExtractFencedOutput:
|
||||
def test_normal_fenced_output(self):
|
||||
raw = f"noise\n{_OUTPUT_FENCE}hello world\n{_OUTPUT_FENCE}more noise\n"
|
||||
assert _extract_fenced_output(raw) == "hello world\n"
|
||||
|
||||
def test_no_trailing_newline(self):
|
||||
"""printf output with no trailing newline is preserved."""
|
||||
raw = f"noise{_OUTPUT_FENCE}exact{_OUTPUT_FENCE}noise"
|
||||
assert _extract_fenced_output(raw) == "exact"
|
||||
|
||||
def test_no_fences_falls_back(self):
|
||||
"""Without fences, falls back to pattern-based cleaning."""
|
||||
raw = "bash: no job control in this shell\nhello\n"
|
||||
result = _extract_fenced_output(raw)
|
||||
assert result == "hello\n"
|
||||
|
||||
def test_only_start_fence(self):
|
||||
"""Only start fence (e.g. user command called exit)."""
|
||||
raw = f"noise{_OUTPUT_FENCE}hello\nSaving session...\n"
|
||||
result = _extract_fenced_output(raw)
|
||||
assert result == "hello\n"
|
||||
|
||||
def test_user_outputs_fence_string(self):
|
||||
"""If user command outputs the fence marker, it is preserved."""
|
||||
raw = f"noise{_OUTPUT_FENCE}{_OUTPUT_FENCE}real\n{_OUTPUT_FENCE}noise"
|
||||
result = _extract_fenced_output(raw)
|
||||
# first fence -> last fence captures the middle including user's fence
|
||||
assert _OUTPUT_FENCE in result
|
||||
assert "real\n" in result
|
||||
|
||||
def test_empty_command_output(self):
|
||||
raw = f"noise{_OUTPUT_FENCE}{_OUTPUT_FENCE}noise"
|
||||
assert _extract_fenced_output(raw) == ""
|
||||
|
||||
def test_multiline_output(self):
|
||||
raw = f"noise\n{_OUTPUT_FENCE}line1\nline2\nline3\n{_OUTPUT_FENCE}noise\n"
|
||||
assert _extract_fenced_output(raw) == "line1\nline2\nline3\n"
|
||||
|
||||
|
||||
# ── LocalEnvironment.execute() ───────────────────────────────────────────
|
||||
|
||||
class TestLocalEnvironmentExecute:
|
||||
|
|
|
|||
|
|
@ -1,164 +0,0 @@
|
|||
"""Tests for the local persistent shell backend."""
|
||||
|
||||
import glob as glob_mod
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.environments.local import LocalEnvironment
|
||||
from tools.environments.persistent_shell import PersistentShellMixin
|
||||
|
||||
|
||||
class TestLocalConfig:
|
||||
def test_local_persistent_default_false(self, monkeypatch):
|
||||
monkeypatch.delenv("TERMINAL_LOCAL_PERSISTENT", raising=False)
|
||||
from tools.terminal_tool import _get_env_config
|
||||
assert _get_env_config()["local_persistent"] is False
|
||||
|
||||
def test_local_persistent_true(self, monkeypatch):
|
||||
monkeypatch.setenv("TERMINAL_LOCAL_PERSISTENT", "true")
|
||||
from tools.terminal_tool import _get_env_config
|
||||
assert _get_env_config()["local_persistent"] is True
|
||||
|
||||
def test_local_persistent_yes(self, monkeypatch):
|
||||
monkeypatch.setenv("TERMINAL_LOCAL_PERSISTENT", "yes")
|
||||
from tools.terminal_tool import _get_env_config
|
||||
assert _get_env_config()["local_persistent"] is True
|
||||
|
||||
|
||||
class TestMergeOutput:
|
||||
def test_stdout_only(self):
|
||||
assert PersistentShellMixin._merge_output("out", "") == "out"
|
||||
|
||||
def test_stderr_only(self):
|
||||
assert PersistentShellMixin._merge_output("", "err") == "err"
|
||||
|
||||
def test_both(self):
|
||||
assert PersistentShellMixin._merge_output("out", "err") == "out\nerr"
|
||||
|
||||
def test_empty(self):
|
||||
assert PersistentShellMixin._merge_output("", "") == ""
|
||||
|
||||
def test_strips_trailing_newlines(self):
|
||||
assert PersistentShellMixin._merge_output("out\n\n", "err\n") == "out\nerr"
|
||||
|
||||
|
||||
class TestLocalOneShotRegression:
|
||||
def test_echo(self):
|
||||
env = LocalEnvironment(persistent=False)
|
||||
r = env.execute("echo hello")
|
||||
assert r["returncode"] == 0
|
||||
assert "hello" in r["output"]
|
||||
env.cleanup()
|
||||
|
||||
def test_exit_code(self):
|
||||
env = LocalEnvironment(persistent=False)
|
||||
r = env.execute("exit 42")
|
||||
assert r["returncode"] == 42
|
||||
env.cleanup()
|
||||
|
||||
def test_state_does_not_persist(self):
|
||||
env = LocalEnvironment(persistent=False)
|
||||
env.execute("export HERMES_ONESHOT_LOCAL=yes")
|
||||
r = env.execute("echo $HERMES_ONESHOT_LOCAL")
|
||||
assert r["output"].strip() == ""
|
||||
env.cleanup()
|
||||
|
||||
def test_oneshot_heredoc_does_not_leak_fence_wrapper(self):
|
||||
"""Heredoc closing line must not be merged with the fence wrapper tail."""
|
||||
env = LocalEnvironment(persistent=False)
|
||||
cmd = "cat <<'H_EOF'\nheredoc body line\nH_EOF"
|
||||
r = env.execute(cmd)
|
||||
env.cleanup()
|
||||
assert r["returncode"] == 0
|
||||
assert "heredoc body line" in r["output"]
|
||||
assert "__hermes_rc" not in r["output"]
|
||||
assert "printf '" not in r["output"]
|
||||
assert "exit $" not in r["output"]
|
||||
|
||||
|
||||
class TestLocalPersistent:
|
||||
@pytest.fixture
|
||||
def env(self):
|
||||
e = LocalEnvironment(persistent=True)
|
||||
yield e
|
||||
e.cleanup()
|
||||
|
||||
def test_echo(self, env):
|
||||
r = env.execute("echo hello-persistent")
|
||||
assert r["returncode"] == 0
|
||||
assert "hello-persistent" in r["output"]
|
||||
|
||||
def test_env_var_persists(self, env):
|
||||
env.execute("export HERMES_LOCAL_PERSIST_TEST=works")
|
||||
r = env.execute("echo $HERMES_LOCAL_PERSIST_TEST")
|
||||
assert r["output"].strip() == "works"
|
||||
|
||||
def test_cwd_persists(self, env):
|
||||
env.execute("cd /tmp")
|
||||
r = env.execute("pwd")
|
||||
assert r["output"].strip() == "/tmp"
|
||||
|
||||
def test_exit_code(self, env):
|
||||
r = env.execute("(exit 42)")
|
||||
assert r["returncode"] == 42
|
||||
|
||||
def test_stderr(self, env):
|
||||
r = env.execute("echo oops >&2")
|
||||
assert r["returncode"] == 0
|
||||
assert "oops" in r["output"]
|
||||
|
||||
def test_multiline_output(self, env):
|
||||
r = env.execute("echo a; echo b; echo c")
|
||||
lines = r["output"].strip().splitlines()
|
||||
assert lines == ["a", "b", "c"]
|
||||
|
||||
def test_timeout_then_recovery(self, env):
|
||||
r = env.execute("sleep 999", timeout=2)
|
||||
assert r["returncode"] in (124, 130)
|
||||
r = env.execute("echo alive")
|
||||
assert r["returncode"] == 0
|
||||
assert "alive" in r["output"]
|
||||
|
||||
def test_large_output(self, env):
|
||||
r = env.execute("seq 1 1000")
|
||||
assert r["returncode"] == 0
|
||||
lines = r["output"].strip().splitlines()
|
||||
assert len(lines) == 1000
|
||||
assert lines[0] == "1"
|
||||
assert lines[-1] == "1000"
|
||||
|
||||
def test_shell_variable_persists(self, env):
|
||||
env.execute("MY_LOCAL_VAR=hello123")
|
||||
r = env.execute("echo $MY_LOCAL_VAR")
|
||||
assert r["output"].strip() == "hello123"
|
||||
|
||||
def test_cleanup_removes_temp_files(self, env):
|
||||
env.execute("echo warmup")
|
||||
prefix = env._temp_prefix
|
||||
assert len(glob_mod.glob(f"{prefix}-*")) > 0
|
||||
env.cleanup()
|
||||
remaining = glob_mod.glob(f"{prefix}-*")
|
||||
assert remaining == []
|
||||
|
||||
def test_state_does_not_leak_between_instances(self):
|
||||
env1 = LocalEnvironment(persistent=True)
|
||||
env2 = LocalEnvironment(persistent=True)
|
||||
try:
|
||||
env1.execute("export LEAK_TEST=from_env1")
|
||||
r = env2.execute("echo $LEAK_TEST")
|
||||
assert r["output"].strip() == ""
|
||||
finally:
|
||||
env1.cleanup()
|
||||
env2.cleanup()
|
||||
|
||||
def test_special_characters_in_command(self, env):
|
||||
r = env.execute("echo 'hello world'")
|
||||
assert r["output"].strip() == "hello world"
|
||||
|
||||
def test_pipe_command(self, env):
|
||||
r = env.execute("echo hello | tr 'h' 'H'")
|
||||
assert r["output"].strip() == "Hello"
|
||||
|
||||
def test_multiple_commands_semicolon(self, env):
|
||||
r = env.execute("X=42; echo $X")
|
||||
assert r["output"].strip() == "42"
|
||||
|
|
@ -110,7 +110,7 @@ class _FakeResponse:
|
|||
def test_managed_modal_execute_polls_until_completed(monkeypatch):
|
||||
_install_fake_tools_package()
|
||||
managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py")
|
||||
modal_common = sys.modules["tools.environments.modal_common"]
|
||||
modal_common = sys.modules["tools.environments.modal_utils"]
|
||||
|
||||
calls = []
|
||||
poll_count = {"value": 0}
|
||||
|
|
@ -173,7 +173,7 @@ def test_managed_modal_create_sends_a_stable_idempotency_key(monkeypatch):
|
|||
def test_managed_modal_execute_cancels_on_interrupt(monkeypatch):
|
||||
interrupt_event = _install_fake_tools_package()
|
||||
managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py")
|
||||
modal_common = sys.modules["tools.environments.modal_common"]
|
||||
modal_common = sys.modules["tools.environments.modal_utils"]
|
||||
|
||||
calls = []
|
||||
|
||||
|
|
@ -215,7 +215,7 @@ def test_managed_modal_execute_cancels_on_interrupt(monkeypatch):
|
|||
def test_managed_modal_execute_returns_descriptive_error_on_missing_exec(monkeypatch):
|
||||
_install_fake_tools_package()
|
||||
managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py")
|
||||
modal_common = sys.modules["tools.environments.modal_common"]
|
||||
modal_common = sys.modules["tools.environments.modal_utils"]
|
||||
|
||||
def fake_request(method, url, headers=None, json=None, timeout=None):
|
||||
if method == "POST" and url.endswith("/v1/sandboxes"):
|
||||
|
|
@ -293,7 +293,7 @@ def test_managed_modal_rejects_host_credential_passthrough():
|
|||
def test_managed_modal_execute_times_out_and_cancels(monkeypatch):
|
||||
_install_fake_tools_package()
|
||||
managed_modal = _load_tool_module("tools.environments.managed_modal", "environments/managed_modal.py")
|
||||
modal_common = sys.modules["tools.environments.modal_common"]
|
||||
modal_common = sys.modules["tools.environments.modal_utils"]
|
||||
|
||||
calls = []
|
||||
monotonic_values = iter([0.0, 12.5])
|
||||
|
|
|
|||
144
tests/tools/test_threaded_process_handle.py
Normal file
144
tests/tools/test_threaded_process_handle.py
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
"""Tests for _ThreadedProcessHandle — the adapter for SDK backends."""
|
||||
|
||||
import threading
|
||||
import time
|
||||
|
||||
from tools.environments.base import _ThreadedProcessHandle
|
||||
|
||||
|
||||
class TestBasicExecution:
    """Return code and output propagation from ``exec_fn`` to the handle."""

    def test_successful_execution(self):
        """Output and a zero exit code from ``exec_fn`` are exposed on the handle."""
        handle = _ThreadedProcessHandle(lambda: ("hello world", 0))
        handle.wait(timeout=5)

        assert handle.returncode == 0
        captured = handle.stdout.read()
        assert "hello world" in captured

    def test_nonzero_exit_code(self):
        """A non-zero exit code is passed through together with the output."""
        handle = _ThreadedProcessHandle(lambda: ("error occurred", 42))
        handle.wait(timeout=5)

        assert handle.returncode == 42
        captured = handle.stdout.read()
        assert "error occurred" in captured

    def test_exception_in_exec_fn(self):
        """An exception raised inside ``exec_fn`` results in return code 1."""

        def exploding_exec():
            raise RuntimeError("boom")

        handle = _ThreadedProcessHandle(exploding_exec)
        handle.wait(timeout=5)

        assert handle.returncode == 1

    def test_empty_output(self):
        """Empty output reads back as an empty string with exit code 0."""
        handle = _ThreadedProcessHandle(lambda: ("", 0))
        handle.wait(timeout=5)

        assert handle.returncode == 0
        captured = handle.stdout.read()
        assert captured == ""
|
||||
|
||||
|
||||
class TestPolling:
    """``poll()`` semantics: ``None`` while running, the return code once done."""

    def test_poll_returns_none_while_running(self):
        """``poll()`` is ``None`` while ``exec_fn`` is blocked, then 0 after it ends."""
        release = threading.Event()

        def exec_fn():
            # Hold the worker until the test lets it go (bounded by a timeout
            # so a broken test cannot block forever).
            release.wait(timeout=5)
            return ("done", 0)

        handle = _ThreadedProcessHandle(exec_fn)
        try:
            assert handle.poll() is None
        finally:
            # Always release the worker, even when the assertion above fails,
            # so it does not sit in its wait for the full timeout.
            release.set()

        handle.wait(timeout=5)
        assert handle.poll() == 0

    def test_poll_returns_returncode_when_done(self):
        """After ``wait()`` returns, ``poll()`` reports the exit code."""

        def exec_fn():
            return ("ok", 0)

        handle = _ThreadedProcessHandle(exec_fn)
        handle.wait(timeout=5)
        assert handle.poll() == 0
|
||||
|
||||
|
||||
class TestCancelFn:
    """``kill()`` must invoke the optional ``cancel_fn`` and tolerate its absence
    or failure."""

    def test_cancel_fn_called_on_kill(self):
        """``kill()`` calls the supplied ``cancel_fn``."""
        called = threading.Event()

        def cancel():
            called.set()

        def exec_fn():
            # Block until cancellation instead of sleeping a fixed 10 s: the
            # worker then exits as soon as kill() fires rather than lingering
            # after the test has finished.
            called.wait(timeout=10)
            return ("", 0)

        handle = _ThreadedProcessHandle(exec_fn, cancel_fn=cancel)
        handle.kill()
        assert called.is_set()
        # The worker is unblocked now; let it finish before the test returns.
        handle.wait(timeout=5)

    def test_cancel_fn_none_is_safe(self):
        """``kill()`` without a ``cancel_fn`` does not raise and the run completes."""

        def exec_fn():
            return ("ok", 0)

        handle = _ThreadedProcessHandle(exec_fn, cancel_fn=None)
        handle.kill()  # should not raise
        handle.wait(timeout=5)
        assert handle.returncode == 0

    def test_cancel_fn_exception_swallowed(self):
        """An exception raised by ``cancel_fn`` does not propagate out of ``kill()``."""

        def cancel():
            raise RuntimeError("cancel failed")

        def exec_fn():
            return ("ok", 0)

        handle = _ThreadedProcessHandle(exec_fn, cancel_fn=cancel)
        handle.kill()  # should not raise despite cancel raising
        handle.wait(timeout=5)
|
||||
|
||||
|
||||
class TestStdoutPipe:
    """The handle's ``stdout`` behaves like a readable, iterable text stream."""

    def test_stdout_is_readable(self):
        """``readlines()`` yields one entry per output line, newline included."""
        handle = _ThreadedProcessHandle(lambda: ("line1\nline2\nline3\n", 0))
        handle.wait(timeout=5)

        all_lines = handle.stdout.readlines()

        assert len(all_lines) == 3
        assert all_lines[0] == "line1\n"

    def test_stdout_iterable(self):
        """Iterating ``stdout`` yields each of the three output lines."""
        handle = _ThreadedProcessHandle(lambda: ("a\nb\nc\n", 0))
        handle.wait(timeout=5)

        seen = [line for line in handle.stdout]

        assert len(seen) == 3

    def test_unicode_output(self):
        """Non-ASCII text (CJK and emoji) survives the trip through the pipe."""
        handle = _ThreadedProcessHandle(lambda: ("hello 世界 🌍\n", 0))
        handle.wait(timeout=5)

        text = handle.stdout.read()

        for glyphs in ("世界", "🌍"):
            assert glyphs in text
|
||||
Loading…
Add table
Add a link
Reference in a new issue