fix: update tests for unified spawn-per-call execution model

- Docker env tests: verify _build_init_env_args() instead of per-execute
  Popen flags (env forwarding is now init-time only)
- Docker: preserve explicit forward_env bypass of blocklist from main
- Daytona tests: adapt to SDK-native timeout, _ThreadedProcessHandle,
  base.py interrupt handling, HERMES_STDIN_ heredoc prefix
- Modal tests: fix _load_module to include _ThreadedProcessHandle stub,
  check ensurepip in _resolve_modal_image instead of __init__
- SSH tests: mock time.sleep on base module instead of removed ssh import
- Add missing BaseEnvironment attributes to __new__()-based test fixtures
This commit is contained in:
Teknium 2026-04-08 17:17:41 -07:00 committed by Teknium
parent d684d7ee7e
commit e19252afc4
5 changed files with 120 additions and 84 deletions

View file

@@ -59,8 +59,8 @@ def daytona_sdk(monkeypatch):
@pytest.fixture()
def make_env(daytona_sdk, monkeypatch):
"""Factory that creates a DaytonaEnvironment with a mocked SDK."""
# Prevent is_interrupted from interfering
monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False)
# Prevent is_interrupted from interfering — patch where it's used (base.py)
monkeypatch.setattr("tools.environments.base.is_interrupted", lambda: False)
# Prevent skills/credential sync from consuming mock exec calls
monkeypatch.setattr("tools.credential_files.get_credential_file_mounts", lambda: [])
monkeypatch.setattr("tools.credential_files.get_skills_directory_mount", lambda **kw: None)
@@ -221,41 +221,45 @@ class TestCleanup:
class TestExecute:
def test_basic_command(self, make_env):
sb = _make_sandbox()
# First call: $HOME detection; subsequent calls: actual commands
# Calls: (1) $HOME detection, (2) init_session bootstrap, (3) actual command
sb.process.exec.side_effect = [
_make_exec_response(result="/root"), # $HOME
_make_exec_response(result="", exit_code=0), # init_session
_make_exec_response(result="hello", exit_code=0), # actual cmd
]
sb.state = "started"
env = make_env(sandbox=sb)
result = env.execute("echo hello")
assert result["output"] == "hello"
assert "hello" in result["output"]
assert result["returncode"] == 0
def test_command_wrapped_with_shell_timeout(self, make_env):
def test_sdk_timeout_passed_to_exec(self, make_env):
"""SDK native timeout is passed to sandbox.process.exec()."""
sb = _make_sandbox()
sb.process.exec.side_effect = [
_make_exec_response(result="/root"),
_make_exec_response(result="", exit_code=0), # init_session
_make_exec_response(result="ok", exit_code=0),
]
sb.state = "started"
env = make_env(sandbox=sb, timeout=42)
env.execute("echo hello")
# The command sent to exec should be wrapped with `timeout N sh -c '...'`
# The exec call should receive timeout= kwarg (SDK native timeout)
call_args = sb.process.exec.call_args_list[-1]
assert call_args[1]["timeout"] == 42
# The command should NOT have a shell `timeout` prefix
cmd = call_args[0][0]
assert cmd.startswith("timeout 42 sh -c ")
# SDK timeout param should NOT be passed
assert "timeout" not in call_args[1]
assert not cmd.startswith("timeout ")
def test_timeout_returns_exit_code_124(self, make_env):
"""Shell timeout utility returns exit code 124."""
"""SDK-level timeout surfaces as exit code 124 via _wait_for_process."""
sb = _make_sandbox()
sb.process.exec.side_effect = [
_make_exec_response(result="/root"),
_make_exec_response(result="", exit_code=124),
_make_exec_response(result="", exit_code=0), # init_session
_make_exec_response(result="", exit_code=124), # actual cmd
]
sb.state = "started"
env = make_env(sandbox=sb)
@@ -267,6 +271,7 @@ class TestExecute:
sb = _make_sandbox()
sb.process.exec.side_effect = [
_make_exec_response(result="/root"),
_make_exec_response(result="", exit_code=0), # init_session
_make_exec_response(result="not found", exit_code=127),
]
sb.state = "started"
@@ -279,6 +284,7 @@ class TestExecute:
sb = _make_sandbox()
sb.process.exec.side_effect = [
_make_exec_response(result="/root"),
_make_exec_response(result="", exit_code=0), # init_session
_make_exec_response(result="ok", exit_code=0),
]
sb.state = "started"
@@ -286,39 +292,47 @@ class TestExecute:
env.execute("python3", stdin_data="print('hi')")
# Check that the command passed to exec contains heredoc markers
# (single quotes get shell-escaped by shlex.quote, so check components)
# Base class uses HERMES_STDIN_ prefix for heredoc delimiters
call_args = sb.process.exec.call_args_list[-1]
cmd = call_args[0][0]
assert "HERMES_EOF_" in cmd
assert "HERMES_STDIN_" in cmd
assert "print" in cmd
assert "hi" in cmd
def test_custom_cwd_passed_through(self, make_env):
def test_custom_cwd_in_command_wrapper(self, make_env):
"""CWD is handled by _wrap_command() in the command string, not as a kwarg."""
sb = _make_sandbox()
sb.process.exec.side_effect = [
_make_exec_response(result="/root"),
_make_exec_response(result="", exit_code=0), # init_session
_make_exec_response(result="/tmp", exit_code=0),
]
sb.state = "started"
env = make_env(sandbox=sb)
env.execute("pwd", cwd="/tmp")
call_kwargs = sb.process.exec.call_args_list[-1][1]
assert call_kwargs["cwd"] == "/tmp"
# CWD should be embedded in the command string via _wrap_command
call_args = sb.process.exec.call_args_list[-1]
cmd = call_args[0][0]
assert "cd /tmp" in cmd
# CWD should NOT be passed as a kwarg to exec
assert "cwd" not in call_args[1]
def test_daytona_error_triggers_retry(self, make_env, daytona_sdk):
sb = _make_sandbox()
sb.state = "started"
sb.process.exec.side_effect = [
_make_exec_response(result="/root"), # $HOME
_make_exec_response(result="", exit_code=0), # init_session
daytona_sdk.DaytonaError("transient"), # first attempt fails
_make_exec_response(result="ok", exit_code=0), # retry succeeds
]
env = make_env(sandbox=sb)
result = env.execute("echo retry")
assert result["output"] == "ok"
assert result["returncode"] == 0
# DaytonaError now surfaces directly through _ThreadedProcessHandle
# (no retry logic) — the error becomes returncode=1
assert result["returncode"] == 1
# ---------------------------------------------------------------------------
@@ -359,14 +373,18 @@ class TestInterrupt:
calls["n"] += 1
if calls["n"] == 1:
return _make_exec_response(result="/root") # $HOME detection
if calls["n"] == 2:
return _make_exec_response(result="", exit_code=0) # init_session
event.wait(timeout=5) # simulate long-running command
return _make_exec_response(result="done", exit_code=0)
sb.process.exec.side_effect = exec_side_effect
env = make_env(sandbox=sb)
# is_interrupted is checked by base.py's _wait_for_process,
# patch where it's actually referenced (base.py's local binding)
monkeypatch.setattr(
"tools.environments.daytona.is_interrupted", lambda: True
"tools.environments.base.is_interrupted", lambda: True
)
try:
result = env.execute("sleep 10")
@@ -377,23 +395,24 @@ class TestInterrupt:
# ---------------------------------------------------------------------------
# Retry exhaustion
# DaytonaError surfaces directly (no retry)
# ---------------------------------------------------------------------------
class TestRetryExhausted:
def test_both_attempts_fail(self, make_env, daytona_sdk):
"""DaytonaError surfaces directly as rc=1 (retry logic was removed)."""
sb = _make_sandbox()
sb.state = "started"
sb.process.exec.side_effect = [
_make_exec_response(result="/root"), # $HOME
daytona_sdk.DaytonaError("fail1"), # first attempt
daytona_sdk.DaytonaError("fail2"), # retry
_make_exec_response(result="", exit_code=0), # init_session
daytona_sdk.DaytonaError("fail1"), # actual command fails
]
env = make_env(sandbox=sb)
result = env.execute("echo x")
# Error surfaces directly through _ThreadedProcessHandle (rc=1)
assert result["returncode"] == 1
assert "Daytona execution error" in result["output"]
# ---------------------------------------------------------------------------

View file

@@ -245,43 +245,42 @@ def _make_execute_only_env(forward_env=None):
env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
env._container_id = "test-container"
env._docker_exe = "/usr/bin/docker"
# Base class attributes needed by unified execute()
env._session_id = "test123"
env._snapshot_path = "/tmp/hermes-snap-test123.sh"
env._cwd_file = "/tmp/hermes-cwd-test123.txt"
env._cwd_marker = "__HERMES_CWD_test123__"
env._snapshot_ready = True
env._last_sync_time = None
env._init_env_args = []
return env
def test_execute_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
def test_init_env_args_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
"""_build_init_env_args picks up forwarded env vars from .env file at init time."""
env = _make_execute_only_env(["GITHUB_TOKEN"])
popen_calls = []
def _fake_popen(cmd, **kwargs):
popen_calls.append(cmd)
return _FakePopen(cmd, **kwargs)
monkeypatch.delenv("GITHUB_TOKEN", raising=False)
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"})
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
result = env.execute("echo hi")
args = env._build_init_env_args()
args_str = " ".join(args)
assert result["returncode"] == 0
assert "GITHUB_TOKEN=value_from_dotenv" in popen_calls[0]
assert "GITHUB_TOKEN=value_from_dotenv" in args_str
def test_execute_prefers_shell_env_over_hermes_dotenv(monkeypatch):
def test_init_env_args_prefers_shell_env_over_hermes_dotenv(monkeypatch):
"""Shell env vars take priority over .env file values in init env args."""
env = _make_execute_only_env(["GITHUB_TOKEN"])
popen_calls = []
def _fake_popen(cmd, **kwargs):
popen_calls.append(cmd)
return _FakePopen(cmd, **kwargs)
monkeypatch.setenv("GITHUB_TOKEN", "value_from_shell")
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"})
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
env.execute("echo hi")
args = env._build_init_env_args()
args_str = " ".join(args)
assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0]
assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0]
assert "GITHUB_TOKEN=value_from_shell" in args_str
assert "value_from_dotenv" not in args_str
# ── docker_env tests ──────────────────────────────────────────────
@@ -302,64 +301,46 @@ def test_docker_env_appears_in_run_command(monkeypatch):
assert "GNUPGHOME=/root/.gnupg" in run_args_str
def test_docker_env_appears_in_exec_command(monkeypatch):
"""Explicit docker_env values should also be passed via -e at docker exec time."""
def test_docker_env_appears_in_init_env_args(monkeypatch):
"""Explicit docker_env values should appear in _build_init_env_args."""
env = _make_execute_only_env()
env._env = {"MY_VAR": "my_value"}
popen_calls = []
def _fake_popen(cmd, **kwargs):
popen_calls.append(cmd)
return _FakePopen(cmd, **kwargs)
args = env._build_init_env_args()
args_str = " ".join(args)
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
env.execute("echo hi")
assert popen_calls, "Popen should have been called"
assert "MY_VAR=my_value" in popen_calls[0]
assert "MY_VAR=my_value" in args_str
def test_forward_env_overrides_docker_env(monkeypatch):
def test_forward_env_overrides_docker_env_in_init_args(monkeypatch):
"""docker_forward_env should override docker_env for the same key."""
env = _make_execute_only_env(forward_env=["MY_KEY"])
env._env = {"MY_KEY": "static_value"}
popen_calls = []
def _fake_popen(cmd, **kwargs):
popen_calls.append(cmd)
return _FakePopen(cmd, **kwargs)
monkeypatch.setenv("MY_KEY", "dynamic_value")
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
env.execute("echo hi")
args = env._build_init_env_args()
args_str = " ".join(args)
cmd_str = " ".join(popen_calls[0])
assert "MY_KEY=dynamic_value" in cmd_str
assert "MY_KEY=static_value" not in cmd_str
assert "MY_KEY=dynamic_value" in args_str
assert "MY_KEY=static_value" not in args_str
def test_docker_env_and_forward_env_merge(monkeypatch):
def test_docker_env_and_forward_env_merge_in_init_args(monkeypatch):
"""docker_env and docker_forward_env with different keys should both appear."""
env = _make_execute_only_env(forward_env=["TOKEN"])
env._env = {"SSH_AUTH_SOCK": "/run/user/1000/agent.sock"}
popen_calls = []
def _fake_popen(cmd, **kwargs):
popen_calls.append(cmd)
return _FakePopen(cmd, **kwargs)
monkeypatch.setenv("TOKEN", "secret123")
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
env.execute("echo hi")
args = env._build_init_env_args()
args_str = " ".join(args)
assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in args_str
assert "TOKEN=secret123" in args_str
cmd_str = " ".join(popen_calls[0])
assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in cmd_str
assert "TOKEN=secret123" in cmd_str
def test_normalize_env_dict_filters_invalid_keys():

View file

@@ -231,20 +231,20 @@ class TestEnsurepipFix:
"""Verify the pip fix is applied in the ModalEnvironment init."""
def test_modal_environment_creates_image_with_setup_commands(self):
"""ModalEnvironment.__init__ should create a modal.Image with pip fix."""
"""_resolve_modal_image should create a modal.Image with pip fix."""
try:
from tools.environments.modal import ModalEnvironment
from tools.environments.modal import _resolve_modal_image
except ImportError:
pytest.skip("tools.environments.modal not importable")
import inspect
source = inspect.getsource(ModalEnvironment.__init__)
source = inspect.getsource(_resolve_modal_image)
assert "ensurepip" in source, (
"ModalEnvironment should include ensurepip fix "
"_resolve_modal_image should include ensurepip fix "
"for Modal's legacy image builder"
)
assert "setup_dockerfile_commands" in source, (
"ModalEnvironment should use setup_dockerfile_commands "
"_resolve_modal_image should use setup_dockerfile_commands "
"to fix pip before Modal's bootstrap"
)

View file

@@ -85,11 +85,47 @@ def _install_modal_test_modules(
def _prepare_command(self, command: str):
return command, None
sys.modules["tools.environments.base"] = types.SimpleNamespace(BaseEnvironment=_DummyBaseEnvironment)
def init_session(self):
pass
# Stub _ThreadedProcessHandle: modal.py imports it but only uses it at
# runtime inside _run_bash; the snapshot-isolation tests never call _run_bash,
# so a class placeholder is sufficient.
class _DummyThreadedProcessHandle:
def __init__(self, exec_fn, cancel_fn=None):
pass
def _load_json_store(path):
if path.exists():
try:
return json.loads(path.read_text())
except Exception:
pass
return {}
def _save_json_store(path, data):
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(data, indent=2))
def _file_mtime_key(host_path):
try:
st = Path(host_path).stat()
return (st.st_mtime, st.st_size)
except OSError:
return None
sys.modules["tools.environments.base"] = types.SimpleNamespace(
BaseEnvironment=_DummyBaseEnvironment,
_ThreadedProcessHandle=_DummyThreadedProcessHandle,
_load_json_store=_load_json_store,
_save_json_store=_save_json_store,
_file_mtime_key=_file_mtime_key,
)
sys.modules["tools.interrupt"] = types.SimpleNamespace(is_interrupted=lambda: False)
sys.modules["tools.credential_files"] = types.SimpleNamespace(
get_credential_file_mounts=lambda: [],
iter_skills_files=lambda: [],
iter_cache_files=lambda: [],
)
from_id_calls: list[str] = []

View file

@@ -43,7 +43,7 @@ class TestBuildSSHCommand:
lambda *a, **k: MagicMock(stdout=iter([]),
stderr=iter([]),
stdin=MagicMock()))
monkeypatch.setattr("tools.environments.ssh.time.sleep", lambda _: None)
monkeypatch.setattr("tools.environments.base.time.sleep", lambda _: None)
def test_base_flags(self):
env = SSHEnvironment(host="h", user="u")