mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: update tests for unified spawn-per-call execution model
- Docker env tests: verify _build_init_env_args() instead of per-execute Popen flags (env forwarding is now init-time only) - Docker: preserve explicit forward_env bypass of blocklist from main - Daytona tests: adapt to SDK-native timeout, _ThreadedProcessHandle, base.py interrupt handling, HERMES_STDIN_ heredoc prefix - Modal tests: fix _load_module to include _ThreadedProcessHandle stub, check ensurepip in _resolve_modal_image instead of __init__ - SSH tests: mock time.sleep on base module instead of removed ssh import - Add missing BaseEnvironment attributes to __new__()-based test fixtures
This commit is contained in:
parent
d684d7ee7e
commit
e19252afc4
5 changed files with 120 additions and 84 deletions
|
|
@ -59,8 +59,8 @@ def daytona_sdk(monkeypatch):
|
|||
@pytest.fixture()
|
||||
def make_env(daytona_sdk, monkeypatch):
|
||||
"""Factory that creates a DaytonaEnvironment with a mocked SDK."""
|
||||
# Prevent is_interrupted from interfering
|
||||
monkeypatch.setattr("tools.interrupt.is_interrupted", lambda: False)
|
||||
# Prevent is_interrupted from interfering — patch where it's used (base.py)
|
||||
monkeypatch.setattr("tools.environments.base.is_interrupted", lambda: False)
|
||||
# Prevent skills/credential sync from consuming mock exec calls
|
||||
monkeypatch.setattr("tools.credential_files.get_credential_file_mounts", lambda: [])
|
||||
monkeypatch.setattr("tools.credential_files.get_skills_directory_mount", lambda **kw: None)
|
||||
|
|
@ -221,41 +221,45 @@ class TestCleanup:
|
|||
class TestExecute:
|
||||
def test_basic_command(self, make_env):
|
||||
sb = _make_sandbox()
|
||||
# First call: $HOME detection; subsequent calls: actual commands
|
||||
# Calls: (1) $HOME detection, (2) init_session bootstrap, (3) actual command
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"), # $HOME
|
||||
_make_exec_response(result="", exit_code=0), # init_session
|
||||
_make_exec_response(result="hello", exit_code=0), # actual cmd
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
result = env.execute("echo hello")
|
||||
assert result["output"] == "hello"
|
||||
assert "hello" in result["output"]
|
||||
assert result["returncode"] == 0
|
||||
|
||||
def test_command_wrapped_with_shell_timeout(self, make_env):
|
||||
def test_sdk_timeout_passed_to_exec(self, make_env):
|
||||
"""SDK native timeout is passed to sandbox.process.exec()."""
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="", exit_code=0), # init_session
|
||||
_make_exec_response(result="ok", exit_code=0),
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb, timeout=42)
|
||||
|
||||
env.execute("echo hello")
|
||||
# The command sent to exec should be wrapped with `timeout N sh -c '...'`
|
||||
# The exec call should receive timeout= kwarg (SDK native timeout)
|
||||
call_args = sb.process.exec.call_args_list[-1]
|
||||
assert call_args[1]["timeout"] == 42
|
||||
# The command should NOT have a shell `timeout` prefix
|
||||
cmd = call_args[0][0]
|
||||
assert cmd.startswith("timeout 42 sh -c ")
|
||||
# SDK timeout param should NOT be passed
|
||||
assert "timeout" not in call_args[1]
|
||||
assert not cmd.startswith("timeout ")
|
||||
|
||||
def test_timeout_returns_exit_code_124(self, make_env):
|
||||
"""Shell timeout utility returns exit code 124."""
|
||||
"""SDK-level timeout surfaces as exit code 124 via _wait_for_process."""
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="", exit_code=124),
|
||||
_make_exec_response(result="", exit_code=0), # init_session
|
||||
_make_exec_response(result="", exit_code=124), # actual cmd
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb)
|
||||
|
|
@ -267,6 +271,7 @@ class TestExecute:
|
|||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="", exit_code=0), # init_session
|
||||
_make_exec_response(result="not found", exit_code=127),
|
||||
]
|
||||
sb.state = "started"
|
||||
|
|
@ -279,6 +284,7 @@ class TestExecute:
|
|||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="", exit_code=0), # init_session
|
||||
_make_exec_response(result="ok", exit_code=0),
|
||||
]
|
||||
sb.state = "started"
|
||||
|
|
@ -286,39 +292,47 @@ class TestExecute:
|
|||
|
||||
env.execute("python3", stdin_data="print('hi')")
|
||||
# Check that the command passed to exec contains heredoc markers
|
||||
# (single quotes get shell-escaped by shlex.quote, so check components)
|
||||
# Base class uses HERMES_STDIN_ prefix for heredoc delimiters
|
||||
call_args = sb.process.exec.call_args_list[-1]
|
||||
cmd = call_args[0][0]
|
||||
assert "HERMES_EOF_" in cmd
|
||||
assert "HERMES_STDIN_" in cmd
|
||||
assert "print" in cmd
|
||||
assert "hi" in cmd
|
||||
|
||||
def test_custom_cwd_passed_through(self, make_env):
|
||||
def test_custom_cwd_in_command_wrapper(self, make_env):
|
||||
"""CWD is handled by _wrap_command() in the command string, not as a kwarg."""
|
||||
sb = _make_sandbox()
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"),
|
||||
_make_exec_response(result="", exit_code=0), # init_session
|
||||
_make_exec_response(result="/tmp", exit_code=0),
|
||||
]
|
||||
sb.state = "started"
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
env.execute("pwd", cwd="/tmp")
|
||||
call_kwargs = sb.process.exec.call_args_list[-1][1]
|
||||
assert call_kwargs["cwd"] == "/tmp"
|
||||
# CWD should be embedded in the command string via _wrap_command
|
||||
call_args = sb.process.exec.call_args_list[-1]
|
||||
cmd = call_args[0][0]
|
||||
assert "cd /tmp" in cmd
|
||||
# CWD should NOT be passed as a kwarg to exec
|
||||
assert "cwd" not in call_args[1]
|
||||
|
||||
def test_daytona_error_triggers_retry(self, make_env, daytona_sdk):
|
||||
sb = _make_sandbox()
|
||||
sb.state = "started"
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"), # $HOME
|
||||
_make_exec_response(result="", exit_code=0), # init_session
|
||||
daytona_sdk.DaytonaError("transient"), # first attempt fails
|
||||
_make_exec_response(result="ok", exit_code=0), # retry succeeds
|
||||
]
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
result = env.execute("echo retry")
|
||||
assert result["output"] == "ok"
|
||||
assert result["returncode"] == 0
|
||||
# DaytonaError now surfaces directly through _ThreadedProcessHandle
|
||||
# (no retry logic) — the error becomes returncode=1
|
||||
assert result["returncode"] == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -359,14 +373,18 @@ class TestInterrupt:
|
|||
calls["n"] += 1
|
||||
if calls["n"] == 1:
|
||||
return _make_exec_response(result="/root") # $HOME detection
|
||||
if calls["n"] == 2:
|
||||
return _make_exec_response(result="", exit_code=0) # init_session
|
||||
event.wait(timeout=5) # simulate long-running command
|
||||
return _make_exec_response(result="done", exit_code=0)
|
||||
|
||||
sb.process.exec.side_effect = exec_side_effect
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
# is_interrupted is checked by base.py's _wait_for_process,
|
||||
# patch where it's actually referenced (base.py's local binding)
|
||||
monkeypatch.setattr(
|
||||
"tools.environments.daytona.is_interrupted", lambda: True
|
||||
"tools.environments.base.is_interrupted", lambda: True
|
||||
)
|
||||
try:
|
||||
result = env.execute("sleep 10")
|
||||
|
|
@ -377,23 +395,24 @@ class TestInterrupt:
|
|||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Retry exhaustion
|
||||
# DaytonaError surfaces directly (no retry)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRetryExhausted:
|
||||
def test_both_attempts_fail(self, make_env, daytona_sdk):
|
||||
"""DaytonaError surfaces directly as rc=1 (retry logic was removed)."""
|
||||
sb = _make_sandbox()
|
||||
sb.state = "started"
|
||||
sb.process.exec.side_effect = [
|
||||
_make_exec_response(result="/root"), # $HOME
|
||||
daytona_sdk.DaytonaError("fail1"), # first attempt
|
||||
daytona_sdk.DaytonaError("fail2"), # retry
|
||||
_make_exec_response(result="", exit_code=0), # init_session
|
||||
daytona_sdk.DaytonaError("fail1"), # actual command fails
|
||||
]
|
||||
env = make_env(sandbox=sb)
|
||||
|
||||
result = env.execute("echo x")
|
||||
# Error surfaces directly through _ThreadedProcessHandle (rc=1)
|
||||
assert result["returncode"] == 1
|
||||
assert "Daytona execution error" in result["output"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -245,43 +245,42 @@ def _make_execute_only_env(forward_env=None):
|
|||
env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
|
||||
env._container_id = "test-container"
|
||||
env._docker_exe = "/usr/bin/docker"
|
||||
# Base class attributes needed by unified execute()
|
||||
env._session_id = "test123"
|
||||
env._snapshot_path = "/tmp/hermes-snap-test123.sh"
|
||||
env._cwd_file = "/tmp/hermes-cwd-test123.txt"
|
||||
env._cwd_marker = "__HERMES_CWD_test123__"
|
||||
env._snapshot_ready = True
|
||||
env._last_sync_time = None
|
||||
env._init_env_args = []
|
||||
return env
|
||||
|
||||
|
||||
def test_execute_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
|
||||
def test_init_env_args_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
|
||||
"""_build_init_env_args picks up forwarded env vars from .env file at init time."""
|
||||
env = _make_execute_only_env(["GITHUB_TOKEN"])
|
||||
popen_calls = []
|
||||
|
||||
def _fake_popen(cmd, **kwargs):
|
||||
popen_calls.append(cmd)
|
||||
return _FakePopen(cmd, **kwargs)
|
||||
|
||||
monkeypatch.delenv("GITHUB_TOKEN", raising=False)
|
||||
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"})
|
||||
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
|
||||
|
||||
result = env.execute("echo hi")
|
||||
args = env._build_init_env_args()
|
||||
args_str = " ".join(args)
|
||||
|
||||
assert result["returncode"] == 0
|
||||
assert "GITHUB_TOKEN=value_from_dotenv" in popen_calls[0]
|
||||
assert "GITHUB_TOKEN=value_from_dotenv" in args_str
|
||||
|
||||
|
||||
def test_execute_prefers_shell_env_over_hermes_dotenv(monkeypatch):
|
||||
def test_init_env_args_prefers_shell_env_over_hermes_dotenv(monkeypatch):
|
||||
"""Shell env vars take priority over .env file values in init env args."""
|
||||
env = _make_execute_only_env(["GITHUB_TOKEN"])
|
||||
popen_calls = []
|
||||
|
||||
def _fake_popen(cmd, **kwargs):
|
||||
popen_calls.append(cmd)
|
||||
return _FakePopen(cmd, **kwargs)
|
||||
|
||||
monkeypatch.setenv("GITHUB_TOKEN", "value_from_shell")
|
||||
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"})
|
||||
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
|
||||
|
||||
env.execute("echo hi")
|
||||
args = env._build_init_env_args()
|
||||
args_str = " ".join(args)
|
||||
|
||||
assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0]
|
||||
assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0]
|
||||
assert "GITHUB_TOKEN=value_from_shell" in args_str
|
||||
assert "value_from_dotenv" not in args_str
|
||||
|
||||
|
||||
# ── docker_env tests ──────────────────────────────────────────────
|
||||
|
|
@ -302,64 +301,46 @@ def test_docker_env_appears_in_run_command(monkeypatch):
|
|||
assert "GNUPGHOME=/root/.gnupg" in run_args_str
|
||||
|
||||
|
||||
def test_docker_env_appears_in_exec_command(monkeypatch):
|
||||
"""Explicit docker_env values should also be passed via -e at docker exec time."""
|
||||
def test_docker_env_appears_in_init_env_args(monkeypatch):
|
||||
"""Explicit docker_env values should appear in _build_init_env_args."""
|
||||
env = _make_execute_only_env()
|
||||
env._env = {"MY_VAR": "my_value"}
|
||||
popen_calls = []
|
||||
|
||||
def _fake_popen(cmd, **kwargs):
|
||||
popen_calls.append(cmd)
|
||||
return _FakePopen(cmd, **kwargs)
|
||||
args = env._build_init_env_args()
|
||||
args_str = " ".join(args)
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
|
||||
|
||||
env.execute("echo hi")
|
||||
|
||||
assert popen_calls, "Popen should have been called"
|
||||
assert "MY_VAR=my_value" in popen_calls[0]
|
||||
assert "MY_VAR=my_value" in args_str
|
||||
|
||||
|
||||
def test_forward_env_overrides_docker_env(monkeypatch):
|
||||
def test_forward_env_overrides_docker_env_in_init_args(monkeypatch):
|
||||
"""docker_forward_env should override docker_env for the same key."""
|
||||
env = _make_execute_only_env(forward_env=["MY_KEY"])
|
||||
env._env = {"MY_KEY": "static_value"}
|
||||
popen_calls = []
|
||||
|
||||
def _fake_popen(cmd, **kwargs):
|
||||
popen_calls.append(cmd)
|
||||
return _FakePopen(cmd, **kwargs)
|
||||
|
||||
monkeypatch.setenv("MY_KEY", "dynamic_value")
|
||||
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})
|
||||
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
|
||||
|
||||
env.execute("echo hi")
|
||||
args = env._build_init_env_args()
|
||||
args_str = " ".join(args)
|
||||
|
||||
cmd_str = " ".join(popen_calls[0])
|
||||
assert "MY_KEY=dynamic_value" in cmd_str
|
||||
assert "MY_KEY=static_value" not in cmd_str
|
||||
assert "MY_KEY=dynamic_value" in args_str
|
||||
assert "MY_KEY=static_value" not in args_str
|
||||
|
||||
|
||||
def test_docker_env_and_forward_env_merge(monkeypatch):
|
||||
def test_docker_env_and_forward_env_merge_in_init_args(monkeypatch):
|
||||
"""docker_env and docker_forward_env with different keys should both appear."""
|
||||
env = _make_execute_only_env(forward_env=["TOKEN"])
|
||||
env._env = {"SSH_AUTH_SOCK": "/run/user/1000/agent.sock"}
|
||||
popen_calls = []
|
||||
|
||||
def _fake_popen(cmd, **kwargs):
|
||||
popen_calls.append(cmd)
|
||||
return _FakePopen(cmd, **kwargs)
|
||||
|
||||
monkeypatch.setenv("TOKEN", "secret123")
|
||||
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {})
|
||||
monkeypatch.setattr(docker_env.subprocess, "Popen", _fake_popen)
|
||||
|
||||
env.execute("echo hi")
|
||||
args = env._build_init_env_args()
|
||||
args_str = " ".join(args)
|
||||
|
||||
assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in args_str
|
||||
assert "TOKEN=secret123" in args_str
|
||||
|
||||
cmd_str = " ".join(popen_calls[0])
|
||||
assert "SSH_AUTH_SOCK=/run/user/1000/agent.sock" in cmd_str
|
||||
assert "TOKEN=secret123" in cmd_str
|
||||
|
||||
|
||||
def test_normalize_env_dict_filters_invalid_keys():
|
||||
|
|
|
|||
|
|
@ -231,20 +231,20 @@ class TestEnsurepipFix:
|
|||
"""Verify the pip fix is applied in the ModalEnvironment init."""
|
||||
|
||||
def test_modal_environment_creates_image_with_setup_commands(self):
|
||||
"""ModalEnvironment.__init__ should create a modal.Image with pip fix."""
|
||||
"""_resolve_modal_image should create a modal.Image with pip fix."""
|
||||
try:
|
||||
from tools.environments.modal import ModalEnvironment
|
||||
from tools.environments.modal import _resolve_modal_image
|
||||
except ImportError:
|
||||
pytest.skip("tools.environments.modal not importable")
|
||||
|
||||
import inspect
|
||||
source = inspect.getsource(ModalEnvironment.__init__)
|
||||
source = inspect.getsource(_resolve_modal_image)
|
||||
assert "ensurepip" in source, (
|
||||
"ModalEnvironment should include ensurepip fix "
|
||||
"_resolve_modal_image should include ensurepip fix "
|
||||
"for Modal's legacy image builder"
|
||||
)
|
||||
assert "setup_dockerfile_commands" in source, (
|
||||
"ModalEnvironment should use setup_dockerfile_commands "
|
||||
"_resolve_modal_image should use setup_dockerfile_commands "
|
||||
"to fix pip before Modal's bootstrap"
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -85,11 +85,47 @@ def _install_modal_test_modules(
|
|||
def _prepare_command(self, command: str):
|
||||
return command, None
|
||||
|
||||
sys.modules["tools.environments.base"] = types.SimpleNamespace(BaseEnvironment=_DummyBaseEnvironment)
|
||||
def init_session(self):
|
||||
pass
|
||||
|
||||
# Stub _ThreadedProcessHandle: modal.py imports it but only uses it at
|
||||
# runtime inside _run_bash; the snapshot-isolation tests never call _run_bash,
|
||||
# so a class placeholder is sufficient.
|
||||
class _DummyThreadedProcessHandle:
|
||||
def __init__(self, exec_fn, cancel_fn=None):
|
||||
pass
|
||||
|
||||
def _load_json_store(path):
|
||||
if path.exists():
|
||||
try:
|
||||
return json.loads(path.read_text())
|
||||
except Exception:
|
||||
pass
|
||||
return {}
|
||||
|
||||
def _save_json_store(path, data):
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(json.dumps(data, indent=2))
|
||||
|
||||
def _file_mtime_key(host_path):
|
||||
try:
|
||||
st = Path(host_path).stat()
|
||||
return (st.st_mtime, st.st_size)
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
sys.modules["tools.environments.base"] = types.SimpleNamespace(
|
||||
BaseEnvironment=_DummyBaseEnvironment,
|
||||
_ThreadedProcessHandle=_DummyThreadedProcessHandle,
|
||||
_load_json_store=_load_json_store,
|
||||
_save_json_store=_save_json_store,
|
||||
_file_mtime_key=_file_mtime_key,
|
||||
)
|
||||
sys.modules["tools.interrupt"] = types.SimpleNamespace(is_interrupted=lambda: False)
|
||||
sys.modules["tools.credential_files"] = types.SimpleNamespace(
|
||||
get_credential_file_mounts=lambda: [],
|
||||
iter_skills_files=lambda: [],
|
||||
iter_cache_files=lambda: [],
|
||||
)
|
||||
|
||||
from_id_calls: list[str] = []
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ class TestBuildSSHCommand:
|
|||
lambda *a, **k: MagicMock(stdout=iter([]),
|
||||
stderr=iter([]),
|
||||
stdin=MagicMock()))
|
||||
monkeypatch.setattr("tools.environments.ssh.time.sleep", lambda _: None)
|
||||
monkeypatch.setattr("tools.environments.base.time.sleep", lambda _: None)
|
||||
|
||||
def test_base_flags(self):
|
||||
env = SSHEnvironment(host="h", user="u")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue