wip: add persistent shell to ssh and local terminal backends

2026-04-29 01:31:41 +00:00 · 2026-03-13 16:54:11 +05:30 · 2026-03-13 16:54:11 +05:30 · 861202b56c
commit 861202b56c
parent 9d63dcc3f9
6 changed files with 842 additions and 277 deletions
--- a/tests/tools/test_local_persistent.py
+++ b/tests/tools/test_local_persistent.py
@ -0,0 +1,183 @@
+"""Tests for the local persistent shell backend.
+
+Unit tests cover config plumbing (no real shell needed).
+Integration tests run real commands — no external dependencies required.
+
+    pytest tests/tools/test_local_persistent.py -v
+"""
+
+import glob as glob_mod
+
+import pytest
+
+from tools.environments.local import LocalEnvironment
+from tools.environments.persistent_shell import PersistentShellMixin
+
+
+# ---------------------------------------------------------------------------
+# Unit tests — config plumbing
+# ---------------------------------------------------------------------------
+
+class TestLocalConfig:
+    def test_local_persistent_default_false(self, monkeypatch):
+        monkeypatch.delenv("TERMINAL_LOCAL_PERSISTENT", raising=False)
+        from tools.terminal_tool import _get_env_config
+        assert _get_env_config()["local_persistent"] is False
+
+    def test_local_persistent_true(self, monkeypatch):
+        monkeypatch.setenv("TERMINAL_LOCAL_PERSISTENT", "true")
+        from tools.terminal_tool import _get_env_config
+        assert _get_env_config()["local_persistent"] is True
+
+    def test_local_persistent_yes(self, monkeypatch):
+        monkeypatch.setenv("TERMINAL_LOCAL_PERSISTENT", "yes")
+        from tools.terminal_tool import _get_env_config
+        assert _get_env_config()["local_persistent"] is True
+
+
+class TestMergeOutput:
+    """Test the shared _merge_output static method."""
+
+    def test_stdout_only(self):
+        assert PersistentShellMixin._merge_output("out", "") == "out"
+
+    def test_stderr_only(self):
+        assert PersistentShellMixin._merge_output("", "err") == "err"
+
+    def test_both(self):
+        assert PersistentShellMixin._merge_output("out", "err") == "out\nerr"
+
+    def test_empty(self):
+        assert PersistentShellMixin._merge_output("", "") == ""
+
+    def test_strips_trailing_newlines(self):
+        assert PersistentShellMixin._merge_output("out\n\n", "err\n") == "out\nerr"
+
+
+# ---------------------------------------------------------------------------
+# One-shot regression tests — ensure refactor didn't break anything
+# ---------------------------------------------------------------------------
+
+class TestLocalOneShotRegression:
+    """Verify one-shot mode still works after adding the mixin."""
+
+    def test_echo(self):
+        env = LocalEnvironment(persistent=False)
+        r = env.execute("echo hello")
+        assert r["returncode"] == 0
+        assert "hello" in r["output"]
+        env.cleanup()
+
+    def test_exit_code(self):
+        env = LocalEnvironment(persistent=False)
+        r = env.execute("exit 42")
+        assert r["returncode"] == 42
+        env.cleanup()
+
+    def test_state_does_not_persist(self):
+        """Env vars set in one command should NOT survive in one-shot mode."""
+        env = LocalEnvironment(persistent=False)
+        env.execute("export HERMES_ONESHOT_LOCAL=yes")
+        r = env.execute("echo $HERMES_ONESHOT_LOCAL")
+        # In one-shot mode, env var should not persist
+        assert r["output"].strip() == ""
+        env.cleanup()
+
+
+# ---------------------------------------------------------------------------
+# Persistent shell integration tests
+# ---------------------------------------------------------------------------
+
+class TestLocalPersistent:
+    """Persistent mode: state persists across execute() calls."""
+
+    @pytest.fixture
+    def env(self):
+        e = LocalEnvironment(persistent=True)
+        yield e
+        e.cleanup()
+
+    def test_echo(self, env):
+        r = env.execute("echo hello-persistent")
+        assert r["returncode"] == 0
+        assert "hello-persistent" in r["output"]
+
+    def test_env_var_persists(self, env):
+        env.execute("export HERMES_LOCAL_PERSIST_TEST=works")
+        r = env.execute("echo $HERMES_LOCAL_PERSIST_TEST")
+        assert r["output"].strip() == "works"
+
+    def test_cwd_persists(self, env):
+        env.execute("cd /tmp")
+        r = env.execute("pwd")
+        assert r["output"].strip() == "/tmp"
+
+    def test_exit_code(self, env):
+        r = env.execute("(exit 42)")
+        assert r["returncode"] == 42
+
+    def test_stderr(self, env):
+        r = env.execute("echo oops >&2")
+        assert r["returncode"] == 0
+        assert "oops" in r["output"]
+
+    def test_multiline_output(self, env):
+        r = env.execute("echo a; echo b; echo c")
+        lines = r["output"].strip().splitlines()
+        assert lines == ["a", "b", "c"]
+
+    def test_timeout_then_recovery(self, env):
+        r = env.execute("sleep 999", timeout=2)
+        assert r["returncode"] in (124, 130)  # timeout or interrupted
+        # Shell should survive — next command works
+        r = env.execute("echo alive")
+        assert r["returncode"] == 0
+        assert "alive" in r["output"]
+
+    def test_large_output(self, env):
+        r = env.execute("seq 1 1000")
+        assert r["returncode"] == 0
+        lines = r["output"].strip().splitlines()
+        assert len(lines) == 1000
+        assert lines[0] == "1"
+        assert lines[-1] == "1000"
+
+    def test_shell_variable_persists(self, env):
+        """Shell variables (not exported) should also persist."""
+        env.execute("MY_LOCAL_VAR=hello123")
+        r = env.execute("echo $MY_LOCAL_VAR")
+        assert r["output"].strip() == "hello123"
+
+    def test_cleanup_removes_temp_files(self, env):
+        env.execute("echo warmup")
+        prefix = env._temp_prefix
+        # Temp files should exist
+        assert len(glob_mod.glob(f"{prefix}-*")) > 0
+        env.cleanup()
+        remaining = glob_mod.glob(f"{prefix}-*")
+        assert remaining == []
+
+    def test_state_does_not_leak_between_instances(self):
+        """Two separate persistent instances don't share state."""
+        env1 = LocalEnvironment(persistent=True)
+        env2 = LocalEnvironment(persistent=True)
+        try:
+            env1.execute("export LEAK_TEST=from_env1")
+            r = env2.execute("echo $LEAK_TEST")
+            assert r["output"].strip() == ""
+        finally:
+            env1.cleanup()
+            env2.cleanup()
+
+    def test_special_characters_in_command(self, env):
+        """Commands with quotes and special chars should work."""
+        r = env.execute("echo 'hello world'")
+        assert r["output"].strip() == "hello world"
+
+    def test_pipe_command(self, env):
+        r = env.execute("echo hello | tr 'h' 'H'")
+        assert r["output"].strip() == "Hello"
+
+    def test_multiple_commands_semicolon(self, env):
+        r = env.execute("X=42; echo $X")
+        assert r["output"].strip() == "42"
--- a/tests/tools/test_ssh_environment.py
+++ b/tests/tools/test_ssh_environment.py
@ -0,0 +1,198 @@
+"""Tests for the SSH remote execution environment backend.
+
+Unit tests (no SSH required) cover pure logic: command building, output merging,
+config plumbing.
+
+Integration tests require a real SSH target. Set TERMINAL_SSH_HOST and
+TERMINAL_SSH_USER to enable them. In CI, start an sshd container or enable
+the localhost SSH service.
+
+    TERMINAL_SSH_HOST=localhost TERMINAL_SSH_USER=$(whoami) \
+        pytest tests/tools/test_ssh_environment.py -v
+"""
+
+import json
+import os
+import subprocess
+from unittest.mock import MagicMock
+
+import pytest
+
+from tools.environments.ssh import SSHEnvironment
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+_SSH_HOST = os.getenv("TERMINAL_SSH_HOST", "")
+_SSH_USER = os.getenv("TERMINAL_SSH_USER", "")
+_SSH_PORT = int(os.getenv("TERMINAL_SSH_PORT", "22"))
+_SSH_KEY = os.getenv("TERMINAL_SSH_KEY", "")
+
+_has_ssh = bool(_SSH_HOST and _SSH_USER)
+
+requires_ssh = pytest.mark.skipif(
+    not _has_ssh,
+    reason="TERMINAL_SSH_HOST / TERMINAL_SSH_USER not set",
+)
+
+
+def _run(command, task_id="ssh_test", **kwargs):
+    """Call terminal_tool like an LLM would, return parsed JSON."""
+    from tools.terminal_tool import terminal_tool
+    return json.loads(terminal_tool(command, task_id=task_id, **kwargs))
+
+
+def _cleanup(task_id="ssh_test"):
+    from tools.terminal_tool import cleanup_vm
+    cleanup_vm(task_id)
+
+
+# ---------------------------------------------------------------------------
+# Unit tests — no SSH connection needed
+# ---------------------------------------------------------------------------
+
+class TestBuildSSHCommand:
+    """Pure logic: verify the ssh command list is assembled correctly."""
+
+    @pytest.fixture(autouse=True)
+    def _mock_connection(self, monkeypatch):
+        monkeypatch.setattr("tools.environments.ssh.subprocess.run",
+                            lambda *a, **k: subprocess.CompletedProcess([], 0))
+        monkeypatch.setattr("tools.environments.ssh.subprocess.Popen",
+                            lambda *a, **k: MagicMock(stdout=iter([]),
+                                                      stderr=iter([]),
+                                                      stdin=MagicMock()))
+        monkeypatch.setattr("tools.environments.ssh.time.sleep", lambda _: None)
+
+    def test_base_flags(self):
+        env = SSHEnvironment(host="h", user="u")
+        cmd = " ".join(env._build_ssh_command())
+        for flag in ("ControlMaster=auto", "ControlPersist=300",
+                      "BatchMode=yes", "StrictHostKeyChecking=accept-new"):
+            assert flag in cmd
+
+    def test_custom_port(self):
+        env = SSHEnvironment(host="h", user="u", port=2222)
+        cmd = env._build_ssh_command()
+        assert "-p" in cmd and "2222" in cmd
+
+    def test_key_path(self):
+        env = SSHEnvironment(host="h", user="u", key_path="/k")
+        cmd = env._build_ssh_command()
+        assert "-i" in cmd and "/k" in cmd
+
+    def test_user_host_suffix(self):
+        env = SSHEnvironment(host="h", user="u")
+        assert env._build_ssh_command()[-1] == "u@h"
+
+
+class TestTerminalToolConfig:
+    def test_ssh_persistent_default_false(self, monkeypatch):
+        monkeypatch.delenv("TERMINAL_SSH_PERSISTENT", raising=False)
+        from tools.terminal_tool import _get_env_config
+        assert _get_env_config()["ssh_persistent"] is False
+
+    def test_ssh_persistent_true(self, monkeypatch):
+        monkeypatch.setenv("TERMINAL_SSH_PERSISTENT", "true")
+        from tools.terminal_tool import _get_env_config
+        assert _get_env_config()["ssh_persistent"] is True
+
+
+# ---------------------------------------------------------------------------
+# Integration tests — real SSH, through terminal_tool() interface
+# ---------------------------------------------------------------------------
+
+def _setup_ssh_env(monkeypatch, persistent: bool):
+    """Configure env vars for SSH integration tests."""
+    monkeypatch.setenv("TERMINAL_ENV", "ssh")
+    monkeypatch.setenv("TERMINAL_SSH_HOST", _SSH_HOST)
+    monkeypatch.setenv("TERMINAL_SSH_USER", _SSH_USER)
+    monkeypatch.setenv("TERMINAL_SSH_PERSISTENT", "true" if persistent else "false")
+    if _SSH_PORT != 22:
+        monkeypatch.setenv("TERMINAL_SSH_PORT", str(_SSH_PORT))
+    if _SSH_KEY:
+        monkeypatch.setenv("TERMINAL_SSH_KEY", _SSH_KEY)
+
+
+@requires_ssh
+class TestOneShotSSH:
+    """One-shot mode: each command is a fresh ssh invocation."""
+
+    @pytest.fixture(autouse=True)
+    def _setup(self, monkeypatch):
+        _setup_ssh_env(monkeypatch, persistent=False)
+        yield
+        _cleanup()
+
+    def test_echo(self):
+        r = _run("echo hello")
+        assert r["exit_code"] == 0
+        assert "hello" in r["output"]
+
+    def test_exit_code(self):
+        r = _run("exit 42")
+        assert r["exit_code"] == 42
+
+    def test_state_does_not_persist(self):
+        """Env vars set in one command should NOT survive to the next."""
+        _run("export HERMES_ONESHOT_TEST=yes")
+        r = _run("echo $HERMES_ONESHOT_TEST")
+        assert r["output"].strip() == ""
+
+
+@requires_ssh
+class TestPersistentSSH:
+    """Persistent mode: single long-lived shell, state persists."""
+
+    @pytest.fixture(autouse=True)
+    def _setup(self, monkeypatch):
+        _setup_ssh_env(monkeypatch, persistent=True)
+        yield
+        _cleanup()
+
+    def test_echo(self):
+        r = _run("echo hello-persistent")
+        assert r["exit_code"] == 0
+        assert "hello-persistent" in r["output"]
+
+    def test_env_var_persists(self):
+        _run("export HERMES_PERSIST_TEST=works")
+        r = _run("echo $HERMES_PERSIST_TEST")
+        assert r["output"].strip() == "works"
+
+    def test_cwd_persists(self):
+        _run("cd /tmp")
+        r = _run("pwd")
+        assert r["output"].strip() == "/tmp"
+
+    def test_exit_code(self):
+        r = _run("(exit 42)")
+        assert r["exit_code"] == 42
+
+    def test_stderr(self):
+        r = _run("echo oops >&2")
+        assert r["exit_code"] == 0
+        assert "oops" in r["output"]
+
+    def test_multiline_output(self):
+        r = _run("echo a; echo b; echo c")
+        lines = r["output"].strip().splitlines()
+        assert lines == ["a", "b", "c"]
+
+    def test_timeout_then_recovery(self):
+        r = _run("sleep 999", timeout=2)
+        assert r["exit_code"] == 124
+        # Shell should survive — next command works
+        r = _run("echo alive")
+        assert r["exit_code"] == 0
+        assert "alive" in r["output"]
+
+    def test_large_output(self):
+        r = _run("seq 1 1000")
+        assert r["exit_code"] == 0
+        lines = r["output"].strip().splitlines()
+        assert len(lines) == 1000
+        assert lines[0] == "1"
+        assert lines[-1] == "1000"