Merge 5727b3429f into 05d8f11085

2026-05-08 03:01:47 +00:00 · 2026-04-24 19:24:50 -05:00 · 2026-04-24 19:24:50 -05:00 · b4f338ccf4
commit b4f338ccf4
parent 05d8f11085 5727b3429f
19 changed files with 1022 additions and 29 deletions
--- a/tests/integration/test_koyeb_terminal.py
+++ b/tests/integration/test_koyeb_terminal.py
@ -0,0 +1,258 @@
+#!/usr/bin/env python3
+"""
+Test Koyeb Terminal Tool
+
+This script tests that the Koyeb terminal backend is correctly configured
+and can execute commands in Koyeb sandboxes.
+
+Usage:
+    # Run with Koyeb backend
+    TERMINAL_ENV=koyeb python tests/integration/test_koyeb_terminal.py
+
+    # Or run directly (will use whatever TERMINAL_ENV is set in .env)
+    python tests/integration/test_koyeb_terminal.py
+"""
+
+import pytest
+pytestmark = pytest.mark.integration
+
+import os
+import sys
+import json
+from pathlib import Path
+
+# Load settings from .env. python-dotenv is preferred; without it, fall back
+# to a minimal KEY=VALUE parser so the script still runs.
+try:
+    from dotenv import load_dotenv
+except ImportError:
+    # Manually load .env if dotenv not available
+    env_file = Path(__file__).parent.parent.parent / ".env"
+    if env_file.exists():
+        # Decode explicitly as UTF-8 so parsing does not depend on the
+        # platform's locale encoding.
+        with open(env_file, encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                # Skip blanks, comments and lines that are not assignments.
+                if not line or line.startswith('#') or '=' not in line:
+                    continue
+                key, _, value = line.partition('=')
+                key = key.strip()
+                if not key:
+                    # "=value" has no variable name to set.
+                    continue
+                # Remove quotes if present
+                value = value.strip().strip('"').strip("'")
+                # setdefault: variables already exported in the environment win.
+                os.environ.setdefault(key, value)
+else:
+    # Kept outside the try so an ImportError raised while loading is not
+    # mistaken for "python-dotenv is not installed".
+    load_dotenv()
+
+# Add project root to path for imports
+# (three directory levels above this file).
+parent_dir = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(parent_dir))
+
+# Import terminal_tool module directly using importlib to avoid tools/__init__.py
+# NOTE: exec_module() runs tools/terminal_tool.py as soon as this file is
+# imported, so any import-time side effects of that module happen here.
+import importlib.util
+terminal_tool_path = parent_dir / "tools" / "terminal_tool.py"
+spec = importlib.util.spec_from_file_location("terminal_tool", terminal_tool_path)
+terminal_module = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(terminal_module)
+
+# Module-level aliases for the callables the tests below use.
+terminal_tool = terminal_module.terminal_tool
+check_terminal_requirements = terminal_module.check_terminal_requirements
+_get_env_config = terminal_module._get_env_config
+cleanup_vm = terminal_module.cleanup_vm
+
+
+def test_koyeb_requirements():
+    """Report the Koyeb configuration and check the backend requirements.
+
+    Prints the active backend, the configured Koyeb image and whether
+    ``KOYEB_API_TOKEN`` is set (the token value itself is never printed).
+
+    Returns:
+        ``False`` when ``TERMINAL_ENV`` is not ``koyeb``; otherwise the
+        result of ``check_terminal_requirements()``.
+    """
+    banner = "=" * 60
+    print("\n" + banner)
+    print("TEST 1: Koyeb Requirements Check")
+    print(banner)
+
+    config = _get_env_config()
+    env_type = config['env_type']
+    print(f"Current TERMINAL_ENV: {env_type}")
+    print(f"Koyeb image: {config['koyeb_image']}")
+
+    # Check for Koyeb authentication
+    token_status = '✅ Set' if os.getenv("KOYEB_API_TOKEN") else '❌ Not set'
+    print("\nKoyeb authentication:")
+    print(f"  KOYEB_API_TOKEN env var: {token_status}")
+
+    # Guard clause: a non-Koyeb backend cannot satisfy this check.
+    if env_type != 'koyeb':
+        print(f"\n⚠️  TERMINAL_ENV is '{env_type}', not 'koyeb'")
+        print("   Set TERMINAL_ENV=koyeb in .env or export it to test Koyeb backend")
+        return False
+
+    requirements_met = check_terminal_requirements()
+    verdict = '✅ Passed' if requirements_met else '❌ Failed'
+    print(f"\nRequirements check: {verdict}")
+    return requirements_met
+
+
+def test_simple_command():
+    """Run a simple ``echo`` through the terminal tool and check the result.
+
+    Returns:
+        ``True`` when the command exits with code 0 and its output contains
+        the echoed text, ``False`` otherwise.
+    """
+    print("\n" + "=" * 60)
+    print("TEST 2: Simple Command Execution")
+    print("=" * 60)
+
+    test_task_id = "koyeb_test_simple"
+
+    try:
+        print("Executing: echo 'Hello from Koyeb!'")
+        result = terminal_tool("echo 'Hello from Koyeb!'", task_id=test_task_id)
+        result_json = json.loads(result)
+        # Guard against a missing or null "output" so slicing and `in` are safe.
+        output = result_json.get('output') or ''
+
+        print("\nResult:")
+        print(f"  Output: {output[:200]}")
+        print(f"  Exit code: {result_json.get('exit_code')}")
+        print(f"  Error: {result_json.get('error')}")
+
+        success = result_json.get('exit_code') == 0 and 'Hello from Koyeb!' in output
+        print(f"\nTest: {'✅ Passed' if success else '❌ Failed'}")
+        return success
+    finally:
+        # Cleanup runs even when the command or the JSON parsing raised, so a
+        # failing run does not leave the task's environment behind.
+        cleanup_vm(test_task_id)
+
+
+def test_python_execution():
+    """Run a ``python3 -c`` one-liner through the terminal tool.
+
+    Returns:
+        ``True`` when the command exits with code 0 and its output contains
+        ``Python``, ``False`` otherwise.
+    """
+    print("\n" + "=" * 60)
+    print("TEST 3: Python Execution")
+    print("=" * 60)
+
+    test_task_id = "koyeb_test_python"
+
+    try:
+        python_cmd = 'python3 -c "import sys; print(f\'Python {sys.version}\')"'
+        print(f"Executing: {python_cmd}")
+
+        result = terminal_tool(python_cmd, task_id=test_task_id)
+        result_json = json.loads(result)
+        # Guard against a missing or null "output" so slicing and `in` are safe.
+        output = result_json.get('output') or ''
+
+        print("\nResult:")
+        print(f"  Output: {output[:200]}")
+        print(f"  Exit code: {result_json.get('exit_code')}")
+        print(f"  Error: {result_json.get('error')}")
+
+        success = result_json.get('exit_code') == 0 and 'Python' in output
+        print(f"\nTest: {'✅ Passed' if success else '❌ Failed'}")
+        return success
+    finally:
+        # Cleanup runs even when the command or the JSON parsing raised, so a
+        # failing run does not leave the task's environment behind.
+        cleanup_vm(test_task_id)
+
+
+def test_filesystem_operations():
+    """Write a file and read it back within the same task environment.
+
+    Returns:
+        ``True`` when both commands exit with code 0 and the text read back
+        contains what was written, ``False`` otherwise.
+    """
+    print("\n" + "=" * 60)
+    print("TEST 4: Filesystem Operations")
+    print("=" * 60)
+
+    test_task_id = "koyeb_test_fs"
+
+    try:
+        # Create a file
+        print("Step 1: Creating test file...")
+        result1 = terminal_tool("echo 'koyeb filesystem test' > /tmp/koyeb_test.txt", task_id=test_task_id)
+        result1_json = json.loads(result1)
+        print(f"  Exit code: {result1_json.get('exit_code')}")
+
+        # Read the file back
+        print("Step 2: Reading test file...")
+        result2 = terminal_tool("cat /tmp/koyeb_test.txt", task_id=test_task_id)
+        result2_json = json.loads(result2)
+        # Guard against a missing or null "output" so the `in` check is safe.
+        output = result2_json.get('output') or ''
+        print(f"  Output: {output}")
+        print(f"  Exit code: {result2_json.get('exit_code')}")
+
+        success = (
+            result1_json.get('exit_code') == 0 and
+            result2_json.get('exit_code') == 0 and
+            'koyeb filesystem test' in output
+        )
+        print(f"\nTest: {'✅ Passed' if success else '❌ Failed'}")
+        return success
+    finally:
+        # Cleanup runs even when a command or the JSON parsing raised, so a
+        # failing run does not leave the task's environment behind.
+        cleanup_vm(test_task_id)
+
+
+def test_environment_isolation():
+    """Test that different task_ids get isolated environments.
+
+    A file is written under one task_id and then read under a second one.
+    The check only counts as passed when the write itself succeeded and the
+    written text does not show up in the second task's output.
+
+    Returns:
+        ``True`` when the environments are isolated, ``False`` otherwise
+        (including when the file could not be created in the first task).
+    """
+    print("\n" + "=" * 60)
+    print("TEST 5: Environment Isolation")
+    print("=" * 60)
+
+    task1 = "koyeb_test_iso_1"
+    task2 = "koyeb_test_iso_2"
+
+    try:
+        # Create file in task1
+        print("Step 1: Creating file in task1...")
+        result1 = terminal_tool("echo 'task1 data' > /tmp/isolated.txt", task_id=task1)
+        # Without a successful write there is nothing to leak, so the
+        # isolation check below would pass vacuously.
+        write_ok = json.loads(result1).get('exit_code') == 0
+
+        # Try to read from task2 (should not exist)
+        print("Step 2: Trying to read file from task2 (should not exist)...")
+        result2 = terminal_tool("cat /tmp/isolated.txt 2>&1 || echo 'FILE_NOT_FOUND'", task_id=task2)
+        result2_json = json.loads(result2)
+
+        # Seeing task1's data in task2 is a leak regardless of what else
+        # the output contains.
+        output = result2_json.get('output') or ''
+        isolated = write_ok and 'task1 data' not in output
+
+        print(f"  Task2 output: {output[:200]}")
+        if not write_ok:
+            print("\nTest: ❌ Failed (could not create the file in task1)")
+        else:
+            print(f"\nTest: {'✅ Passed (environments isolated)' if isolated else '❌ Failed (environments NOT isolated)'}")
+        return isolated
+    finally:
+        # Release both environments even if a step raised; the nested
+        # finally makes sure task2 is cleaned up when cleaning task1 fails.
+        try:
+            cleanup_vm(task1)
+        finally:
+            cleanup_vm(task2)
+
+
+def main():
+    """Run all Koyeb terminal tests in order and print a summary.
+
+    When ``TERMINAL_ENV`` is not ``koyeb`` the user is asked whether to
+    continue with the configured backend instead.
+
+    Returns:
+        ``True`` only if every test passed; ``False`` when a test failed,
+        the requirements check failed, or the run was aborted.
+    """
+    print("🧪 Koyeb Terminal Tool Test Suite")
+    print("=" * 60)
+
+    # Check current config
+    config = _get_env_config()
+    print("\nCurrent configuration:")
+    print(f"  TERMINAL_ENV: {config['env_type']}")
+    print(f"  TERMINAL_KOYEB_IMAGE: {config['koyeb_image']}")
+    print(f"  TERMINAL_TIMEOUT: {config['timeout']}s")
+
+    on_koyeb = config['env_type'] == 'koyeb'
+    if not on_koyeb:
+        print(f"\n⚠️  WARNING: TERMINAL_ENV is set to '{config['env_type']}', not 'koyeb'")
+        print("   To test Koyeb specifically, set TERMINAL_ENV=koyeb")
+        try:
+            response = input("\n   Continue testing with current backend? (y/n): ")
+        except EOFError:
+            # No interactive stdin (CI, piped input): treat as a refusal
+            # instead of crashing with a traceback.
+            response = ""
+        if response.strip().lower() != 'y':
+            print("Aborting.")
+            return False
+
+    results = {}
+
+    # Run tests
+    if on_koyeb:
+        results['requirements'] = test_koyeb_requirements()
+    else:
+        # The user chose to continue on another backend. The Koyeb-specific
+        # check returns False for any non-koyeb backend, which would make
+        # that choice a dead end, so check the active backend directly.
+        results['requirements'] = check_terminal_requirements()
+
+    if not results['requirements']:
+        print("\n❌ Requirements not met. Cannot continue with other tests.")
+        return False
+
+    results['simple_command'] = test_simple_command()
+    results['python_execution'] = test_python_execution()
+    results['filesystem_operations'] = test_filesystem_operations()
+    results['environment_isolation'] = test_environment_isolation()
+
+    # Summary
+    print("\n" + "=" * 60)
+    print("TEST SUMMARY")
+    print("=" * 60)
+
+    passed = sum(1 for v in results.values() if v)
+    total = len(results)
+
+    for test_name, passed_test in results.items():
+        status = "✅ PASSED" if passed_test else "❌ FAILED"
+        print(f"  {test_name}: {status}")
+
+    print(f"\nTotal: {passed}/{total} tests passed")
+
+    return passed == total
+
+
+if __name__ == "__main__":
+    # Exit status 0 only when main() reports that everything passed.
+    sys.exit(0 if main() else 1)
--- a/tests/tools/test_koyeb_environment.py
+++ b/tests/tools/test_koyeb_environment.py
@ -0,0 +1,266 @@
+"""Unit tests for the Koyeb cloud sandbox environment backend."""
+
+import threading
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Helpers to build mock Koyeb SDK objects
+# ---------------------------------------------------------------------------
+
+def _make_exec_response(stdout="", stderr="", exit_code=0):
+    """Build a stand-in exec result exposing ``stdout``, ``stderr`` and ``exit_code``."""
+    response = SimpleNamespace()
+    response.stdout = stdout
+    response.stderr = stderr
+    response.exit_code = exit_code
+    return response
+
+
+def _make_sandbox(sandbox_id="sb-koyeb-123"):
+    """Return a mock sandbox with an id, a mock filesystem and a default exec reply."""
+    sandbox = MagicMock()
+    sandbox.id = sandbox_id
+    sandbox.filesystem = MagicMock()
+    # By default every exec() call yields an empty, successful response.
+    sandbox.exec.return_value = _make_exec_response()
+    return sandbox
+
+
+def _patch_koyeb_imports(monkeypatch):
+    """Patch the koyeb SDK so KoyebEnvironment can be imported without it.
+
+    Registers a stub ``koyeb`` module, whose ``Sandbox`` attribute is a
+    ``MagicMock``, in ``sys.modules`` through ``monkeypatch.setitem``.
+
+    Args:
+        monkeypatch: pytest's ``monkeypatch`` fixture (only ``setitem`` is used).
+
+    Returns:
+        The stub module, so tests can configure and inspect ``Sandbox``.
+    """
+    # Plain local imports instead of the __import__("sys") expression.
+    import sys
+    import types
+
+    koyeb_mod = types.ModuleType("koyeb")
+    koyeb_mod.Sandbox = MagicMock()
+
+    monkeypatch.setitem(sys.modules, "koyeb", koyeb_mod)
+    return koyeb_mod
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture()
+def koyeb_sdk(monkeypatch):
+    """Install the mock koyeb SDK module for the test and return it for assertions."""
+    stub_module = _patch_koyeb_imports(monkeypatch)
+    return stub_module
+
+
+@pytest.fixture()
+def make_env(koyeb_sdk, monkeypatch):
+    """Return a factory that builds a KoyebEnvironment on top of the mocked SDK.
+
+    The factory accepts an optional ``sandbox`` mock, the ``home_dir`` string
+    the sandbox's default exec reply should carry, and any extra keyword
+    arguments for ``KoyebEnvironment`` (``task_id`` defaults to ``"test-task"``).
+    """
+    # Stub out interrupt polling and credential/skills discovery, in the
+    # same order as they are listed here.
+    stubs = {
+        "tools.environments.base.is_interrupted": lambda: False,
+        "tools.credential_files.get_credential_file_mounts": lambda: [],
+        "tools.credential_files.get_skills_directory_mount": lambda **kw: None,
+        "tools.credential_files.iter_skills_files": lambda **kw: [],
+    }
+    for target, replacement in stubs.items():
+        monkeypatch.setattr(target, replacement)
+
+    def _factory(
+        sandbox=None,
+        home_dir="/root",
+        **kwargs,
+    ):
+        if not sandbox:
+            sandbox = _make_sandbox()
+        # Mock the $HOME detection
+        sandbox.exec.return_value = _make_exec_response(stdout=home_dir)
+        koyeb_sdk.Sandbox.create.return_value = sandbox
+
+        # Imported here, after the koyeb_sdk fixture has installed the stub
+        # module, so the import works without the real SDK.
+        from tools.environments.koyeb import KoyebEnvironment
+
+        kwargs.setdefault("task_id", "test-task")
+        return KoyebEnvironment(image="koyeb/sandbox:latest", **kwargs)
+
+    return _factory
+
+
+# ---------------------------------------------------------------------------
+# Constructor / cwd resolution
+# ---------------------------------------------------------------------------
+
+class TestCwdResolution:
+    """How the constructor settles ``cwd`` from the detected remote $HOME."""
+
+    def test_default_cwd_resolves_home(self, make_env):
+        """With no cwd given, cwd becomes the detected home directory."""
+        environment = make_env(home_dir="/home/testuser")
+        assert environment.cwd == "/home/testuser"
+
+    def test_tilde_cwd_resolves_home(self, make_env):
+        """A cwd of "~" becomes the detected home directory."""
+        environment = make_env(cwd="~", home_dir="/home/testuser")
+        assert environment.cwd == "/home/testuser"
+
+    def test_explicit_cwd_not_overridden(self, make_env):
+        """An explicit cwd is kept as given."""
+        environment = make_env(cwd="/workspace", home_dir="/root")
+        assert environment.cwd == "/workspace"
+
+    def test_home_detection_failure_keeps_default_cwd(self, make_env):
+        """If exec raises during detection, cwd stays at the constructor default."""
+        failing_sandbox = _make_sandbox()
+        failing_sandbox.exec.side_effect = RuntimeError("exec failed")
+        environment = make_env(sandbox=failing_sandbox)
+        assert environment.cwd == "/root"  # keeps constructor default
+
+    def test_empty_home_keeps_default_cwd(self, make_env):
+        """An empty detection result leaves cwd at the constructor default."""
+        environment = make_env(home_dir="")
+        assert environment.cwd == "/root"
+
+
+# ---------------------------------------------------------------------------
+# Sandbox name sanitization
+# ---------------------------------------------------------------------------
+
+class TestSandboxNameSanitization:
+    """The ``name`` passed to ``Sandbox.create`` is derived from the task_id."""
+
+    @staticmethod
+    def _created_name(koyeb_sdk):
+        """Return the ``name`` keyword of the most recent ``Sandbox.create`` call."""
+        _positional, keywords = koyeb_sdk.Sandbox.create.call_args
+        return keywords["name"]
+
+    def test_underscores_replaced_with_hyphens(self, make_env, koyeb_sdk):
+        make_env(task_id="my_test_task")
+        sandbox_name = self._created_name(koyeb_sdk)
+        assert "_" not in sandbox_name
+        assert sandbox_name == "hermes-my-test-task"
+
+    def test_uppercase_lowered(self, make_env, koyeb_sdk):
+        make_env(task_id="MyTask")
+        assert self._created_name(koyeb_sdk) == "hermes-mytask"
+
+    def test_special_chars_removed(self, make_env, koyeb_sdk):
+        make_env(task_id="task@#$123")
+        assert self._created_name(koyeb_sdk) == "hermes-task-123"
+
+    def test_name_truncated_to_63_chars(self, make_env, koyeb_sdk):
+        make_env(task_id="a" * 100)
+        assert len(self._created_name(koyeb_sdk)) <= 63
+
+    def test_consecutive_hyphens_collapsed(self, make_env, koyeb_sdk):
+        make_env(task_id="a__b---c")
+        assert "--" not in self._created_name(koyeb_sdk)
+
+
+# ---------------------------------------------------------------------------
+# Cleanup
+# ---------------------------------------------------------------------------
+
+class TestCleanup:
+    """Behaviour of ``KoyebEnvironment.cleanup()``."""
+
+    def test_cleanup_deletes_sandbox(self, make_env):
+        environment = make_env()
+        # Keep a handle on the sandbox before cleanup() runs.
+        sandbox = environment._sandbox
+        environment.cleanup()
+        sandbox.delete.assert_called_once()
+
+    def test_cleanup_idempotent(self, make_env):
+        environment = make_env()
+        for _ in range(2):
+            environment.cleanup()  # the second call should not raise
+
+    def test_cleanup_swallows_errors(self, make_env):
+        environment = make_env()
+        environment._sandbox.delete.side_effect = RuntimeError("delete failed")
+        environment.cleanup()  # should not raise
+        assert environment._sandbox is None
+
+    def test_cleanup_calls_sync_back_before_delete(self, make_env):
+        environment = make_env()
+        call_order = []
+
+        def _record_sync_back():
+            call_order.append("sync_back")
+
+        sync_manager = MagicMock()
+        sync_manager.sync_back = _record_sync_back
+        environment._sync_manager = sync_manager
+
+        original_delete = environment._sandbox.delete
+
+        def _record_delete():
+            # Record the call, then forward to the mock's own delete.
+            return call_order.append("delete"), original_delete()
+
+        environment._sandbox.delete = _record_delete
+
+        environment.cleanup()
+
+        for step in ("sync_back", "delete"):
+            assert step in call_order
+        assert call_order.index("sync_back") < call_order.index("delete")
+
+
+# ---------------------------------------------------------------------------
+# Execute
+# ---------------------------------------------------------------------------
+
+class TestExecute:
+    """``execute()`` output, exit codes and stdin handling against a scripted sandbox."""
+
+    @staticmethod
+    def _scripted_sandbox(command_response):
+        """Return a sandbox whose exec() replies are scripted in call order.
+
+        Calls: (1) $HOME detection, (2) init_session bootstrap, (3) actual command.
+        """
+        sandbox = _make_sandbox()
+        sandbox.exec.side_effect = [
+            _make_exec_response(stdout="/root"),           # $HOME
+            _make_exec_response(stdout="", exit_code=0),   # init_session
+            command_response,                              # actual cmd
+        ]
+        return sandbox
+
+    def test_basic_command(self, make_env):
+        sandbox = self._scripted_sandbox(
+            _make_exec_response(stdout="hello", exit_code=0)
+        )
+        environment = make_env(sandbox=sandbox)
+
+        outcome = environment.execute("echo hello")
+        assert "hello" in outcome["output"]
+        assert outcome["returncode"] == 0
+
+    def test_nonzero_exit_code(self, make_env):
+        sandbox = self._scripted_sandbox(
+            _make_exec_response(stdout="not found", exit_code=127)
+        )
+        environment = make_env(sandbox=sandbox)
+
+        outcome = environment.execute("bad_cmd")
+        assert outcome["returncode"] == 127
+
+    def test_stderr_included_in_output(self, make_env):
+        sandbox = self._scripted_sandbox(
+            _make_exec_response(stdout="out", stderr="err", exit_code=0)
+        )
+        environment = make_env(sandbox=sandbox)
+
+        outcome = environment.execute("cmd")
+        for stream_text in ("out", "err"):
+            assert stream_text in outcome["output"]
+
+    def test_stdin_data_wraps_heredoc(self, make_env):
+        sandbox = self._scripted_sandbox(
+            _make_exec_response(stdout="ok", exit_code=0)
+        )
+        environment = make_env(sandbox=sandbox)
+
+        environment.execute("python3", stdin_data="print('hi')")
+        # First positional argument of the last exec() call is the command.
+        sent_command = sandbox.exec.call_args_list[-1][0][0]
+        assert "HERMES_STDIN_" in sent_command
+        assert "print" in sent_command
+
+
+# ---------------------------------------------------------------------------
+# Interrupt
+# ---------------------------------------------------------------------------
+
+class TestInterrupt:
+    """Interrupting a running command deletes the sandbox and reports exit code 130."""
+
+    def test_interrupt_kills_and_returns_130(self, make_env, monkeypatch):
+        sb = _make_sandbox()
+        # Set in the finally block below to release a blocked exec() call.
+        event = threading.Event()
+        # Mutable call counter shared with the side-effect closure.
+        calls = {"n": 0}
+
+        def exec_side_effect(*args, **kwargs):
+            # The first two calls answer immediately; any later call blocks
+            # until the event is set or the 5 second timeout elapses.
+            calls["n"] += 1
+            if calls["n"] == 1:
+                return _make_exec_response(stdout="/root")  # $HOME
+            if calls["n"] == 2:
+                return _make_exec_response(stdout="", exit_code=0)  # init_session
+            event.wait(timeout=5)  # simulate long-running command
+            return _make_exec_response(stdout="done", exit_code=0)
+
+        sb.exec.side_effect = exec_side_effect
+        env = make_env(sandbox=sb)
+
+        # make_env patched is_interrupted to return False; switch it to True
+        # only now, after the environment has been constructed.
+        monkeypatch.setattr(
+            "tools.environments.base.is_interrupted", lambda: True
+        )
+        try:
+            result = env.execute("sleep 10")
+            assert result["returncode"] == 130
+            sb.delete.assert_called()  # cancel_fn calls sandbox.delete()
+        finally:
+            # Unblock any exec() call still waiting on the event.
+            event.set()
--- a/tests/tools/test_sync_back_backends.py
+++ b/tests/tools/test_sync_back_backends.py
@ -10,6 +10,7 @@ import pytest
 from tools.environments import ssh as ssh_env
 from tools.environments import modal as modal_env
 from tools.environments import daytona as daytona_env
+from tools.environments import koyeb as koyeb_env
 from tools.environments.ssh import SSHEnvironment


@ -95,6 +96,20 @@ def _make_mock_daytona_env():
    return env


+# ── Koyeb helpers ────────────────────────────────────────────────────
+
+
+def _make_mock_koyeb_env():
+    """Create a minimal KoyebEnvironment without calling __init__.
+
+    The instance is allocated with ``object.__new__`` and given only the
+    attributes set below: a mock sandbox, ``/root`` as remote home, no sync
+    manager, a fresh lock and a fixed task id.
+    """
+    # Plain local import instead of the __import__("threading") expression.
+    import threading
+
+    env = object.__new__(koyeb_env.KoyebEnvironment)
+    env._sandbox = MagicMock()
+    env._remote_home = "/root"
+    env._sync_manager = None
+    env._lock = threading.Lock()
+    env._task_id = "test"
+    return env
+
+
 # =====================================================================
 # SSH bulk download
 # =====================================================================
@ -402,6 +417,69 @@ class TestDaytonaCleanup:
        assert call_order.index("sync_back") < call_order.index("stop")


+# =====================================================================
+# Koyeb bulk download + cleanup
+# =====================================================================
+
+
+class TestKoyebBulkDownload:
+    """Unit tests for _koyeb_bulk_download."""
+
+    def test_koyeb_bulk_download_creates_tar_and_downloads(self, tmp_path):
+        """exec and download_file should both be called."""
+        target = tmp_path / "backup.tar"
+        env = _make_mock_koyeb_env()
+
+        env._koyeb_bulk_download(target)
+
+        # exec called twice: tar creation + rm cleanup
+        run = env._sandbox.exec
+        assert run.call_count == 2
+        tar_cmd, cleanup_cmd = (call[0][0] for call in run.call_args_list)
+
+        for fragment in ("tar cf", "/tmp/.hermes_sync.", ".tar", ".hermes"):
+            assert fragment in tar_cmd
+        assert "rm -f" in cleanup_cmd
+
+        download = env._sandbox.filesystem.download_file
+        download.assert_called_once()
+        positional = download.call_args[0]
+        assert positional[0].startswith("/tmp/.hermes_sync.")
+        assert positional[1] == str(target)
+
+    def test_koyeb_bulk_download_uses_remote_home(self, tmp_path):
+        """The tar command should use the env's _remote_home."""
+        env = _make_mock_koyeb_env()
+        env._remote_home = "/home/koyeb"
+
+        env._koyeb_bulk_download(tmp_path / "backup.tar")
+
+        first_command = env._sandbox.exec.call_args_list[0][0][0]
+        assert "home/koyeb/.hermes" in first_command
+
+
+class TestKoyebCleanup:
+    """Verify Koyeb cleanup() calls sync_back() before delete."""
+
+    def test_koyeb_cleanup_calls_sync_back(self):
+        """cleanup() should call sync_back() before sandbox.delete()."""
+        env = _make_mock_koyeb_env()
+        call_order = []
+
+        def _record_sync_back():
+            call_order.append("sync_back")
+
+        def _record_delete():
+            call_order.append("delete")
+
+        sync_manager = MagicMock()
+        sync_manager.sync_back = _record_sync_back
+        env._sync_manager = sync_manager
+        env._sandbox.delete = _record_delete
+
+        env.cleanup()
+
+        for step in ("sync_back", "delete"):
+            assert step in call_order
+        assert call_order.index("sync_back") < call_order.index("delete")
+
+
 # =====================================================================
 # FileSyncManager wiring: bulk_download_fn passed by each backend
 # =====================================================================