This commit is contained in:
Fabzer 2026-04-24 19:24:50 -05:00 committed by GitHub
commit b4f338ccf4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 1022 additions and 29 deletions

View file

@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
Test Koyeb Terminal Tool
This script tests that the Koyeb terminal backend is correctly configured
and can execute commands in Koyeb sandboxes.
Usage:
# Run with Koyeb backend
TERMINAL_ENV=koyeb python tests/test_koyeb_terminal.py
# Or run directly (will use whatever TERMINAL_ENV is set in .env)
python tests/test_koyeb_terminal.py
"""
import pytest
pytestmark = pytest.mark.integration
import os
import sys
import json
from pathlib import Path
# Load environment variables from .env. python-dotenv is used when it is
# installed; otherwise fall back to a minimal KEY=VALUE parser.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    # Manual fallback: look for .env three directory levels above this file.
    env_file = Path(__file__).parent.parent.parent / ".env"
    if env_file.exists():
        # Decode explicitly as UTF-8 (python-dotenv's default encoding) so
        # non-ASCII values do not depend on the platform's locale encoding.
        with open(env_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Skip blank lines, comments, and anything that is not KEY=VALUE.
                if not line or line.startswith('#') or '=' not in line:
                    continue
                key, value = line.split('=', 1)
                # Remove surrounding quotes if present
                value = value.strip().strip('"').strip("'")
                # setdefault: variables already present in the environment win.
                os.environ.setdefault(key.strip(), value)
# Add project root to path for imports.
# The root is taken to be three directory levels above this file.
parent_dir = Path(__file__).parent.parent.parent
sys.path.insert(0, str(parent_dir))
# Import terminal_tool module directly using importlib to avoid tools/__init__.py.
# The file is loaded by path and executed as a standalone module named
# "terminal_tool"; it is not registered through the regular package import.
import importlib.util
terminal_tool_path = parent_dir / "tools" / "terminal_tool.py"
spec = importlib.util.spec_from_file_location("terminal_tool", terminal_tool_path)
terminal_module = importlib.util.module_from_spec(spec)
# Runs the module body; everything below depends on this having succeeded.
spec.loader.exec_module(terminal_module)
# Re-export the entry points used by the tests under short module-level names.
terminal_tool = terminal_module.terminal_tool
check_terminal_requirements = terminal_module.check_terminal_requirements
_get_env_config = terminal_module._get_env_config
cleanup_vm = terminal_module.cleanup_vm
def test_koyeb_requirements():
    """Report the Koyeb configuration and check the backend's prerequisites.

    Prints the active TERMINAL_ENV, the configured Koyeb image and whether
    KOYEB_API_TOKEN is set. Returns False without running the requirements
    check when TERMINAL_ENV is not 'koyeb'; otherwise returns whatever
    check_terminal_requirements() reports.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("TEST 1: Koyeb Requirements Check")
    print(rule)

    config = _get_env_config()
    env_type = config['env_type']
    print(f"Current TERMINAL_ENV: {env_type}")
    print(f"Koyeb image: {config['koyeb_image']}")

    # The token status is printed for information only; it does not decide
    # the return value of this function.
    token_state = '✅ Set' if os.getenv("KOYEB_API_TOKEN") else '❌ Not set'
    print("\nKoyeb authentication:")
    print(f" KOYEB_API_TOKEN env var: {token_state}")

    if env_type == 'koyeb':
        requirements_met = check_terminal_requirements()
        verdict = '✅ Passed' if requirements_met else '❌ Failed'
        print(f"\nRequirements check: {verdict}")
        return requirements_met

    print(f"\n⚠️ TERMINAL_ENV is '{env_type}', not 'koyeb'")
    print(" Set TERMINAL_ENV=koyeb in .env or export it to test Koyeb backend")
    return False
def test_simple_command():
    """Run a trivial echo command through the terminal tool.

    Returns:
        True when the command exits with code 0 and its output contains the
        echoed text, False otherwise.
    """
    print("\n" + "=" * 60)
    print("TEST 2: Simple Command Execution")
    print("=" * 60)
    test_task_id = "koyeb_test_simple"
    try:
        print("Executing: echo 'Hello from Koyeb!'")
        result = terminal_tool("echo 'Hello from Koyeb!'", task_id=test_task_id)
        result_json = json.loads(result)
        # Treat a missing or null 'output' as empty so slicing cannot fail.
        output = result_json.get('output') or ''
        print("\nResult:")
        print(f" Output: {output[:200]}")
        print(f" Exit code: {result_json.get('exit_code')}")
        print(f" Error: {result_json.get('error')}")
        success = result_json.get('exit_code') == 0 and 'Hello from Koyeb!' in output
        print(f"\nTest: {'✅ Passed' if success else '❌ Failed'}")
        return success
    finally:
        # Cleanup runs even when the tool call or JSON parsing raises, so the
        # environment created for this task id is not left behind.
        cleanup_vm(test_task_id)
def test_python_execution():
    """Run a one-line Python program through the terminal tool.

    Returns:
        True when the command exits with code 0 and its output contains
        'Python', False otherwise.
    """
    print("\n" + "=" * 60)
    print("TEST 3: Python Execution")
    print("=" * 60)
    test_task_id = "koyeb_test_python"
    python_cmd = 'python3 -c "import sys; print(f\'Python {sys.version}\')"'
    try:
        print(f"Executing: {python_cmd}")
        result = terminal_tool(python_cmd, task_id=test_task_id)
        result_json = json.loads(result)
        # Treat a missing or null 'output' as empty so slicing cannot fail.
        output = result_json.get('output') or ''
        print("\nResult:")
        print(f" Output: {output[:200]}")
        print(f" Exit code: {result_json.get('exit_code')}")
        print(f" Error: {result_json.get('error')}")
        success = result_json.get('exit_code') == 0 and 'Python' in output
        print(f"\nTest: {'✅ Passed' if success else '❌ Failed'}")
        return success
    finally:
        # Cleanup runs even when the tool call or JSON parsing raises, so the
        # environment created for this task id is not left behind.
        cleanup_vm(test_task_id)
def test_filesystem_operations():
    """Write a file and read it back within the same task id.

    Returns:
        True when both commands exit with code 0 and the text read back
        contains what was written, False otherwise.
    """
    print("\n" + "=" * 60)
    print("TEST 4: Filesystem Operations")
    print("=" * 60)
    test_task_id = "koyeb_test_fs"
    try:
        # Create a file
        print("Step 1: Creating test file...")
        result1 = terminal_tool("echo 'koyeb filesystem test' > /tmp/koyeb_test.txt", task_id=test_task_id)
        result1_json = json.loads(result1)
        print(f" Exit code: {result1_json.get('exit_code')}")
        # Read the file back using the same task id
        print("Step 2: Reading test file...")
        result2 = terminal_tool("cat /tmp/koyeb_test.txt", task_id=test_task_id)
        result2_json = json.loads(result2)
        # Treat a missing or null 'output' as empty so the membership test
        # below cannot fail with a TypeError.
        output = result2_json.get('output') or ''
        print(f" Output: {output}")
        print(f" Exit code: {result2_json.get('exit_code')}")
        success = (
            result1_json.get('exit_code') == 0 and
            result2_json.get('exit_code') == 0 and
            'koyeb filesystem test' in output
        )
        print(f"\nTest: {'✅ Passed' if success else '❌ Failed'}")
        return success
    finally:
        # Cleanup runs even when a tool call or JSON parsing raises, so the
        # environment created for this task id is not left behind.
        cleanup_vm(test_task_id)
def test_environment_isolation():
    """Test that different task_ids get isolated environments.

    A file is written under one task id and then read under a second one.

    Returns:
        True when the write succeeded and the second task's output does not
        contain the first task's data, False otherwise.
    """
    print("\n" + "=" * 60)
    print("TEST 5: Environment Isolation")
    print("=" * 60)
    task1 = "koyeb_test_iso_1"
    task2 = "koyeb_test_iso_2"
    try:
        # Create file in task1
        print("Step 1: Creating file in task1...")
        result1 = terminal_tool("echo 'task1 data' > /tmp/isolated.txt", task_id=task1)
        result1_json = json.loads(result1)
        # If the write failed, the absence of the data in task2 proves
        # nothing, so the write must succeed for the test to pass.
        write_ok = result1_json.get('exit_code') == 0
        print(f" Exit code: {result1_json.get('exit_code')}")
        # Try to read from task2 (should not exist)
        print("Step 2: Trying to read file from task2 (should not exist)...")
        result2 = terminal_tool("cat /tmp/isolated.txt 2>&1 || echo 'FILE_NOT_FOUND'", task_id=task2)
        result2_json = json.loads(result2)
        output = result2_json.get('output') or ''
        # Seeing task1's data from task2 is a failure regardless of any
        # other text in the output.
        isolated = write_ok and 'task1 data' not in output
        print(f" Task2 output: {output[:200]}")
        if not write_ok:
            print(" Step 1 did not succeed, so isolation could not be verified")
        print(f"\nTest: {'✅ Passed (environments isolated)' if isolated else '❌ Failed (environments NOT isolated)'}")
        return isolated
    finally:
        # Clean up both environments, even if the first cleanup raises.
        try:
            cleanup_vm(task1)
        finally:
            cleanup_vm(task2)
def main():
    """Run all Koyeb terminal tests and print a summary.

    Returns:
        True when every test passed. False when the user declined to
        continue, the requirements check failed, or any test failed.
    """
    print("🧪 Koyeb Terminal Tool Test Suite")
    print("=" * 60)
    # Check current config
    config = _get_env_config()
    print("\nCurrent configuration:")
    print(f" TERMINAL_ENV: {config['env_type']}")
    print(f" TERMINAL_KOYEB_IMAGE: {config['koyeb_image']}")
    print(f" TERMINAL_TIMEOUT: {config['timeout']}s")
    if config['env_type'] != 'koyeb':
        print(f"\n⚠️ WARNING: TERMINAL_ENV is set to '{config['env_type']}', not 'koyeb'")
        print(" To test Koyeb specifically, set TERMINAL_ENV=koyeb")
        try:
            response = input("\n Continue testing with current backend? (y/n): ")
        except EOFError:
            # No interactive stdin: treat it as a "no" rather than crashing.
            response = ""
        if response.strip().lower() != 'y':
            print("Aborting.")
            return False
        # NOTE(review): test_koyeb_requirements() returns False whenever
        # TERMINAL_ENV is not 'koyeb', so answering 'y' here still stops at
        # the requirements gate below. Confirm whether that is intended.
    results = {}
    # Run tests
    results['requirements'] = test_koyeb_requirements()
    if not results['requirements']:
        print("\n❌ Requirements not met. Cannot continue with other tests.")
        return False
    results['simple_command'] = test_simple_command()
    results['python_execution'] = test_python_execution()
    results['filesystem_operations'] = test_filesystem_operations()
    results['environment_isolation'] = test_environment_isolation()
    # Summary
    print("\n" + "=" * 60)
    print("TEST SUMMARY")
    print("=" * 60)
    passed = sum(1 for outcome in results.values() if outcome)
    total = len(results)
    for test_name, passed_test in results.items():
        status = "✅ PASSED" if passed_test else "❌ FAILED"
        print(f" {test_name}: {status}")
    print(f"\nTotal: {passed}/{total} tests passed")
    return passed == total
if __name__ == "__main__":
    # Exit status is 0 only when main() reports success, 1 otherwise.
    success = main()
    raise SystemExit(0 if success else 1)

View file

@ -0,0 +1,266 @@
"""Unit tests for the Koyeb cloud sandbox environment backend."""
import threading
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Helpers to build mock Koyeb SDK objects
# ---------------------------------------------------------------------------
def _make_exec_response(stdout="", stderr="", exit_code=0):
    """Build a stand-in for the object returned by ``sandbox.exec``.

    The result exposes ``stdout``, ``stderr`` and ``exit_code`` attributes.
    """
    fields = {"stdout": stdout, "stderr": stderr, "exit_code": exit_code}
    return SimpleNamespace(**fields)
def _make_sandbox(sandbox_id="sb-koyeb-123"):
    """Return a MagicMock sandbox with an id, a filesystem and a default exec result.

    ``exec`` returns an empty, successful response unless a test overrides it.
    """
    sandbox = MagicMock()
    sandbox.id = sandbox_id
    sandbox.filesystem = MagicMock()
    sandbox.exec.return_value = _make_exec_response()
    return sandbox
def _patch_koyeb_imports(monkeypatch):
    """Register a fake ``koyeb`` module so KoyebEnvironment can be imported without the SDK.

    The fake module carries a ``Sandbox`` attribute backed by a MagicMock.
    It is installed in ``sys.modules`` through ``monkeypatch.setitem`` and
    returned so tests can inspect it.
    """
    import sys
    from types import ModuleType

    fake_sdk = ModuleType("koyeb")
    fake_sdk.Sandbox = MagicMock()
    monkeypatch.setitem(sys.modules, "koyeb", fake_sdk)
    return fake_sdk
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture()
def koyeb_sdk(monkeypatch):
    """Install the mocked koyeb SDK module for this test and hand it back for assertions."""
    sdk_module = _patch_koyeb_imports(monkeypatch)
    return sdk_module
@pytest.fixture()
def make_env(koyeb_sdk, monkeypatch):
    """Return a factory that builds a KoyebEnvironment on top of the mocked SDK.

    The interrupt check and the credential/skills helpers are replaced with
    inert stubs before the factory is handed out.
    """
    stubs = {
        "tools.environments.base.is_interrupted": lambda: False,
        "tools.credential_files.get_credential_file_mounts": lambda: [],
        "tools.credential_files.get_skills_directory_mount": lambda **kw: None,
        "tools.credential_files.iter_skills_files": lambda **kw: [],
    }
    for target, stub in stubs.items():
        monkeypatch.setattr(target, stub)

    def _factory(sandbox=None, home_dir="/root", **kwargs):
        if not sandbox:
            sandbox = _make_sandbox()
        # Mock the $HOME detection: exec reports home_dir on stdout.
        sandbox.exec.return_value = _make_exec_response(stdout=home_dir)
        koyeb_sdk.Sandbox.create.return_value = sandbox
        # Imported here, after the koyeb_sdk fixture has installed the fake module.
        from tools.environments.koyeb import KoyebEnvironment
        kwargs.setdefault("task_id", "test-task")
        return KoyebEnvironment(image="koyeb/sandbox:latest", **kwargs)

    return _factory
# ---------------------------------------------------------------------------
# Constructor / cwd resolution
# ---------------------------------------------------------------------------
class TestCwdResolution:
    """How the constructor settles on ``cwd`` from the detected home directory."""

    def test_default_cwd_resolves_home(self, make_env):
        """With no cwd given, cwd becomes the detected home directory."""
        detected_home = "/home/testuser"
        environment = make_env(home_dir=detected_home)
        assert environment.cwd == "/home/testuser"

    def test_tilde_cwd_resolves_home(self, make_env):
        """A cwd of '~' is replaced by the detected home directory."""
        environment = make_env(cwd="~", home_dir="/home/testuser")
        assert environment.cwd == "/home/testuser"

    def test_explicit_cwd_not_overridden(self, make_env):
        """An explicit cwd is kept as given."""
        environment = make_env(cwd="/workspace", home_dir="/root")
        assert environment.cwd == "/workspace"

    def test_home_detection_failure_keeps_default_cwd(self, make_env):
        """When exec raises, cwd stays at the constructor default."""
        failing_sandbox = _make_sandbox()
        failing_sandbox.exec.side_effect = RuntimeError("exec failed")
        environment = make_env(sandbox=failing_sandbox)
        assert environment.cwd == "/root"  # keeps constructor default

    def test_empty_home_keeps_default_cwd(self, make_env):
        """An empty home directory leaves cwd at the default."""
        environment = make_env(home_dir="")
        assert environment.cwd == "/root"
# ---------------------------------------------------------------------------
# Sandbox name sanitization
# ---------------------------------------------------------------------------
class TestSandboxNameSanitization:
    """The ``name`` keyword passed to ``Sandbox.create`` is derived from the task id."""

    @staticmethod
    def _created_name(sdk):
        """Return the ``name`` keyword argument of the latest ``Sandbox.create`` call."""
        create_kwargs = sdk.Sandbox.create.call_args[1]
        return create_kwargs["name"]

    def test_underscores_replaced_with_hyphens(self, make_env, koyeb_sdk):
        make_env(task_id="my_test_task")
        sandbox_name = self._created_name(koyeb_sdk)
        assert "_" not in sandbox_name
        assert sandbox_name == "hermes-my-test-task"

    def test_uppercase_lowered(self, make_env, koyeb_sdk):
        make_env(task_id="MyTask")
        sandbox_name = self._created_name(koyeb_sdk)
        assert sandbox_name == "hermes-mytask"

    def test_special_chars_removed(self, make_env, koyeb_sdk):
        make_env(task_id="task@#$123")
        sandbox_name = self._created_name(koyeb_sdk)
        assert sandbox_name == "hermes-task-123"

    def test_name_truncated_to_63_chars(self, make_env, koyeb_sdk):
        long_task_id = "a" * 100
        make_env(task_id=long_task_id)
        sandbox_name = self._created_name(koyeb_sdk)
        assert len(sandbox_name) <= 63

    def test_consecutive_hyphens_collapsed(self, make_env, koyeb_sdk):
        make_env(task_id="a__b---c")
        sandbox_name = self._created_name(koyeb_sdk)
        assert "--" not in sandbox_name
# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------
class TestCleanup:
    """Behaviour of ``KoyebEnvironment.cleanup``."""

    def test_cleanup_deletes_sandbox(self, make_env):
        """cleanup() deletes the sandbox exactly once."""
        environment = make_env()
        sandbox = environment._sandbox
        environment.cleanup()
        sandbox.delete.assert_called_once()

    def test_cleanup_idempotent(self, make_env):
        """Calling cleanup() twice must not raise."""
        environment = make_env()
        for _ in range(2):
            environment.cleanup()

    def test_cleanup_swallows_errors(self, make_env):
        """A failing delete is not propagated and the sandbox reference is dropped."""
        environment = make_env()
        environment._sandbox.delete.side_effect = RuntimeError("delete failed")
        environment.cleanup()  # should not raise
        assert environment._sandbox is None

    def test_cleanup_calls_sync_back_before_delete(self, make_env):
        """sync_back() must be recorded before delete()."""
        environment = make_env()
        call_order = []

        def _record_sync_back():
            call_order.append("sync_back")

        sync_manager = MagicMock()
        sync_manager.sync_back = _record_sync_back
        environment._sync_manager = sync_manager

        original_delete = environment._sandbox.delete

        def _record_delete():
            call_order.append("delete")
            return None, original_delete()

        environment._sandbox.delete = _record_delete
        environment.cleanup()

        assert "sync_back" in call_order
        assert "delete" in call_order
        assert call_order.index("sync_back") < call_order.index("delete")
# ---------------------------------------------------------------------------
# Execute
# ---------------------------------------------------------------------------
class TestExecute:
    """``execute`` results for a sandbox whose exec calls are scripted."""

    @staticmethod
    def _scripted_sandbox(command_response):
        """Return a sandbox whose exec yields three scripted responses.

        Calls: (1) $HOME detection, (2) init_session bootstrap,
        (3) the actual command, answered with ``command_response``.
        """
        sandbox = _make_sandbox()
        sandbox.exec.side_effect = [
            _make_exec_response(stdout="/root"),  # $HOME
            _make_exec_response(stdout="", exit_code=0),  # init_session
            command_response,  # actual cmd
        ]
        return sandbox

    def test_basic_command(self, make_env):
        sandbox = self._scripted_sandbox(
            _make_exec_response(stdout="hello", exit_code=0)
        )
        environment = make_env(sandbox=sandbox)
        outcome = environment.execute("echo hello")
        assert "hello" in outcome["output"]
        assert outcome["returncode"] == 0

    def test_nonzero_exit_code(self, make_env):
        sandbox = self._scripted_sandbox(
            _make_exec_response(stdout="not found", exit_code=127)
        )
        environment = make_env(sandbox=sandbox)
        outcome = environment.execute("bad_cmd")
        assert outcome["returncode"] == 127

    def test_stderr_included_in_output(self, make_env):
        sandbox = self._scripted_sandbox(
            _make_exec_response(stdout="out", stderr="err", exit_code=0)
        )
        environment = make_env(sandbox=sandbox)
        outcome = environment.execute("cmd")
        for stream_text in ("out", "err"):
            assert stream_text in outcome["output"]

    def test_stdin_data_wraps_heredoc(self, make_env):
        sandbox = self._scripted_sandbox(
            _make_exec_response(stdout="ok", exit_code=0)
        )
        environment = make_env(sandbox=sandbox)
        environment.execute("python3", stdin_data="print('hi')")
        # Inspect the command string of the most recent exec call.
        last_call = sandbox.exec.call_args_list[-1]
        issued_command = last_call[0][0]
        assert "HERMES_STDIN_" in issued_command
        assert "print" in issued_command
# ---------------------------------------------------------------------------
# Interrupt
# ---------------------------------------------------------------------------
class TestInterrupt:
    """Interrupting a running command."""

    def test_interrupt_kills_and_returns_130(self, make_env, monkeypatch):
        """With the interrupt flag set, execute() returns 130 and the sandbox is deleted."""
        sandbox = _make_sandbox()
        release = threading.Event()
        exec_calls = 0

        def fake_exec(*args, **kwargs):
            nonlocal exec_calls
            exec_calls += 1
            if exec_calls > 2:
                release.wait(timeout=5)  # simulate long-running command
                return _make_exec_response(stdout="done", exit_code=0)
            if exec_calls == 1:
                return _make_exec_response(stdout="/root")  # $HOME
            return _make_exec_response(stdout="", exit_code=0)  # init_session

        sandbox.exec.side_effect = fake_exec
        environment = make_env(sandbox=sandbox)
        # Flip the interrupt flag only after the environment has been built.
        monkeypatch.setattr(
            "tools.environments.base.is_interrupted", lambda: True
        )
        try:
            outcome = environment.execute("sleep 10")
            assert outcome["returncode"] == 130
            sandbox.delete.assert_called()  # cancel_fn calls sandbox.delete()
        finally:
            # Unblock the simulated command so its wait ends promptly.
            release.set()

View file

@ -10,6 +10,7 @@ import pytest
from tools.environments import ssh as ssh_env
from tools.environments import modal as modal_env
from tools.environments import daytona as daytona_env
from tools.environments import koyeb as koyeb_env
from tools.environments.ssh import SSHEnvironment
@ -95,6 +96,20 @@ def _make_mock_daytona_env():
return env
# ── Koyeb helpers ────────────────────────────────────────────────────
def _make_mock_koyeb_env():
    """Create a minimal KoyebEnvironment without calling __init__.

    The instance is allocated with ``object.__new__`` and only the private
    attributes set below are populated.
    """
    import threading

    env = object.__new__(koyeb_env.KoyebEnvironment)
    attributes = {
        "_sandbox": MagicMock(),
        "_remote_home": "/root",
        "_sync_manager": None,
        "_lock": threading.Lock(),
        "_task_id": "test",
    }
    for attr_name, attr_value in attributes.items():
        setattr(env, attr_name, attr_value)
    return env
# =====================================================================
# SSH bulk download
# =====================================================================
@ -402,6 +417,69 @@ class TestDaytonaCleanup:
assert call_order.index("sync_back") < call_order.index("stop")
# =====================================================================
# Koyeb bulk download + cleanup
# =====================================================================
class TestKoyebBulkDownload:
    """Unit tests for _koyeb_bulk_download."""

    def test_koyeb_bulk_download_creates_tar_and_downloads(self, tmp_path):
        """exec and download_file should both be called."""
        env = _make_mock_koyeb_env()
        target = tmp_path / "backup.tar"
        env._koyeb_bulk_download(target)

        # exec called twice: tar creation + rm cleanup
        exec_mock = env._sandbox.exec
        assert exec_mock.call_count == 2
        tar_call, cleanup_call = exec_mock.call_args_list

        tar_command = tar_call[0][0]
        for fragment in ("tar cf", "/tmp/.hermes_sync.", ".tar", ".hermes"):
            assert fragment in tar_command

        cleanup_command = cleanup_call[0][0]
        assert "rm -f" in cleanup_command

        download_mock = env._sandbox.filesystem.download_file
        download_mock.assert_called_once()
        positional = download_mock.call_args[0]
        remote_path, local_path = positional[0], positional[1]
        assert remote_path.startswith("/tmp/.hermes_sync.")
        assert local_path == str(target)

    def test_koyeb_bulk_download_uses_remote_home(self, tmp_path):
        """The tar command should use the env's _remote_home."""
        env = _make_mock_koyeb_env()
        env._remote_home = "/home/koyeb"
        env._koyeb_bulk_download(tmp_path / "backup.tar")
        first_call = env._sandbox.exec.call_args_list[0]
        tar_command = first_call[0][0]
        assert "home/koyeb/.hermes" in tar_command
class TestKoyebCleanup:
    """Verify Koyeb cleanup() calls sync_back() before delete."""

    def test_koyeb_cleanup_calls_sync_back(self):
        """cleanup() should call sync_back() before sandbox.delete()."""
        env = _make_mock_koyeb_env()
        call_order = []

        def _record(step):
            # Build a zero-argument callable that logs its step name when invoked.
            return lambda: call_order.append(step)

        sync_manager = MagicMock()
        sync_manager.sync_back = _record("sync_back")
        env._sync_manager = sync_manager
        env._sandbox.delete = _record("delete")

        env.cleanup()

        assert "sync_back" in call_order
        assert "delete" in call_order
        assert call_order.index("sync_back") < call_order.index("delete")
# =====================================================================
# FileSyncManager wiring: bulk_download_fn passed by each backend
# =====================================================================