feat(koyeb): add Koyeb backend support for cloud execution and environment management

2026-05-17 04:31:55 +00:00 · 2026-04-22 23:00:03 +02:00 · 2026-04-22 23:00:03 +02:00 · abd5eacb6e
commit abd5eacb6e
parent 57e33cf284
11 changed files with 479 additions and 64 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -32,7 +32,7 @@ hermes-agent/
 ├── agent/                # Agent internals (provider adapters, memory, caching, compression, etc.)
 ├── hermes_cli/           # CLI subcommands, setup wizard, plugins loader, skin engine
 ├── tools/                # Tool implementations — auto-discovered via tools/registry.py
-│   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
+│   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity, koyeb)
 ├── gateway/              # Messaging gateway — run.py + session.py + platforms/
 │   ├── platforms/        # Adapter per platform (telegram, discord, slack, whatsapp,
 │   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -157,7 +157,7 @@ hermes-agent/
 │   ├── skill_tools.py            # Skill search, load, manage
 │   └── environments/             # Terminal execution backends
 │       ├── base.py                   # BaseEnvironment ABC
-│       ├── local.py, docker.py, ssh.py, singularity.py, modal.py, daytona.py
+│       ├── local.py, docker.py, ssh.py, singularity.py, modal.py, daytona.py, koyeb.py
 │
 ├── gateway/                  # Messaging gateway
 │   ├── run.py                    # GatewayRunner — platform lifecycle, message routing, cron
--- a/README.md
+++ b/README.md
@ -21,7 +21,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open
 <tr><td><b>A closed learning loop</b></td><td>Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. <a href="https://github.com/plastic-labs/honcho">Honcho</a> dialectic user modeling. Compatible with the <a href="https://agentskills.io">agentskills.io</a> open standard.</td></tr>
 <tr><td><b>Scheduled automations</b></td><td>Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.</td></tr>
 <tr><td><b>Delegates and parallelizes</b></td><td>Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.</td></tr>
-<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Six terminal backends — local, Docker, SSH, Daytona, Singularity, and Modal. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster.</td></tr>
+<tr><td><b>Runs anywhere, not just your laptop</b></td><td>Seven terminal backends — local, Docker, SSH, Daytona, Singularity, Modal, and Koyeb. Daytona, Modal, and Koyeb offer serverless cloud sandboxes — your agent's environment spins up on demand and is deleted when done. Run it on a $5 VPS or a GPU cluster.</td></tr>
 <tr><td><b>Research-ready</b></td><td>Batch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models.</td></tr>
 </table>
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -437,7 +437,8 @@ DEFAULT_CONFIG = {
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
-        # Container resource limits (docker, singularity, modal, daytona — ignored for local/ssh)
+        "koyeb_image": "koyeb/sandbox:latest",
        # Container resource limits (docker, singularity, modal, daytona, koyeb — ignored for local/ssh)
        "container_cpu": 1,
        "container_memory": 5120,       # MB (default 5GB)
        "container_disk": 51200,        # MB (default 50GB)
@ -3694,6 +3695,10 @@ def show_config():
        print(f"  Daytona image: {terminal.get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')}")
        daytona_key = get_env_value('DAYTONA_API_KEY')
        print(f"  API key:      {'configured' if daytona_key else '(not set)'}")
    elif terminal.get('backend') == 'koyeb':
        print(f"  Koyeb image:  {terminal.get('koyeb_image', 'koyeb/sandbox:latest')}")
        koyeb_token = get_env_value('KOYEB_API_TOKEN')
        print(f"  API token:    {'configured' if koyeb_token else '(not set)'}")
    elif terminal.get('backend') == 'ssh':
        ssh_host = get_env_value('TERMINAL_SSH_HOST')
        ssh_user = get_env_value('TERMINAL_SSH_USER')
@ -3886,6 +3891,7 @@ def set_config_value(key: str, value: str):
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
        "terminal.koyeb_image": "TERMINAL_KOYEB_IMAGE",
        "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
        "terminal.cwd": "TERMINAL_CWD",
        "terminal.timeout": "TERMINAL_TIMEOUT",
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@ -777,6 +777,21 @@ def run_doctor(args):
            check_fail("daytona SDK not installed", "(pip install daytona)")
            issues.append("Install daytona SDK: pip install daytona")
    # Koyeb (if using koyeb backend)
    if terminal_env == "koyeb":
        koyeb_token = os.getenv("KOYEB_API_TOKEN")
        if koyeb_token:
            check_ok("Koyeb API token", "(configured)")
        else:
            check_fail("KOYEB_API_TOKEN not set", "(required for TERMINAL_ENV=koyeb)")
            issues.append("Set KOYEB_API_TOKEN environment variable")
        try:
            from koyeb import Sandbox  # noqa: F401 — SDK presence check
            check_ok("koyeb SDK", "(installed)")
        except ImportError:
            check_fail("koyeb SDK not installed", "(pip install koyeb-sdk)")
            issues.append("Install koyeb SDK: pip install koyeb-sdk")
    # Node.js + agent-browser (for browser automation tools)
    if shutil.which("node"):
        check_ok("Node.js")
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -1182,11 +1182,12 @@ def setup_terminal_backend(config: dict):
        "Modal - serverless cloud sandbox",
        "SSH - run on a remote machine",
        "Daytona - persistent cloud development environment",
        "Koyeb - cloud sandbox execution",
    ]
-    idx_to_backend = {0: "local", 1: "docker", 2: "modal", 3: "ssh", 4: "daytona"}
+    idx_to_backend = {0: "local", 1: "docker", 2: "modal", 3: "ssh", 4: "daytona", 5: "koyeb"}
-    backend_to_idx = {"local": 0, "docker": 1, "modal": 2, "ssh": 3, "daytona": 4}
+    backend_to_idx = {"local": 0, "docker": 1, "modal": 2, "ssh": 3, "daytona": 4, "koyeb": 5}
-    next_idx = 5
+    next_idx = 6
    if is_linux:
        terminal_choices.append("Singularity/Apptainer - HPC-friendly container")
        idx_to_backend[next_idx] = "singularity"
@ -1441,6 +1442,64 @@ def setup_terminal_backend(config: dict):
        _prompt_container_resources(config)
    elif selected_backend == "koyeb":
        print_success("Terminal backend: Koyeb")
        print_info("Cloud sandbox execution via Koyeb.")
        print_info("Sign up at: https://www.koyeb.com")
        # Check if koyeb SDK is installed
        try:
            __import__("koyeb")
        except ImportError:
            print_info("Installing koyeb SDK...")
            import subprocess
            uv_bin = shutil.which("uv")
            if uv_bin:
                result = subprocess.run(
                    [uv_bin, "pip", "install", "--python", sys.executable, "koyeb-sdk"],
                    capture_output=True,
                    text=True,
                )
            else:
                result = subprocess.run(
                    [sys.executable, "-m", "pip", "install", "koyeb-sdk"],
                    capture_output=True,
                    text=True,
                )
            if result.returncode == 0:
                print_success("koyeb SDK installed")
            else:
                print_warning("Install failed — run manually: pip install koyeb-sdk")
                if result.stderr:
                    print_info(f"  Error: {result.stderr.strip().splitlines()[-1]}")
        # Koyeb API token
        print()
        existing_key = get_env_value("KOYEB_API_TOKEN")
        if existing_key:
            print_info("  Koyeb API token: already configured")
            if prompt_yes_no("  Update API token?", False):
                api_key = prompt("    Koyeb API token", password=True)
                if api_key:
                    save_env_value("KOYEB_API_TOKEN", api_key)
                    print_success("    Updated")
        else:
            api_key = prompt("    Koyeb API token", password=True)
            if api_key:
                save_env_value("KOYEB_API_TOKEN", api_key)
                print_success("    Configured")
        # Koyeb image
        current_image = config.get("terminal", {}).get(
            "koyeb_image", "koyeb/sandbox:latest"
        )
        image = prompt("  Sandbox image", current_image)
        config["terminal"]["koyeb_image"] = image
        save_env_value("TERMINAL_KOYEB_IMAGE", image)
        _prompt_container_resources(config)
    elif selected_backend == "ssh":
        print_success("Terminal backend: SSH")
        print_info("Run commands on a remote machine via SSH.")
--- a/pyproject.toml
+++ b/pyproject.toml
@ -39,6 +39,7 @@ dependencies = [
 [project.optional-dependencies]
 modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
 koyeb = ["koyeb-sdk>=1.4.0,<2"]
 dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
 messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
 cron = ["croniter>=6.0.0,<7"]
--- a/tests/tools/test_koyeb_environment.py
+++ b/tests/tools/test_koyeb_environment.py
@ -0,0 +1,266 @@
 """Unit tests for the Koyeb cloud sandbox environment backend."""
 import threading
 from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 import pytest
 # ---------------------------------------------------------------------------
 # Helpers to build mock Koyeb SDK objects
 # ---------------------------------------------------------------------------
 def _make_exec_response(stdout="", stderr="", exit_code=0):
    """Build a stand-in for the result object of a sandbox ``exec`` call.

    Returns a ``SimpleNamespace`` carrying ``stdout``, ``stderr`` and
    ``exit_code`` attributes set from the arguments.
    """
    response = SimpleNamespace()
    response.stdout = stdout
    response.stderr = stderr
    response.exit_code = exit_code
    return response
 def _make_sandbox(sandbox_id="sb-koyeb-123"):
    """Return a ``MagicMock`` standing in for a Koyeb sandbox object.

    The mock exposes ``id`` (set to *sandbox_id*), a ``filesystem`` mock, and
    an ``exec`` method that returns a default exec response.
    """
    sandbox = MagicMock()
    sandbox.configure_mock(id=sandbox_id, filesystem=MagicMock())
    sandbox.exec.return_value = _make_exec_response()
    return sandbox
 def _patch_koyeb_imports(monkeypatch):
    """Register a stub ``koyeb`` module in ``sys.modules`` and return it.

    The stub only provides a ``Sandbox`` attribute (a ``MagicMock``), which is
    enough for KoyebEnvironment to be imported without the real SDK.
    """
    import sys
    from types import ModuleType

    fake_sdk = ModuleType("koyeb")
    setattr(fake_sdk, "Sandbox", MagicMock())
    monkeypatch.setitem(sys.modules, "koyeb", fake_sdk)
    return fake_sdk
 # ---------------------------------------------------------------------------
 # Fixtures
 # ---------------------------------------------------------------------------
@pytest.fixture()
 def koyeb_sdk(monkeypatch):
    """Provide a mock koyeb SDK module and return it for assertions.

    The stub module is installed through ``monkeypatch``, so tests can
    inspect calls made on ``koyeb_sdk.Sandbox``.
    """
    return _patch_koyeb_imports(monkeypatch)
@pytest.fixture()
 def make_env(koyeb_sdk, monkeypatch):
    """Factory that creates a KoyebEnvironment with a mocked SDK.

    The returned ``_factory(sandbox=None, home_dir="/root", **kwargs)`` builds
    an environment around ``sandbox`` (a fresh mock when omitted). Extra
    keyword arguments are forwarded to ``KoyebEnvironment``; ``task_id``
    defaults to ``"test-task"``.
    """
    # Start every test in the "not interrupted" state.
    monkeypatch.setattr("tools.environments.base.is_interrupted", lambda: False)
    # Replace the credential/skills helpers with stubs that report nothing to mount.
    monkeypatch.setattr("tools.credential_files.get_credential_file_mounts", lambda: [])
    monkeypatch.setattr("tools.credential_files.get_skills_directory_mount", lambda **kw: None)
    monkeypatch.setattr("tools.credential_files.iter_skills_files", lambda **kw: [])
    def _factory(
        sandbox=None,
        home_dir="/root",
        **kwargs,
    ):
        sandbox = sandbox or _make_sandbox()
        # Mock the $HOME detection
        # NOTE: a ``side_effect`` set on ``sandbox.exec`` by the caller takes
        # precedence over this return value (standard unittest.mock behaviour).
        sandbox.exec.return_value = _make_exec_response(stdout=home_dir)
        # Sandbox.create() on the stub SDK hands back our mock sandbox.
        koyeb_sdk.Sandbox.create.return_value = sandbox
        # Imported here, after the ``koyeb_sdk`` fixture has installed the stub module.
        from tools.environments.koyeb import KoyebEnvironment
        kwargs.setdefault("task_id", "test-task")
        env = KoyebEnvironment(
            image="koyeb/sandbox:latest",
            **kwargs,
        )
        return env
    return _factory
 # ---------------------------------------------------------------------------
 # Constructor / cwd resolution
 # ---------------------------------------------------------------------------
 class TestCwdResolution:
    """Checks which ``cwd`` a new KoyebEnvironment ends up with.

    ``home_dir`` is the stdout the mocked sandbox returns for the $HOME
    detection call made during construction.
    """
    def test_default_cwd_resolves_home(self, make_env):
        # No cwd given: the detected home directory is used.
        env = make_env(home_dir="/home/testuser")
        assert env.cwd == "/home/testuser"
    def test_tilde_cwd_resolves_home(self, make_env):
        # "~" is treated like the default and replaced by the detected home.
        env = make_env(cwd="~", home_dir="/home/testuser")
        assert env.cwd == "/home/testuser"
    def test_explicit_cwd_not_overridden(self, make_env):
        # An explicit path wins over the detected home.
        env = make_env(cwd="/workspace", home_dir="/root")
        assert env.cwd == "/workspace"
    def test_home_detection_failure_keeps_default_cwd(self, make_env):
        # exec raising during detection must not break construction.
        sb = _make_sandbox()
        sb.exec.side_effect = RuntimeError("exec failed")
        env = make_env(sandbox=sb)
        assert env.cwd == "/root"  # keeps constructor default
    def test_empty_home_keeps_default_cwd(self, make_env):
        # Empty detection output is ignored.
        env = make_env(home_dir="")
        assert env.cwd == "/root"
 # ---------------------------------------------------------------------------
 # Sandbox name sanitization
 # ---------------------------------------------------------------------------
 class TestSandboxNameSanitization:
    """Checks the ``name`` keyword passed to ``Sandbox.create``.

    Each test builds an environment with a given ``task_id`` and inspects the
    keyword arguments of the recorded ``Sandbox.create`` call.
    """
    def test_underscores_replaced_with_hyphens(self, make_env, koyeb_sdk):
        make_env(task_id="my_test_task")
        # call_args[1] is the kwargs dict of the most recent create() call.
        name_arg = koyeb_sdk.Sandbox.create.call_args[1]["name"]
        assert "_" not in name_arg
        assert name_arg == "hermes-my-test-task"
    def test_uppercase_lowered(self, make_env, koyeb_sdk):
        make_env(task_id="MyTask")
        name_arg = koyeb_sdk.Sandbox.create.call_args[1]["name"]
        assert name_arg == "hermes-mytask"
    def test_special_chars_removed(self, make_env, koyeb_sdk):
        # "@#$" is expected to end up as a single hyphen.
        make_env(task_id="task@#$123")
        name_arg = koyeb_sdk.Sandbox.create.call_args[1]["name"]
        assert name_arg == "hermes-task-123"
    def test_name_truncated_to_63_chars(self, make_env, koyeb_sdk):
        make_env(task_id="a" * 100)
        name_arg = koyeb_sdk.Sandbox.create.call_args[1]["name"]
        assert len(name_arg) <= 63
    def test_consecutive_hyphens_collapsed(self, make_env, koyeb_sdk):
        make_env(task_id="a__b---c")
        name_arg = koyeb_sdk.Sandbox.create.call_args[1]["name"]
        assert "--" not in name_arg
 # ---------------------------------------------------------------------------
 # Cleanup
 # ---------------------------------------------------------------------------
 class TestCleanup:
    """Checks ``KoyebEnvironment.cleanup()`` against a mocked sandbox."""
    def test_cleanup_deletes_sandbox(self, make_env):
        env = make_env()
        # Keep a reference: a later test shows cleanup() sets env._sandbox to None.
        sb = env._sandbox
        env.cleanup()
        sb.delete.assert_called_once()
    def test_cleanup_idempotent(self, make_env):
        env = make_env()
        env.cleanup()
        env.cleanup()  # should not raise
    def test_cleanup_swallows_errors(self, make_env):
        env = make_env()
        env._sandbox.delete.side_effect = RuntimeError("delete failed")
        env.cleanup()  # should not raise
        assert env._sandbox is None
    def test_cleanup_calls_sync_back_before_delete(self, make_env):
        env = make_env()
        # Records the order in which sync_back and delete are invoked.
        call_order = []
        sync_mgr = MagicMock()
        sync_mgr.sync_back = lambda: call_order.append("sync_back")
        env._sync_manager = sync_mgr
        original_delete = env._sandbox.delete
        # Wrap delete so the call is recorded and still forwarded to the mock.
        env._sandbox.delete = lambda: (call_order.append("delete"), original_delete())
        env.cleanup()
        assert "sync_back" in call_order
        assert "delete" in call_order
        assert call_order.index("sync_back") < call_order.index("delete")
 # ---------------------------------------------------------------------------
 # Execute
 # ---------------------------------------------------------------------------
 class TestExecute:
    """Checks ``KoyebEnvironment.execute()`` against scripted ``exec`` results.

    Each test supplies ``sb.exec.side_effect`` as a list consumed in call
    order, so the entries must line up with the calls the environment makes.
    """
    def test_basic_command(self, make_env):
        sb = _make_sandbox()
        # Calls: (1) $HOME detection, (2) init_session bootstrap, (3) actual command
        sb.exec.side_effect = [
            _make_exec_response(stdout="/root"),           # $HOME
            _make_exec_response(stdout="", exit_code=0),   # init_session
            _make_exec_response(stdout="hello", exit_code=0),  # actual cmd
        ]
        env = make_env(sandbox=sb)
        result = env.execute("echo hello")
        assert "hello" in result["output"]
        assert result["returncode"] == 0
    def test_nonzero_exit_code(self, make_env):
        sb = _make_sandbox()
        sb.exec.side_effect = [
            _make_exec_response(stdout="/root"),
            _make_exec_response(stdout="", exit_code=0),   # init_session
            _make_exec_response(stdout="not found", exit_code=127),
        ]
        env = make_env(sandbox=sb)
        result = env.execute("bad_cmd")
        # The sandbox exit code is surfaced as "returncode".
        assert result["returncode"] == 127
    def test_stderr_included_in_output(self, make_env):
        sb = _make_sandbox()
        sb.exec.side_effect = [
            _make_exec_response(stdout="/root"),
            _make_exec_response(stdout="", exit_code=0),   # init_session
            _make_exec_response(stdout="out", stderr="err", exit_code=0),
        ]
        env = make_env(sandbox=sb)
        result = env.execute("cmd")
        # Both streams must appear in the single "output" field.
        assert "out" in result["output"]
        assert "err" in result["output"]
    def test_stdin_data_wraps_heredoc(self, make_env):
        sb = _make_sandbox()
        sb.exec.side_effect = [
            _make_exec_response(stdout="/root"),
            _make_exec_response(stdout="", exit_code=0),   # init_session
            _make_exec_response(stdout="ok", exit_code=0),
        ]
        env = make_env(sandbox=sb)
        env.execute("python3", stdin_data="print('hi')")
        # Inspect the command string of the last exec call (first positional arg).
        call_args = sb.exec.call_args_list[-1]
        cmd = call_args[0][0]
        assert "HERMES_STDIN_" in cmd
        assert "print" in cmd
 # ---------------------------------------------------------------------------
 # Interrupt
 # ---------------------------------------------------------------------------
 class TestInterrupt:
    """Checks that an interrupted ``execute()`` returns 130 and deletes the sandbox."""
    def test_interrupt_kills_and_returns_130(self, make_env, monkeypatch):
        sb = _make_sandbox()
        # Used to unblock the simulated long-running command in the finally clause.
        event = threading.Event()
        calls = {"n": 0}
        def exec_side_effect(*args, **kwargs):
            calls["n"] += 1
            if calls["n"] == 1:
                return _make_exec_response(stdout="/root")  # $HOME
            if calls["n"] == 2:
                return _make_exec_response(stdout="", exit_code=0)  # init_session
            event.wait(timeout=5)  # simulate long-running command
            return _make_exec_response(stdout="done", exit_code=0)
        sb.exec.side_effect = exec_side_effect
        env = make_env(sandbox=sb)
        # Flip the interrupt flag only after construction, so set-up is unaffected.
        monkeypatch.setattr(
            "tools.environments.base.is_interrupted", lambda: True
        )
        try:
            result = env.execute("sleep 10")
            assert result["returncode"] == 130
            sb.delete.assert_called()  # cancel_fn calls sandbox.delete()
        finally:
            # Release the blocked exec call regardless of the test outcome.
            event.set()
--- a/tests/tools/test_sync_back_backends.py
+++ b/tests/tools/test_sync_back_backends.py
@ -10,6 +10,7 @@ import pytest
 from tools.environments import ssh as ssh_env
 from tools.environments import modal as modal_env
 from tools.environments import daytona as daytona_env
 from tools.environments import koyeb as koyeb_env
 from tools.environments.ssh import SSHEnvironment
@ -95,6 +96,20 @@ def _make_mock_daytona_env():
    return env
 # ── Koyeb helpers ────────────────────────────────────────────────────
 def _make_mock_koyeb_env():
    """Create a bare KoyebEnvironment for tests, bypassing ``__init__``.

    Only the attributes listed below are populated by hand: the sandbox is a
    ``MagicMock`` and no sync manager is attached.
    """
    from threading import Lock

    stub = object.__new__(koyeb_env.KoyebEnvironment)
    preset = {
        "_sandbox": MagicMock(),
        "_remote_home": "/root",
        "_sync_manager": None,
        "_lock": Lock(),
        "_task_id": "test",
    }
    for attr, value in preset.items():
        setattr(stub, attr, value)
    return stub
 # =====================================================================
 # SSH bulk download
 # =====================================================================
@ -402,6 +417,69 @@ class TestDaytonaCleanup:
        assert call_order.index("sync_back") < call_order.index("stop")
 # =====================================================================
 # Koyeb bulk download + cleanup
 # =====================================================================
 class TestKoyebBulkDownload:
    """Unit tests for _koyeb_bulk_download."""
    def test_koyeb_bulk_download_creates_tar_and_downloads(self, tmp_path):
        """exec and download_file should both be called."""
        env = _make_mock_koyeb_env()
        dest = tmp_path / "backup.tar"
        env._koyeb_bulk_download(dest)
        # exec called twice: tar creation + rm cleanup
        assert env._sandbox.exec.call_count == 2
        # First positional argument of the first exec call is the tar command.
        tar_cmd = env._sandbox.exec.call_args_list[0][0][0]
        assert "tar cf" in tar_cmd
        assert "/tmp/.hermes_sync." in tar_cmd
        assert ".tar" in tar_cmd
        assert ".hermes" in tar_cmd
        cleanup_cmd = env._sandbox.exec.call_args_list[1][0][0]
        assert "rm -f" in cleanup_cmd
        env._sandbox.filesystem.download_file.assert_called_once()
        # download_file(remote_path, local_path): remote temp tar, then dest as a string.
        download_args = env._sandbox.filesystem.download_file.call_args[0]
        assert download_args[0].startswith("/tmp/.hermes_sync.")
        assert download_args[1] == str(dest)
    def test_koyeb_bulk_download_uses_remote_home(self, tmp_path):
        """The tar command should use the env's _remote_home."""
        env = _make_mock_koyeb_env()
        env._remote_home = "/home/koyeb"
        dest = tmp_path / "backup.tar"
        env._koyeb_bulk_download(dest)
        tar_cmd = env._sandbox.exec.call_args_list[0][0][0]
        # Checked without a leading slash, so either form of the path matches.
        assert "home/koyeb/.hermes" in tar_cmd
 class TestKoyebCleanup:
    """Verify Koyeb cleanup() calls sync_back() before delete."""
    def test_koyeb_cleanup_calls_sync_back(self):
        """cleanup() should call sync_back() before sandbox.delete()."""
        env = _make_mock_koyeb_env()
        # Records the order in which the two hooks fire.
        call_order = []
        sync_mgr = MagicMock()
        sync_mgr.sync_back = lambda: call_order.append("sync_back")
        env._sync_manager = sync_mgr
        env._sandbox.delete = lambda: call_order.append("delete")
        env.cleanup()
        assert "sync_back" in call_order
        assert "delete" in call_order
        assert call_order.index("sync_back") < call_order.index("delete")
 # =====================================================================
 # FileSyncManager wiring: bulk_download_fn passed by each backend
 # =====================================================================
--- a/tools/environments/__init__.py
+++ b/tools/environments/__init__.py
@ -2,7 +2,7 @@
 Each backend provides the same interface (BaseEnvironment ABC) for running
 shell commands in a specific execution context: local, Docker, Singularity,
-SSH, Modal, or Daytona.
+SSH, Modal, Daytona, or Koyeb.
 The terminal_tool.py factory (_create_environment) selects the backend
 based on the TERMINAL_ENV configuration.
--- a/tools/environments/koyeb.py
+++ b/tools/environments/koyeb.py
@ -1,13 +1,13 @@
 """Koyeb cloud execution environment.
 Uses the Koyeb Python SDK to run commands in cloud sandboxes.
-Supports persistent sandboxes: when enabled, sandboxes are stopped on cleanup
+Each task gets its own sandbox which is deleted on cleanup.
-and resumed on next creation, preserving the filesystem across sessions.
 """
 import logging
 import math
 import os
 import re
 import shlex
 import threading
 from pathlib import Path
@ -56,7 +56,6 @@ class KoyebEnvironment(BaseEnvironment):
        from koyeb import Sandbox
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._sandbox = None
        self._lock = threading.Lock()
@ -71,25 +70,12 @@ class KoyebEnvironment(BaseEnvironment):
        # For now, we'll use the instance_type parameter directly
        # cpu and memory parameters are kept for compatibility but may be overridden by instance_type
-        sandbox_name = f"hermes-{task_id}"
+        # Koyeb app names must be lowercase alphanumeric + hyphens only.
-        labels = {"hermes_task_id": task_id}
+        # Sanitize task_id: replace underscores/invalid chars with hyphens,
-
+        # collapse runs, strip leading/trailing hyphens, and truncate.
-        # Try to reuse existing sandbox if persistent
+        safe_id = re.sub(r"[^a-z0-9-]", "-", task_id.lower())
-        if self._persistent:
+        safe_id = re.sub(r"-{2,}", "-", safe_id).strip("-")
-            try:
+        sandbox_name = f"hermes-{safe_id}"[:63]  # Koyeb name max length
                # List existing sandboxes with our label
                existing = Sandbox.list(api_token=self._api_token, labels=labels)
                if existing:
                    self._sandbox = existing[0]
                    logger.info("Koyeb: resumed sandbox %s for task %s",
                                self._sandbox.id, task_id)
            except Exception as e:
                logger.debug("Koyeb: could not resume sandbox for task %s: %s",
                             task_id, e)
                self._sandbox = None
        # Create new sandbox if needed
        if self._sandbox is None:
        try:
            self._sandbox = Sandbox.create(
                image=image,
@ -99,9 +85,6 @@ class KoyebEnvironment(BaseEnvironment):
                region=self._region,
                api_token=self._api_token,
                timeout=300,
                    idle_timeout=0,  # Disable auto-sleep for persistent sandboxes
                    delete_after_delay=0,
                    delete_after_inactivity_delay=0,
            )
            logger.info("Koyeb: created sandbox %s for task %s",
                        self._sandbox.id, task_id)
@ -135,20 +118,33 @@ class KoyebEnvironment(BaseEnvironment):
        """Upload a single file via Koyeb SDK."""
        parent = str(Path(remote_path).parent)
        self._sandbox.exec(f"mkdir -p {shlex.quote(parent)}")
-        self._sandbox.filesystem.upload_file(host_path, remote_path)
+        self._sandbox.filesystem.upload_file(host_path, remote_path, encoding="base64")
    def _koyeb_bulk_upload(self, files: list[tuple[str, str]]) -> None:
-        """Upload many files via Koyeb SDK."""
+        """Upload many files as a single tar archive to avoid per-file HTTP overhead."""
        if not files:
            return
-        parents = unique_parent_dirs(files)
+        import tarfile
-        if parents:
+        import tempfile
            self._sandbox.exec(quoted_mkdir_command(parents))
-        # Upload files one by one (Koyeb SDK doesn't have bulk upload for files)
+        with tempfile.NamedTemporaryFile(suffix=".tar", delete=False) as tmp:
            tmp_path = tmp.name
        try:
            with tarfile.open(tmp_path, "w") as tar:
                for host_path, remote_path in files:
-            self._sandbox.filesystem.upload_file(host_path, remote_path)
+                    # Store with absolute remote path inside the tar
                    tar.add(host_path, arcname=remote_path)
            remote_tar = f"/tmp/.hermes_upload.{os.getpid()}.tar"
            self._sandbox.filesystem.upload_file(tmp_path, remote_tar, encoding="base64")
            self._sandbox.exec(f"tar xf {shlex.quote(remote_tar)} -C / && rm -f {shlex.quote(remote_tar)}")
        finally:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
    def _koyeb_bulk_download(self, dest: Path) -> None:
        """Download remote .hermes/ as a tar archive."""
@ -228,12 +224,6 @@ class KoyebEnvironment(BaseEnvironment):
                    logger.warning("Koyeb: sync_back failed: %s", e)
            try:
                if self._persistent:
                    # For persistent sandboxes, we don't delete them
                    # They'll be reused on next creation
                    logger.info("Koyeb: keeping sandbox %s (filesystem preserved)",
                                self._sandbox.id)
                else:
                self._sandbox.delete()
                logger.info("Koyeb: deleted sandbox %s", self._sandbox.id)
            except Exception as e: