feat(tools): add microsandbox terminal backend

Adds microsandbox (https://microsandbox.dev) as a terminal backend alongside docker/singularity/modal/daytona/ssh. Commands run inside a libkrun microVM with its own kernel — meaningfully stronger isolation than the shared-kernel container backends, without a cloud dependency or a daemon. Backend shape is a thin wrapper around the msb CLI: one long-lived sandbox per environment (msb create), command execution via msb exec, teardown via msb stop + msb remove. Env-var filtering mirrors the Docker backend — explicit docker-style microsandbox_forward_env / microsandbox_env lists, skill passthroughs still filtered through _HERMES_PROVIDER_ENV_BLOCKLIST. Files: - tools/environments/microsandbox.py — new MicrosandboxEnvironment backend - tools/terminal_tool.py — dispatch, container_config keys, image resolution - hermes_cli/config.py — default microsandbox_* entries + env var sync - cli-config.yaml.example — 'Option 7' documented config block - tests/integration/test_microsandbox_terminal.py — skip-if-no-KVM integration tests for basic exec, filesystem, isolation, and the secret-leak regression Host requirements: Linux with /dev/kvm readable (or macOS on Apple Silicon) and msb on PATH or at MSB_PATH. Install: curl -fsSL https://install.microsandbox.dev | sh Follow-up PR will wire this into the hermes_cli/setup.py wizard.
2026-04-25 00:51:20 +00:00 · 2026-04-24 17:12:45 +09:00 · 2026-04-24 17:12:45 +09:00 · 1f2303d3e2
commit 1f2303d3e2
parent 5dda4cab41
5 changed files with 531 additions and 4 deletions
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@ -226,8 +226,32 @@ terminal:
 #   daytona_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 #   container_disk: 10240          # Daytona max is 10GB per sandbox
 # -----------------------------------------------------------------------------
 # OPTION 7: Microsandbox libkrun microVMs (local)
 # Commands run in a libkrun microVM with its own kernel.
 # Great for: strong isolation from the agent's env and filesystem while keeping
 # everything local (no cloud dependency).
 # Requires: Linux with /dev/kvm (or macOS on Apple Silicon), `msb` on PATH or
 # MSB_PATH set. Install: curl -fsSL https://install.microsandbox.dev | sh
 # -----------------------------------------------------------------------------
 # terminal:
 #   backend: "microsandbox"
 #   cwd: "/root"                    # Path inside the VM
 #   timeout: 180
 #   lifetime_seconds: 300
 #   microsandbox_image: "python:3.12"  # Any OCI image usable by msb
 #   # Optional: explicitly forward selected env vars into the VM.
 #   # Values come from your current shell first, then ~/.hermes/.env.
 #   # Warning: anything forwarded here is visible to commands in the VM.
 #   # microsandbox_forward_env:
 #   #   - "GITHUB_TOKEN"
 #   # microsandbox_env:
 #   #   PYTHONUNBUFFERED: "1"
 #   # microsandbox_volumes:         # SOURCE:GUEST_PATH, host path or named volume
 #   #   - "/host/data:/mnt/data"
 #
-# --- Container resource limits (docker, singularity, modal, daytona -- ignored for local/ssh) ---
+# --- Container resource limits (docker, singularity, modal, daytona, microsandbox -- ignored for local/ssh) ---
 # These settings apply to all container backends. They control the resources
 # allocated to the sandbox and whether its filesystem persists across sessions.
  container_cpu: 1              # CPU cores
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -437,6 +437,15 @@ DEFAULT_CONFIG = {
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "microsandbox_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "microsandbox_forward_env": [],
        # Explicit env vars set inside the microsandbox VM. Same semantics as
        # docker_env (values provided here, not read from the host process).
        "microsandbox_env": {},
        # Volume mounts shared with the microsandbox VM. Each entry is
        # "source:guest_path" where source is either an absolute host path or
        # a named msb volume.
        "microsandbox_volumes": [],
        # Container resource limits (docker, singularity, modal, daytona, microsandbox — ignored for local/ssh)
        "container_cpu": 1,
        "container_memory": 5120,       # MB (default 5GB)
@ -3886,6 +3895,7 @@ def set_config_value(key: str, value: str):
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
        "terminal.microsandbox_image": "TERMINAL_MICROSANDBOX_IMAGE",
        "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
        "terminal.cwd": "TERMINAL_CWD",
        "terminal.timeout": "TERMINAL_TIMEOUT",
--- a/tests/integration/test_microsandbox_terminal.py
+++ b/tests/integration/test_microsandbox_terminal.py
@ -0,0 +1,124 @@
 """Integration tests for the Microsandbox terminal backend.
 Requires the ``msb`` CLI on PATH (or MSB_PATH set) and ``/dev/kvm`` readable
 and writable on Linux. Run with:
    TERMINAL_ENV=microsandbox pytest tests/integration/test_microsandbox_terminal.py -v
 The module skips when either prerequisite is missing so CI and dev machines
 without KVM don't see spurious failures.
 """
 import importlib.util
 import json
 import os
 import shutil
 import sys
 from pathlib import Path
 import pytest
 # Apply the ``integration`` marker to every test collected from this module.
 pytestmark = pytest.mark.integration
 def _msb_available() -> bool:
    if os.getenv("MSB_PATH") and os.path.isfile(os.environ["MSB_PATH"]):
        return os.access(os.environ["MSB_PATH"], os.X_OK)
    return shutil.which("msb") is not None
 def _kvm_available() -> bool:
    # Non-Linux hosts (macOS Apple Silicon) don't have /dev/kvm but libkrun
    # uses the Hypervisor framework there; skip only when we're on Linux
    # without KVM access.
    if not sys.platform.startswith("linux"):
        return True
    return os.access("/dev/kvm", os.R_OK | os.W_OK)
 # Skip the whole module (rather than failing each test) when the msb CLI
 # cannot be located.
 if not _msb_available():
    pytest.skip("msb not found on PATH or MSB_PATH", allow_module_level=True)
 # Likewise skip when /dev/kvm is not accessible on Linux.
 if not _kvm_available():
    pytest.skip(
        "/dev/kvm not readable/writable by this user; add user to 'kvm' group",
        allow_module_level=True,
    )
 # Import terminal_tool via importlib to avoid tools/__init__.py side effects.
 # Repository root: three levels above this test file.
 parent_dir = Path(__file__).parent.parent.parent
 # Put the repository root first on sys.path before executing the module
 # below (presumably so its own project imports resolve -- confirm).
 sys.path.insert(0, str(parent_dir))
 spec = importlib.util.spec_from_file_location(
    "terminal_tool", parent_dir / "tools" / "terminal_tool.py"
 )
 terminal_module = importlib.util.module_from_spec(spec)
 # Execute the module body; this happens once, at collection time.
 spec.loader.exec_module(terminal_module)
 # The two entry points the tests use: run a command, and tear down a task's VM.
 terminal_tool = terminal_module.terminal_tool
 cleanup_vm = terminal_module.cleanup_vm
@pytest.fixture(autouse=True)
 def _force_microsandbox(monkeypatch):
    monkeypatch.setenv("TERMINAL_ENV", "microsandbox")
    # A minimal Alpine image boots fast and has sh + basic coreutils.
    monkeypatch.setenv("TERMINAL_MICROSANDBOX_IMAGE", "alpine:3")
    monkeypatch.setenv("TERMINAL_CONTAINER_PERSISTENT", "false")
@pytest.fixture()
 def task_id(request):
    """Provide a unique task_id and clean up the sandbox after the test."""
    tid = f"msb_test_{request.node.name}"
    yield tid
    cleanup_vm(tid)
 def _run(command, task_id, **kwargs):
    """Run ``command`` through ``terminal_tool`` and return its decoded JSON reply.

    Extra keyword arguments are forwarded to ``terminal_tool`` unchanged.
    """
    raw_reply = terminal_tool(command, task_id=task_id, **kwargs)
    decoded = json.loads(raw_reply)
    return decoded
 class TestMicrosandboxBasic:
    """Smoke tests: stdout capture, exit-code propagation, and guest kernel."""

    def test_echo(self, task_id):
        """A plain ``echo`` succeeds and its text shows up in the output."""
        reply = _run("echo 'Hello from microsandbox!'", task_id)
        exit_code, output = reply["exit_code"], reply["output"]
        assert exit_code == 0
        assert "Hello from microsandbox!" in output

    def test_nonzero_exit(self, task_id):
        """The command's own exit status is reported back unchanged."""
        reply = _run("exit 42", task_id)
        assert reply["exit_code"] == 42

    def test_kernel_info(self, task_id):
        """``uname -a`` succeeds and reports a Linux kernel."""
        reply = _run("uname -a", task_id)
        exit_code, output = reply["exit_code"], reply["output"]
        assert exit_code == 0
        assert "Linux" in output
 class TestMicrosandboxFilesystem:
    """Files written by one command are visible to later commands of the same task."""

    def test_write_and_read_file(self, task_id):
        """Write a file in one call, then read it back in a second call."""
        written = _run("echo 'sandboxed' > /tmp/msb_test.txt", task_id)
        # Check the write itself so a failure here is reported as a write
        # failure instead of surfacing later as a confusing ``cat`` error.
        assert written["exit_code"] == 0
        r = _run("cat /tmp/msb_test.txt", task_id)
        assert r["exit_code"] == 0
        assert "sandboxed" in r["output"]
 class TestMicrosandboxIsolation:
    """Isolation checks: between tasks, and between the host environment and the VM."""

    def test_different_tasks_isolated(self):
        """A file written under one task id must not be readable under another."""
        task_a = "msb_test_iso_a"
        task_b = "msb_test_iso_b"
        try:
            written = _run("echo 'secret' > /tmp/isolated.txt", task_a)
            # Without this check the test would pass vacuously whenever the
            # write into task A failed and there was nothing to leak.
            assert written["exit_code"] == 0
            r = _run("cat /tmp/isolated.txt 2>&1 || echo NOT_FOUND", task_b)
            # The only legitimate outcome: ``cat`` fails in task B, so the
            # fallback marker is printed and the file content is absent.
            assert "NOT_FOUND" in r["output"]
            assert "secret" not in r["output"]
        finally:
            cleanup_vm(task_a)
            cleanup_vm(task_b)

    def test_host_env_secrets_not_leaked(self, task_id, monkeypatch):
        """A host env var not in the passthrough allowlist must not reach the VM."""
        monkeypatch.setenv("HERMES_TEST_SECRET_ABC123", "shouldnotleak")
        # ``grep`` exits non-zero when the variable is absent, which triggers
        # the ``echo ABSENT`` fallback.
        r = _run("env | grep HERMES_TEST_SECRET_ABC123 || echo ABSENT", task_id)
        assert "ABSENT" in r["output"]
        assert "shouldnotleak" not in r["output"]
--- a/tools/environments/microsandbox.py
+++ b/tools/environments/microsandbox.py
@ -0,0 +1,351 @@
 """Microsandbox execution environment for sandboxed command execution.
 Microsandbox (https://microsandbox.dev) runs each sandbox as a libkrun microVM
 with its own kernel.  Each ``msb exec`` call against a named sandbox runs a
 command inside that VM.  There is no long-running daemon — the CLI process
 itself boots and manages the VM.
 This backend spins up one long-lived sandbox per environment instance via
 ``msb create``, runs commands with ``msb exec``, and tears down with
 ``msb stop`` + ``msb remove`` in :meth:`cleanup`.
 Host requirements: Linux with ``/dev/kvm`` readable and writable by the
 current user (or macOS on Apple Silicon).  The ``msb`` binary must be on PATH
 or at :envvar:`MSB_PATH`.
 """
 import logging
 import os
 import re
 import shutil
 import subprocess
 import sys
 import uuid
 from typing import Optional
 from tools.environments.base import BaseEnvironment, _popen_bash
 from tools.environments.local import _HERMES_PROVIDER_ENV_BLOCKLIST
 # Module-level logger, named after this module.
 logger = logging.getLogger(__name__)
 # Process-wide cache for find_msb(); None until a binary has been located.
 _msb_executable: Optional[str] = None  # resolved once, cached
 # Accepted env var names: a letter or underscore, then letters, digits, or
 # underscores (ASCII only).
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 def find_msb() -> Optional[str]:
    """Locate the ``msb`` CLI binary.

    Honors :envvar:`MSB_PATH` if it names an executable regular file, then
    falls back to :func:`shutil.which`.  A successful lookup is cached in the
    module-level ``_msb_executable`` for the life of the process; a failed
    lookup is not cached, so later calls search again.

    Returns:
        The absolute path to ``msb``, or ``None`` if it cannot be found.
    """
    global _msb_executable
    if _msb_executable is not None:
        return _msb_executable
    env_path = os.environ.get("MSB_PATH")
    if env_path and os.path.isfile(env_path) and os.access(env_path, os.X_OK):
        # Resolve to an absolute path before caching.  A relative value would
        # break after a later chdir, and a value without any slash (e.g.
        # "msb") would be looked up on PATH by subprocess instead of running
        # the file that was just validated.
        _msb_executable = os.path.abspath(env_path)
        return _msb_executable
    found = shutil.which("msb")
    if found:
        # which() can return a relative path when PATH has relative entries.
        _msb_executable = os.path.abspath(found)
        return _msb_executable
    return None
 def _normalize_forward_env_names(forward_env: list[str] | None) -> list[str]:
    """Clean a ``microsandbox_forward_env`` list into unique, valid names.

    Entries are whitespace-stripped.  Non-string entries and names that fail
    ``_ENV_VAR_NAME_RE`` are dropped with a warning, blank entries are dropped
    silently, and duplicates keep their first position.  ``None`` or an empty
    list yields ``[]``.
    """
    # A dict is used as an insertion-ordered set: first occurrence wins.
    accepted: dict[str, None] = {}
    for entry in forward_env or []:
        if not isinstance(entry, str):
            logger.warning("Ignoring non-string microsandbox_forward_env entry: %r", entry)
            continue
        name = entry.strip()
        if not name:
            continue
        if _ENV_VAR_NAME_RE.match(name) is None:
            logger.warning("Ignoring invalid microsandbox_forward_env entry: %r", entry)
            continue
        accepted.setdefault(name, None)
    return list(accepted)
 def _normalize_env_dict(env: dict | None) -> dict[str, str]:
    """Coerce a ``microsandbox_env`` mapping into a clean ``{name: value}`` dict.

    Keys must be strings that, once stripped, match ``_ENV_VAR_NAME_RE``.
    String values are kept as-is; ``int``/``float``/``bool`` values are
    converted with ``str()``.  Anything else is skipped with a warning.  A
    falsy or non-dict ``env`` yields ``{}`` (the latter with a warning).
    """
    if not env:
        return {}
    if not isinstance(env, dict):
        logger.warning("microsandbox_env is not a dict: %r", env)
        return {}

    cleaned: dict[str, str] = {}
    for raw_key, raw_value in env.items():
        key_is_valid = isinstance(raw_key, str) and _ENV_VAR_NAME_RE.match(raw_key.strip())
        if not key_is_valid:
            logger.warning("Ignoring invalid microsandbox_env key: %r", raw_key)
            continue
        name = raw_key.strip()
        if isinstance(raw_value, str):
            cleaned[name] = raw_value
        elif isinstance(raw_value, (int, float, bool)):
            cleaned[name] = str(raw_value)
        else:
            logger.warning(
                "Ignoring non-string microsandbox_env value for %r: %r", name, raw_value
            )
    return cleaned
 def _load_hermes_env_vars() -> dict[str, str]:
    """Load ~/.hermes/.env values without failing command execution."""
    try:
        from hermes_cli.config import load_env
        return load_env() or {}
    except Exception:
        return {}
 def _ensure_msb_available() -> str:
    """Resolve and health-check the msb CLI before use.

    Steps: locate the binary with :func:`find_msb`, run ``msb --version`` with
    a 10 second timeout, and on Linux verify read+write access to
    ``/dev/kvm``.

    Returns:
        The path to the msb executable.

    Raises:
        RuntimeError: if msb cannot be found, cannot be executed, times out,
            exits non-zero, or (on Linux) ``/dev/kvm`` is not accessible.
            When the failure came from an underlying exception it is chained
            as ``__cause__``.
    """
    msb_exe = find_msb()
    if not msb_exe:
        raise RuntimeError(
            "msb executable not found on PATH or at MSB_PATH. "
            "Install microsandbox: curl -fsSL https://install.microsandbox.dev | sh"
        )
    try:
        result = subprocess.run(
            [msb_exe, "--version"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(
            f"'{msb_exe} --version' timed out. Your microsandbox install may be broken."
        ) from exc
    except OSError as exc:
        # OSError covers FileNotFoundError as well as PermissionError and
        # exec-format errors, so every "could not start the binary" case is
        # reported as the documented RuntimeError.
        raise RuntimeError(
            f"msb executable at {msb_exe} could not be executed."
        ) from exc
    if result.returncode != 0:
        raise RuntimeError(
            f"'{msb_exe} --version' failed (exit {result.returncode}, "
            f"stderr={result.stderr.strip()})."
        )
    # Preflight /dev/kvm on Linux so failures surface with a clear message
    # rather than a deep-inside-libkrun error on the first exec.
    if sys.platform.startswith("linux") and not os.access("/dev/kvm", os.R_OK | os.W_OK):
        raise RuntimeError(
            "/dev/kvm is not readable/writable by this process. "
            "Microsandbox requires KVM access. Ensure the user is in the 'kvm' "
            "group (or the container has --device /dev/kvm and --privileged), "
            "and that the host has virtualization enabled in BIOS."
        )
    return msb_exe
 class MicrosandboxEnvironment(BaseEnvironment):
    """Microsandbox-backed execution: one libkrun microVM per environment.

    The VM has its own kernel and rootfs.  Host paths are only visible inside
    the guest when listed in ``volumes``.  Environment variables reach the
    guest only through ``microsandbox_env``, ``microsandbox_forward_env``, or
    skill-declared passthroughs that are not on the Hermes provider blocklist.

    Persistence: with ``persistent_filesystem=True`` :meth:`cleanup` leaves the
    sandbox running instead of stopping and removing it.  Default is
    ephemeral — the VM is stopped and removed at cleanup.
    """

    def __init__(
        self,
        image: str,
        cwd: str = "/root",
        timeout: int = 60,
        cpu: float = 0,
        memory: int = 0,
        disk: int = 0,  # accepted for API parity; msb has no per-sandbox disk cap flag
        persistent_filesystem: bool = False,
        task_id: str = "default",
        volumes: list | None = None,
        forward_env: list[str] | None = None,
        env: dict | None = None,
        network: bool = True,
        max_duration: str | None = None,
        idle_timeout: str | None = None,
    ):
        """Create the sandbox with ``msb create`` and bootstrap the session.

        Args:
            image: OCI image reference passed to ``msb create``.
            cwd: Working directory inside the VM; ``"~"`` is mapped to ``/root``.
            timeout: Default command timeout, forwarded to the base class.
            cpu: vCPU count; fractional values are rounded up.  ``0`` leaves
                the msb default in place.
            memory: Memory limit, passed to msb as ``<memory>M``.  ``0`` leaves
                the msb default in place.
            disk: Ignored (kept for signature parity with other backends).
            persistent_filesystem: When true, :meth:`cleanup` leaves the
                sandbox running.
            task_id: Embedded in the generated sandbox name.
            volumes: ``"SOURCE:GUEST_PATH"`` strings passed via ``--volume``;
                malformed entries are skipped with a warning.
            forward_env: Host env var names to forward into the VM.
            env: Explicit ``{name: value}`` variables to set in the VM.
            network: When false, ``--no-network`` is passed to ``msb create``.
            max_duration: Optional value for ``--max-duration``.
            idle_timeout: Optional value for ``--idle-timeout``.

        Raises:
            RuntimeError: if msb or ``/dev/kvm`` is unavailable, or if
                ``msb create`` exits non-zero.
            subprocess.TimeoutExpired: if ``msb create`` exceeds 300 seconds.
        """
        import math  # function-scope import: only needed for vCPU rounding

        if cwd == "~":
            cwd = "/root"
        super().__init__(cwd=cwd, timeout=timeout)
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._forward_env = _normalize_forward_env_names(forward_env)
        self._env = _normalize_env_dict(env)
        self._sandbox_name: Optional[str] = None
        # Fail fast if msb (and on Linux, /dev/kvm) is unavailable.
        self._msb_exe = _ensure_msb_available()
        # Sanitize volumes — same shape as docker_volumes ("HOST:GUEST" strings).
        if volumes is not None and not isinstance(volumes, list):
            logger.warning("microsandbox_volumes config is not a list: %r", volumes)
            volumes = []
        create_args: list[str] = []
        if cpu and cpu > 0:
            # msb accepts an integer count of vCPUs; round up so a request
            # such as 1.5 becomes 2 rather than being truncated to 1.
            create_args.extend(["--cpus", str(max(1, math.ceil(cpu)))])
        if memory and memory > 0:
            create_args.extend(["--memory", f"{memory}M"])
        if not network:
            # Requires msb compiled with the "net" feature; harmless otherwise.
            create_args.append("--no-network")
        for vol in (volumes or []):
            if not isinstance(vol, str):
                logger.warning("Microsandbox volume entry is not a string: %r", vol)
                continue
            vol = vol.strip()
            if not vol or ":" not in vol:
                logger.warning("Microsandbox volume %r missing colon, skipping", vol)
                continue
            create_args.extend(["--volume", vol])
        if max_duration:
            create_args.extend(["--max-duration", str(max_duration)])
        if idle_timeout:
            create_args.extend(["--idle-timeout", str(idle_timeout)])
        # Unique sandbox name so multiple instances (including parallel tasks)
        # don't collide.  ``--replace`` is additional belt-and-braces.
        self._sandbox_name = f"hermes-{task_id}-{uuid.uuid4().hex[:8]}"
        run_cmd = [
            self._msb_exe, "create",
            "--name", self._sandbox_name,
            "--replace",
            "--quiet",
            *create_args,
            image,
        ]
        logger.debug("Starting microsandbox: %s", " ".join(run_cmd))
        result = subprocess.run(
            run_cmd,
            capture_output=True,
            text=True,
            timeout=300,  # image pull + VM boot can take a while
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"msb create failed (exit {result.returncode}): "
                f"{result.stderr.strip() or result.stdout.strip()}"
            )
        logger.info("Created microsandbox %s from %s", self._sandbox_name, image)
        try:
            self._init_env_args = self._build_init_env_args()
            # Initialize session snapshot inside the VM.
            self.init_session()
        except Exception:
            # The VM exists but the caller never receives this object, so
            # nobody could clean it up later.  Tear it down here (even when
            # persistent) before propagating the error.
            self._stop_and_remove()
            raise

    def _build_init_env_args(self) -> list[str]:
        """Build ``--env KEY=VALUE`` args for injecting host env vars at init_session.

        Only explicit docker-style forwards (``microsandbox_forward_env``) bypass
        the Hermes provider blocklist.  Skill-declared passthroughs still go
        through the blocklist filter (same policy as the Docker backend).

        Forwarded values are read from the process environment first and from
        the Hermes env file second.  A forwarded variable overrides an entry of
        the same name from ``microsandbox_env``.  Names with no value in either
        source are omitted.

        Returns:
            A flat argument list, sorted by variable name.
        """
        exec_env: dict[str, str] = dict(self._env)
        explicit_forward_keys = set(self._forward_env)
        passthrough_keys: set[str] = set()
        try:
            from tools.env_passthrough import get_all_passthrough
            passthrough_keys = set(get_all_passthrough())
        except Exception as exc:
            # Best-effort: without passthrough data only the explicit
            # forwards apply.  Log it so the fallback is not invisible.
            logger.debug("Skill env passthrough unavailable: %s", exc)
        forward_keys = explicit_forward_keys | (
            passthrough_keys - _HERMES_PROVIDER_ENV_BLOCKLIST
        )
        hermes_env = _load_hermes_env_vars() if forward_keys else {}
        for key in sorted(forward_keys):
            value = os.getenv(key)
            if value is None:
                value = hermes_env.get(key)
            if value is not None:
                exec_env[key] = value
        args: list[str] = []
        for key in sorted(exec_env):
            args.extend(["--env", f"{key}={exec_env[key]}"])
        return args

    def _run_bash(
        self,
        cmd_string: str,
        *,
        login: bool = False,
        timeout: int = 120,
        stdin_data: str | None = None,
    ) -> subprocess.Popen:
        """Spawn a bash process inside the microsandbox VM via ``msb exec``.

        Args:
            cmd_string: Script text handed to ``bash -c``.
            login: When true, run a login shell (``bash -l``) and inject the
                ``--env`` arguments built at construction time.
            timeout: Not used by this method.
            stdin_data: Optional stdin payload, forwarded to ``_popen_bash``.

        Returns:
            The process object returned by ``_popen_bash``.

        Raises:
            RuntimeError: if the sandbox was never created or was already
                cleaned up.
        """
        # Explicit check instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let a None name reach the msb command line.
        if not self._sandbox_name:
            raise RuntimeError("Sandbox not created")
        cmd = [self._msb_exe, "exec", "--quiet"]
        # Only inject env on the bootstrap call; afterwards, the snapshot file
        # (captured during init_session) carries state across invocations.
        if login:
            cmd.extend(self._init_env_args)
        cmd.append(self._sandbox_name)
        cmd.append("--")
        if login:
            cmd.extend(["bash", "-l", "-c", cmd_string])
        else:
            cmd.extend(["bash", "-c", cmd_string])
        return _popen_bash(cmd, stdin_data)

    def _stop_and_remove(self) -> None:
        """Stop and remove the sandbox in the background, then forget its name.

        Best-effort and non-blocking: a helper ``sh`` process runs
        ``msb stop``, waits three seconds, then runs ``msb remove``.  Failure
        to start the helper is logged and never raised.
        """
        name = self._sandbox_name
        if not name:
            return
        self._sandbox_name = None
        # The msb path and sandbox name are passed as positional parameters
        # ($1, $2) rather than being interpolated into the script text.  The
        # name embeds the caller-supplied task_id, so interpolating it into a
        # shell string would allow shell injection.  Running stop and remove
        # sequentially also guarantees remove is not attempted before stop
        # has returned.
        teardown_script = (
            '"$1" stop "$2" --quiet; '
            'sleep 3; '
            '"$1" remove "$2" --quiet'
        )
        try:
            subprocess.Popen(
                ["sh", "-c", teardown_script, "sh", self._msb_exe, name],
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except Exception as e:
            logger.warning("Failed to stop microsandbox %s: %s", name, e)

    def cleanup(self):
        """Stop and remove the microsandbox VM (unless ``persistent_filesystem``)."""
        if not self._sandbox_name:
            return
        if self._persistent:
            # NOTE(review): the sandbox name carries a random suffix and is
            # dropped here, so nothing in this class can re-attach to the VM
            # afterwards -- confirm this is the intended persistence model.
            logger.info(
                "Leaving microsandbox %s running (persistent=True)",
                self._sandbox_name,
            )
            self._sandbox_name = None
            return
        # Best-effort stop + remove; don't block caller on teardown.
        self._stop_and_remove()
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@ -873,6 +873,7 @@ def _get_env_config() -> Dict[str, Any]:
        "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"),
        "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
        "daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image),
        "microsandbox_image": os.getenv("TERMINAL_MICROSANDBOX_IMAGE", default_image),
        "cwd": cwd,
        "host_cwd": host_cwd,
        "docker_mount_cwd_to_workspace": mount_docker_cwd,
@ -918,8 +919,8 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
    Create an execution environment for sandboxed command execution.
    Args:
-        env_type: One of "local", "docker", "singularity", "modal", "daytona", "ssh"
+        env_type: One of "local", "docker", "singularity", "modal", "daytona", "microsandbox", "ssh"
-        image: Docker/Singularity/Modal image name (ignored for local/ssh)
+        image: Docker/OCI/Singularity/Modal image name (ignored for local/ssh)
        cwd: Working directory
        timeout: Default command timeout
        ssh_config: SSH connection config (for env_type="ssh")
@ -938,10 +939,13 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
    volumes = cc.get("docker_volumes", [])
    docker_forward_env = cc.get("docker_forward_env", [])
    docker_env = cc.get("docker_env", {})
    microsandbox_volumes = cc.get("microsandbox_volumes", [])
    microsandbox_forward_env = cc.get("microsandbox_forward_env", [])
    microsandbox_env = cc.get("microsandbox_env", {})
    if env_type == "local":
        return _LocalEnvironment(cwd=cwd, timeout=timeout)
-    
+
    elif env_type == "docker":
        return _DockerEnvironment(
            image=image, cwd=cwd, timeout=timeout,
@ -1022,6 +1026,18 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
            persistent_filesystem=persistent, task_id=task_id,
        )
    elif env_type == "microsandbox":
        # Lazy import so msb is only required when backend is selected.
        from tools.environments.microsandbox import MicrosandboxEnvironment as _MicrosandboxEnvironment
        return _MicrosandboxEnvironment(
            image=image, cwd=cwd, timeout=timeout,
            cpu=cpu, memory=memory, disk=disk,
            persistent_filesystem=persistent, task_id=task_id,
            volumes=microsandbox_volumes,
            forward_env=microsandbox_forward_env,
            env=microsandbox_env,
        )
    elif env_type == "ssh":
        if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"):
            raise ValueError("SSH environment requires ssh_host and ssh_user to be configured")
@ -1462,6 +1478,8 @@ def terminal_tool(
            image = overrides.get("modal_image") or config["modal_image"]
        elif env_type == "daytona":
            image = overrides.get("daytona_image") or config["daytona_image"]
        elif env_type == "microsandbox":
            image = overrides.get("microsandbox_image") or config["microsandbox_image"]
        else:
            image = ""