mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
313 lines
10 KiB
Python
313 lines
10 KiB
Python
"""Shared fixtures for docker-image integration tests.
|
|
|
|
Tests in this directory build the image with the current ``Dockerfile``
|
|
and exercise it via ``docker run``. They skip when Docker is unavailable
|
|
(e.g. on developer laptops without a daemon).
|
|
|
|
Override the image with ``HERMES_TEST_IMAGE`` env var to point at a pre-built
|
|
image (faster local iteration); otherwise the ``built_image`` fixture builds
|
|
the repo's Dockerfile once per session.
|
|
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import time
|
|
from collections.abc import Iterator
|
|
|
|
import pytest
|
|
|
|
# Tag of the image under test. ``HERMES_TEST_IMAGE`` overrides the default.
# ``or`` (rather than a ``.get`` default) makes a variable that is set but
# empty fall back to the default too, so a blank value never turns into an
# empty image reference handed to ``docker build -t`` / ``docker run``.
IMAGE_TAG = os.environ.get("HERMES_TEST_IMAGE") or "hermes-agent-harness:latest"
|
|
|
|
|
|
def _docker_available() -> bool:
|
|
"""Return True iff a docker CLI is on PATH and the daemon answers."""
|
|
if shutil.which("docker") is None:
|
|
return False
|
|
try:
|
|
r = subprocess.run(
|
|
["docker", "info"], capture_output=True, timeout=5,
|
|
)
|
|
return r.returncode == 0
|
|
except (subprocess.TimeoutExpired, OSError):
|
|
return False
|
|
|
|
|
|
def pytest_collection_modifyitems(config, items):  # noqa: D401 - pytest hook
    """Skip every docker-suite test when Docker is unavailable.

    Only items whose file path contains ``tests/docker/`` (after normalising
    OS separators to ``/``) are considered; every other collected item is
    left untouched.

    The ``docker info`` probe is run only when at least one such item was
    collected, so sessions that select no docker tests do not spawn the CLI.

    Args:
        config: pytest config object (unused; required by the hook signature).
        items: Collected test items. Matching items get a ``skip`` marker
            added in place when Docker is not usable.
    """
    docker_items = [
        item
        for item in items
        if "tests/docker/" in str(item.fspath).replace(os.sep, "/")
    ]
    # Short-circuit: with nothing to guard there is no reason to probe docker.
    if not docker_items or _docker_available():
        return
    skip_docker = pytest.mark.skip(
        reason="Docker not available or daemon not running",
    )
    for item in docker_items:
        item.add_marker(skip_docker)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def built_image() -> str:
    """Provide the tag of the image under test, building it at most once.

    If ``HERMES_TEST_IMAGE`` is set to a non-empty value, nothing is built
    and ``IMAGE_TAG`` is returned directly (handy for fast local iteration
    against a pre-built image). Otherwise the directory two levels above this
    file is passed to ``docker build`` and tagged ``IMAGE_TAG``.

    A non-zero ``docker build`` exit fails the fixture with the last 2000
    characters of the build's stderr.
    """
    if not os.environ.get("HERMES_TEST_IMAGE"):
        here = os.path.dirname(__file__)
        repo_root = os.path.abspath(os.path.join(here, os.pardir, os.pardir))
        build = subprocess.run(
            ["docker", "build", "-t", IMAGE_TAG, repo_root],
            capture_output=True,
            text=True,
            timeout=1200,
        )
        assert build.returncode == 0, (
            f"docker build failed:\n{build.stderr[-2000:]}"
        )
    return IMAGE_TAG
|
|
|
|
|
|
@pytest.fixture
def container_name(request) -> Iterator[str]:
    """Yield a docker-safe container name for the current test, then remove it.

    The name is ``hermes-test-<node name>``, where every character of
    ``request.node.name`` that Docker does not accept in a container name
    (anything outside ``[A-Za-z0-9_.-]``) is replaced by ``_``. This covers
    the ``[``/``]`` of parametrised ids as well as spaces, slashes, colons
    and similar characters that can appear inside the id itself.

    After the test finishes, ``docker rm -f <name>`` is run; its exit status
    is ignored, so a test that never started the container still tears down
    cleanly.
    """
    import re  # local import keeps this fixture self-contained

    safe = re.sub(r"[^A-Za-z0-9_.-]", "_", request.node.name)
    name = f"hermes-test-{safe}"
    yield name
    subprocess.run(
        ["docker", "rm", "-f", name],
        capture_output=True, timeout=10,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# docker_exec — default to the unprivileged hermes user
|
|
# ---------------------------------------------------------------------------
|
|
#
|
|
# Background: every Hermes runtime path inside the container drops to UID
|
|
# 10000 (the ``hermes`` user) via ``s6-setuidgid hermes``. ``docker exec``
|
|
# without ``-u`` runs as root, which is **not** representative of how
|
|
# production code executes. PR #30136 review caught a real regression
|
|
# this way — ``Path('/proc/1/exe').resolve()`` works as root and silently
|
|
# fails (PermissionError swallowed) for hermes, so a test that ran as root
|
|
# couldn't catch a feature that was inert for the actual runtime user.
|
|
#
|
|
# Tests in this directory MUST exercise the realistic user context. The
|
|
# helpers below run every probe under ``-u hermes`` unless a specific
|
|
# test explicitly opts into ``user="root"`` (rare — e.g. inspecting
|
|
# /proc/1/exe itself, chowning a volume).
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def docker_exec(
    container: str,
    *args: str,
    user: str = "hermes",
    timeout: int = 30,
    extra_docker_args: tuple[str, ...] = (),
) -> subprocess.CompletedProcess[str]:
    """Execute ``args`` in ``container`` through ``docker exec -u <user>``.

    Args:
        container: Name (or id) of the target container.
        *args: The command and its arguments, passed through verbatim.
        user: Account to run as. Defaults to ``hermes``; pass ``"root"`` only
            when a test genuinely needs root (e.g. reading ``/proc/1/exe`` or
            changing ownership).
        timeout: Seconds before ``subprocess.run`` gives up.
        extra_docker_args: Extra ``docker exec`` options, inserted after
            ``-u <user>`` and before the container name.

    Returns:
        The ``CompletedProcess`` with stdout/stderr captured as text. The
        exit status is not checked here.
    """
    argv = ["docker", "exec", "-u", user]
    argv.extend(extra_docker_args)
    argv.append(container)
    argv.extend(args)
    return subprocess.run(
        argv,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
|
|
|
|
|
|
def docker_exec_sh(
    container: str,
    command: str,
    *,
    user: str = "hermes",
    timeout: int = 30,
) -> subprocess.CompletedProcess[str]:
    """Run a shell snippet in the container: ``sh -c <command>`` as ``user``.

    Thin convenience wrapper over :func:`docker_exec`; ``user`` and
    ``timeout`` are forwarded unchanged and the same ``CompletedProcess`` is
    returned.
    """
    shell_argv = ("sh", "-c", command)
    return docker_exec(container, *shell_argv, user=user, timeout=timeout)
|
|
|
|
|
|
def wait_for_container_ready(
    container: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> None:
    """Block until the container's s6 cont-init phase has completed.

    Readiness is detected by polling ``/opt/data/logs/container-boot.log``
    (read as the default ``hermes`` user) for the marker ``profile=default``.
    That line is written on every boot by the 02-reconcile-profiles cont-init
    script, which runs after stage2-hook.sh; once it is present the whole
    cont-init chain (UID remap, chown, config seeding, skills sync, browser
    discovery, config migration) has finished.

    Polling adapts to the host's speed, unlike a fixed ``time.sleep()`` which
    either wastes time on fast machines or flakes on slow ones.

    Args:
        container: Container to probe.
        deadline_s: Total time budget in seconds.
        interval_s: Pause between two probes.

    Raises:
        TimeoutError: The marker did not show up before the deadline.
    """
    boot_log_probe = "cat /opt/data/logs/container-boot.log 2>/dev/null"
    give_up_at = time.monotonic() + deadline_s
    while time.monotonic() < give_up_at:
        probe = docker_exec(container, "sh", "-c", boot_log_probe, timeout=5)
        booted = probe.returncode == 0 and "profile=default" in probe.stdout
        if booted:
            return
        time.sleep(interval_s)
    raise TimeoutError(
        f"container {container} did not finish cont-init within {deadline_s}s"
    )
|
|
|
|
|
|
def start_container(
    image: str,
    name: str,
    *env: str,
    cmd: str = "sleep infinity",
    timeout: int = 60,
) -> str:
    """Start a detached container and wait for cont-init to finish.

    Args:
        image: Docker image to run.
        name: Container name (cleanup is the caller's responsibility —
            typically handled by the ``container_name`` fixture).
        env: Env vars as ``KEY=VALUE`` strings, each passed via ``-e``.
        cmd: Container CMD (default ``sleep infinity``). Split into argv
            with shell-like rules, so quoted arguments such as
            ``sh -c "echo hi"`` stay intact.
        timeout: ``docker run`` subprocess timeout in seconds.

    Returns:
        The container name.

    Raises:
        subprocess.CalledProcessError: ``docker run`` exited non-zero.
        subprocess.TimeoutExpired: ``docker run`` exceeded ``timeout``.
        TimeoutError: The container did not finish cont-init within the
            default deadline of ``wait_for_container_ready`` (30s).
    """
    import shlex  # local import keeps this helper self-contained

    run_argv = ["docker", "run", "-d", "--name", name]
    for assignment in env:
        run_argv.extend(["-e", assignment])
    # shlex.split honours quoting; for plain whitespace-separated commands it
    # yields exactly what str.split() would.
    run_argv.extend([image, *shlex.split(cmd)])
    subprocess.run(run_argv, check=True, capture_output=True, timeout=timeout)
    wait_for_container_ready(name)
    return name
|
|
|
|
|
|
def restart_container(container: str, timeout: int = 60) -> None:
    """Run ``docker restart`` on the container and wait until it is ready again.

    The boot log used as the readiness signal
    (``/opt/data/logs/container-boot.log``) is append-only and survives a
    restart. It is therefore emptied (as root, best effort) BEFORE restarting;
    otherwise the ``profile=default`` line of the previous boot would satisfy
    :func:`wait_for_container_ready` before cont-init of the new boot has run.

    Args:
        container: Container to restart.
        timeout: Seconds allowed for the ``docker restart`` subprocess.
    """
    wipe_boot_log = (
        "truncate -s 0 /opt/data/logs/container-boot.log 2>/dev/null || true"
    )
    # The result is deliberately ignored: a missing log is fine.
    docker_exec_sh(container, wipe_boot_log, user="root", timeout=5)
    restart_argv = ["docker", "restart", container]
    subprocess.run(
        restart_argv,
        check=True,
        capture_output=True,
        timeout=timeout,
    )
    wait_for_container_ready(container)
|
|
|
|
|
|
def poll_container(
    container: str,
    probe: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.5,
    user: str = "hermes",
) -> tuple[bool, str]:
    """Retry a shell probe in the container until it succeeds or time runs out.

    ``probe`` is run via ``sh -c`` as ``user`` (10s timeout per attempt). The
    first attempt that exits 0 ends the wait. Handy for waiting on a process
    to appear, a port to open, a file to contain a string, and so on.

    Args:
        container: Container to probe.
        probe: Shell snippet whose exit status signals success.
        deadline_s: Total time budget in seconds.
        interval_s: Pause after each failed attempt.
        user: Account the probe runs as.

    Returns:
        ``(succeeded, stdout_of_last_attempt)``; the stdout is ``""`` when no
        attempt was made.
    """
    stop_at = time.monotonic() + deadline_s
    stdout = ""
    while time.monotonic() < stop_at:
        attempt = docker_exec_sh(container, probe, user=user, timeout=10)
        stdout = attempt.stdout
        if attempt.returncode != 0:
            time.sleep(interval_s)
            continue
        return True, stdout
    return False, stdout
|
|
|
|
|
|
def wait_for_path(
    container: str,
    path: str,
    *,
    kind: str = "f",
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> bool:
    """Wait for ``test -<kind> <path>`` to succeed inside the container.

    Args:
        container: Container to probe.
        path: Path to check. It is interpolated into the shell command
            unquoted.
        kind: Flag handed to ``test``: ``'f'`` (regular file), ``'d'``
            (directory) or ``'e'`` (anything that exists).
        deadline_s: Total time budget in seconds.
        interval_s: Pause between checks.

    Returns:
        ``True`` as soon as the check passes, ``False`` if the deadline
        elapsed first.
    """
    check = f"test -{kind} {path}"
    found, _ = poll_container(
        container,
        check,
        deadline_s=deadline_s,
        interval_s=interval_s,
    )
    return found
|
|
|
|
|
|
def wait_for_log(
    container: str,
    log_path: str,
    needle: str,
    *,
    deadline_s: float = 30.0,
    interval_s: float = 0.25,
) -> str:
    """Poll until a log file inside the container contains ``needle``.

    The file is read with ``cat <log_path>`` via ``sh -c`` as the default
    ``hermes`` user; reads that fail (e.g. the file does not exist yet) are
    simply retried.

    Args:
        container: Container to read from.
        log_path: Path of the log file inside the container. It is
            interpolated into the shell command unquoted.
        needle: Substring to wait for.
        deadline_s: Total time budget in seconds.
        interval_s: Pause between reads.

    Returns:
        The full log contents of the read in which ``needle`` was found.

    Raises:
        AssertionError: ``needle`` did not appear before the deadline. When
            the log was readable at least once, the last 2000 characters
            seen are appended to the message to make the failure debuggable.
    """
    end = time.monotonic() + deadline_s
    last = ""
    while time.monotonic() < end:
        r = docker_exec_sh(
            container, f"cat {log_path} 2>/dev/null", timeout=5,
        )
        if r.returncode == 0:
            last = r.stdout
            if needle in last:
                return last
        time.sleep(interval_s)
    message = f"Didn't see `{needle}` in {log_path} within {deadline_s} in container {container}"
    tail = last[-2000:]
    if tail:
        # Surface what the log DID contain instead of discarding it.
        message += f"\n--- last contents of {log_path} (tail) ---\n{tail}"
    raise AssertionError(message)
|
|
|
|
|
|
|
|
def wait_for_docker_logs(
    container: str, needle: str, *, deadline_s: float = 30.0, interval_s: float = 0.5,
) -> str:
    """Poll ``docker logs`` until ``needle`` appears or the deadline expires.

    Each attempt runs ``docker logs <container>`` (10s timeout) and searches
    the concatenation of its stdout and stderr.

    Args:
        container: Container whose logs are inspected.
        needle: Substring to wait for.
        deadline_s: Total time budget in seconds.
        interval_s: Pause between attempts.

    Returns:
        The full captured output (stdout followed by stderr) of the attempt
        in which ``needle`` was found.

    Raises:
        AssertionError: ``needle`` did not appear before the deadline. When
            any output was captured, its last 2000 characters are appended
            to the message to make the failure debuggable.
    """
    end = time.monotonic() + deadline_s
    last = ""
    while time.monotonic() < end:
        r = subprocess.run(
            ["docker", "logs", container],
            capture_output=True, text=True, timeout=10,
        )
        last = r.stdout + r.stderr
        if needle in last:
            return last
        time.sleep(interval_s)
    message = f"Didn't see `{needle}` in docker logs within {deadline_s} in container {container}"
    tail = last[-2000:]
    if tail:
        # Surface what the container DID log instead of discarding it.
        message += f"\n--- last docker logs (tail) ---\n{tail}"
    raise AssertionError(message)
|