mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
fix(docker): gate resource limit flags on cgroup controller availability (#54516)
On hosts where the cgroup v2 cpu/memory/pids controllers are not delegated
to the docker/podman process (unprivileged Proxmox LXCs, some rootless and
nested setups), --pids-limit/--cpus/--memory cause every container start to
fail with OCI runtime error / exit 126, breaking terminal + execute_code.
- Add _cgroup_limits_available(image): one-shot, host-wide cached probe that
spawns a throwaway container from the sandbox image itself (sleep 0) with
all three flags together, mirroring the existing _storage_opt_supported
probe-and-degrade pattern.
- Remove --pids-limit from static _BASE_SECURITY_ARGS; apply it (default 256
via _DEFAULT_PIDS_LIMIT) in resource_args gated on the probe.
- Gate --cpus and --memory on the same probe.
Behavior unchanged on cgroup-capable hosts; graceful degradation with a
one-time warning where controllers aren't delegated.
Fixes #6568.
(cherry picked from commit c933880b7e)
Co-authored-by: angelos <angelos@oikos.lan.home.malaiwah.com>
This commit is contained in:
parent
10043c6d0c
commit
7cfa2fa13f
3 changed files with 169 additions and 5 deletions
91
tests/tools/test_docker_cgroup_limits.py
Normal file
91
tests/tools/test_docker_cgroup_limits.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
"""Tests for cgroup resource-limit gating in the docker backend.
|
||||
|
||||
On hosts where the cgroup v2 cpu/memory/pids controllers are not delegated
|
||||
(e.g. unprivileged Proxmox LXCs), passing ``--cpus``/``--memory``/``--pids-limit``
|
||||
to ``docker run`` fails every container start with OCI runtime error / exit 126.
|
||||
``_cgroup_limits_available`` probes once and the resource flags are gated on it,
|
||||
so the sandbox degrades gracefully instead of failing.
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
import pytest
|
||||
|
||||
import tools.environments.docker as docker_env
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_cgroup_cache():
|
||||
"""The probe result is cached in a module-level global; reset per test."""
|
||||
docker_env._cgroup_limits_ok = None
|
||||
yield
|
||||
docker_env._cgroup_limits_ok = None
|
||||
|
||||
|
||||
def test_pids_limit_not_in_base_security_args():
|
||||
"""``--pids-limit`` must NOT be hardcoded in the static security args.
|
||||
|
||||
It requires the pids cgroup controller and is gated on the probe instead.
|
||||
"""
|
||||
assert "--pids-limit" not in docker_env._BASE_SECURITY_ARGS
|
||||
|
||||
|
||||
def test_probe_returns_true_when_container_starts(monkeypatch):
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||
captured = {}
|
||||
|
||||
def _run(cmd, *a, **k):
|
||||
captured["cmd"] = cmd
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
assert docker_env._cgroup_limits_available("hermes-agent:latest") is True
|
||||
# Probes all three controllers together against the real sandbox image.
|
||||
assert "--cpus" in captured["cmd"]
|
||||
assert "--memory" in captured["cmd"]
|
||||
assert "--pids-limit" in captured["cmd"]
|
||||
assert "hermes-agent:latest" in captured["cmd"]
|
||||
|
||||
|
||||
def test_probe_returns_false_and_warns_on_oci_error(monkeypatch, caplog):
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||
|
||||
def _run(cmd, *a, **k):
|
||||
return subprocess.CompletedProcess(
|
||||
cmd, 126, stdout="",
|
||||
stderr="crun: controller `pids` is not available",
|
||||
)
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
with caplog.at_level("WARNING"):
|
||||
assert docker_env._cgroup_limits_available("img") is False
|
||||
assert "Cgroup resource limits" in caplog.text
|
||||
|
||||
|
||||
def test_probe_returns_false_when_no_docker(monkeypatch):
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: None)
|
||||
assert docker_env._cgroup_limits_available("img") is False
|
||||
|
||||
|
||||
def test_probe_returns_false_on_empty_image(monkeypatch):
|
||||
"""An empty image string must not be probed (would be a malformed run)."""
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||
monkeypatch.setattr(
|
||||
docker_env.subprocess, "run",
|
||||
lambda *a, **k: pytest.fail("should not probe with empty image"),
|
||||
)
|
||||
assert docker_env._cgroup_limits_available("") is False
|
||||
|
||||
|
||||
def test_probe_result_is_cached(monkeypatch):
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||
calls = []
|
||||
|
||||
def _run(cmd, *a, **k):
|
||||
calls.append(cmd)
|
||||
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
|
||||
|
||||
monkeypatch.setattr(docker_env.subprocess, "run", _run)
|
||||
docker_env._cgroup_limits_available("img")
|
||||
docker_env._cgroup_limits_available("img")
|
||||
docker_env._cgroup_limits_available("img")
|
||||
assert len(calls) == 1 # probe runs once, then cached
|
||||
|
|
@ -11,7 +11,13 @@ def _mock_subprocess_run(monkeypatch):
|
|||
"""Mock subprocess.run to intercept docker run -d and docker version calls.
|
||||
|
||||
Returns a list of captured (cmd, kwargs) tuples for inspection.
|
||||
|
||||
Pre-seeds the cgroup-limit probe cache to ``True`` so the throwaway probe
|
||||
container (a ``docker run ... sleep 0``) does not run and pollute the
|
||||
captured call list — these tests inspect the real sandbox-start ``run``.
|
||||
Tests that exercise the probe itself live in test_docker_cgroup_limits.py.
|
||||
"""
|
||||
docker_env._cgroup_limits_ok = True
|
||||
calls = []
|
||||
|
||||
def _run(cmd, **kwargs):
|
||||
|
|
|
|||
|
|
@ -322,19 +322,28 @@ def find_docker() -> Optional[str]:
|
|||
# preserved. Omitted entirely when the container starts as a
|
||||
# non-root user via --user, since no privilege drop is needed
|
||||
# in that mode.
|
||||
# Block privilege escalation and limit PIDs.
|
||||
# Block privilege escalation.
|
||||
# /tmp is size-limited and nosuid but allows exec (needed by pip/npm builds).
|
||||
#
|
||||
# Note: ``--pids-limit`` is *not* in this list — it lives in ``resource_args``
|
||||
# and is gated on ``_cgroup_limits_available(image)`` because it requires the
|
||||
# ``pids`` cgroup controller to be delegated, which is not the case on hosts
|
||||
# such as unprivileged LXCs. ``--cpus``/``--memory`` are gated for the same
|
||||
# reason.
|
||||
_BASE_SECURITY_ARGS = [
|
||||
"--cap-drop", "ALL",
|
||||
"--cap-add", "DAC_OVERRIDE",
|
||||
"--cap-add", "CHOWN",
|
||||
"--cap-add", "FOWNER",
|
||||
"--security-opt", "no-new-privileges",
|
||||
"--pids-limit", "256",
|
||||
"--tmpfs", "/tmp:rw,nosuid,size=512m",
|
||||
"--tmpfs", "/var/tmp:rw,noexec,nosuid,size=256m",
|
||||
]
|
||||
|
||||
# Default per-container PID limit. Applied as ``--pids-limit`` only when the
|
||||
# cgroup ``pids`` controller is available (see ``_cgroup_limits_available``).
|
||||
_DEFAULT_PIDS_LIMIT = "256"
|
||||
|
||||
# /run is split out from _BASE_SECURITY_ARGS because s6-overlay images need it
|
||||
# mounted ``exec``: s6 stage0 later runs ``exec /run/s6/basedir/bin/init``, which
|
||||
# fails with "Permission denied" (exit 126) on a ``noexec`` mount. For all other
|
||||
|
|
@ -431,6 +440,59 @@ def _resolve_host_user_spec() -> Optional[str]:
|
|||
|
||||
|
||||
_storage_opt_ok: Optional[bool] = None # cached result across instances
|
||||
_cgroup_limits_ok: Optional[bool] = None # cached result across instances
|
||||
|
||||
|
||||
def _cgroup_limits_available(image: str) -> bool:
|
||||
"""Probe whether cgroup resource limits work in this environment.
|
||||
|
||||
Tests ``--cpus``, ``--memory`` and ``--pids-limit`` together by spawning
|
||||
a throwaway container from *image* (the same sandbox image we are about
|
||||
to use for real, so no extra pull and no dependency on a public
|
||||
registry). The container runs ``sleep 0`` — sleep is guaranteed to be
|
||||
present because the sandbox itself uses ``sleep 2h`` as its long-lived
|
||||
entrypoint.
|
||||
|
||||
On hosts where the corresponding cgroup controllers are not delegated
|
||||
to this process (typical inside unprivileged LXCs and some rootless
|
||||
setups) these flags cause every container start to fail with ``OCI
|
||||
runtime error`` / exit 126. The probe runs once per process and the
|
||||
result — which is host-wide, not image-specific — is cached.
|
||||
"""
|
||||
global _cgroup_limits_ok
|
||||
if _cgroup_limits_ok is not None:
|
||||
return _cgroup_limits_ok
|
||||
|
||||
docker_exe = find_docker()
|
||||
if not docker_exe or not image:
|
||||
_cgroup_limits_ok = False
|
||||
return False
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[docker_exe, "run", "--rm",
|
||||
"--cpus", "0.5", "--memory", "64m", "--pids-limit", "32",
|
||||
image, "sleep", "0"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
stdin=subprocess.DEVNULL,
|
||||
)
|
||||
_cgroup_limits_ok = result.returncode == 0
|
||||
if not _cgroup_limits_ok:
|
||||
logger.warning(
|
||||
"Cgroup resource limits (--cpus/--memory/--pids-limit) not "
|
||||
"available in this environment. Containers will run without "
|
||||
"CPU, memory or PID limits. To enable, delegate the cpu, "
|
||||
"memory and pids cgroup controllers to this container. "
|
||||
"Probe stderr: %s",
|
||||
(result.stderr or "").strip()[:500],
|
||||
)
|
||||
except Exception as e:
|
||||
_cgroup_limits_ok = False
|
||||
logger.warning("Cgroup limit probe failed; disabling resource limits: %s", e)
|
||||
|
||||
return _cgroup_limits_ok
|
||||
|
||||
|
||||
def _ensure_docker_available() -> None:
|
||||
|
|
@ -555,12 +617,17 @@ class DockerEnvironment(BaseEnvironment):
|
|||
# Fail fast if Docker is not available.
|
||||
_ensure_docker_available()
|
||||
|
||||
# Build resource limit args
|
||||
# Build resource limit args (gated by cgroup availability probe so
|
||||
# they degrade gracefully on hosts without controller delegation,
|
||||
# e.g. unprivileged LXCs). The probe runs once per process and is
|
||||
# cached host-wide.
|
||||
resource_args = []
|
||||
if cpu > 0:
|
||||
if cpu > 0 and _cgroup_limits_available(image):
|
||||
resource_args.extend(["--cpus", str(cpu)])
|
||||
if memory > 0:
|
||||
if memory > 0 and _cgroup_limits_available(image):
|
||||
resource_args.extend(["--memory", f"{memory}m"])
|
||||
if _cgroup_limits_available(image):
|
||||
resource_args.extend(["--pids-limit", _DEFAULT_PIDS_LIMIT])
|
||||
if disk > 0 and sys.platform != "darwin":
|
||||
if self._storage_opt_supported():
|
||||
resource_args.extend(["--storage-opt", f"size={disk}m"])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue