feat(nix): container-aware CLI — auto-route into managed container (#7543)

* feat(nix): container-aware CLI — auto-route all subcommands into managed container

When container.enable = true, the host `hermes` CLI transparently execs
every subcommand into the managed Docker/Podman container. A symlink
bridge (~/.hermes -> /var/lib/hermes/.hermes) unifies state between host
and container so sessions, config, and memories are shared.

CLI changes:
- Global routing before subcommand dispatch (all commands forwarded)
- docker exec with -u exec_user, env passthrough (TERM, COLORTERM,
  LANG, LC_ALL), TTY-aware flags
- Retry with spinner on failure (TTY: 5s, non-TTY: 10s silent)
- Hard fail instead of silent fallback
- HERMES_DEV=1 env var bypasses routing for development
- No routing messages (invisible to user)

NixOS module changes:
- container.hostUsers option: lists users who get ~/.hermes symlink
  and automatic hermes group membership
- Activation script creates symlink bridge (with backup of existing
  ~/.hermes dirs), writes exec_user to .container-mode
- Cleanup on disable: removes symlinks + .container-mode + stops service
- Warning when hostUsers set without addToSystemPackages

* fix: address review — reuse sudo var, add chown -h on symlink update

- hermes_cli/main.py: reuse the existing `sudo` variable instead of
  redundant `shutil.which("sudo")` call that could return None
- nix/nixosModules.nix: add missing `chown -h` when updating an
  existing symlink target so ownership stays consistent with the
  fresh-create and backup-replace branches

* fix: address remaining review items from cursor bugbot

- hermes_cli/main.py: move container routing BEFORE parse_args() so
  --help, unrecognised flags, and all subcommands are forwarded
  transparently into the container instead of being intercepted by
  argparse on the host (high severity)

- nix/nixosModules.nix: resolve home dirs via
  config.users.users.${user}.home instead of hardcoding /home/${user},
  supporting users with custom home directories (medium severity)

- nix/nixosModules.nix: gate hostUsers group membership on
  container.enable so setting hostUsers without container mode doesn't
  silently add users to the hermes group (low severity)

* fix: simplify container routing — execvp, no retries, let it crash

- Replace subprocess.run retry loop with os.execvp (no idle parent process)
- Extract _probe_container helper for sudo detection with 15s timeout
- Narrow exception handling: FileNotFoundError only in get_container_exec_info,
  catch TimeoutExpired specifically, remove silent except Exception: pass
- Collapse needs_sudo + sudo into single sudo_path variable
- Simplify NixOS symlink creation from 4 branches to 2
- Gate NixOS sudoers hint with "On NixOS:" prefix
- Full test rewrite: 18 tests covering execvp, sudo probe, timeout, permissions

---------

Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
Siddharth Balyan 2026-04-11 16:47:46 -07:00 committed by GitHub
parent 5c2ecdec49
commit cab814af15
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 983 additions and 1 deletions

View file

@ -143,6 +143,73 @@ def managed_error(action: str = "modify configuration"):
print(format_managed_message(action), file=sys.stderr)
# =============================================================================
# Container-aware CLI (NixOS container mode)
# =============================================================================
def _is_inside_container() -> bool:
"""Detect if we're already running inside a Docker/Podman container."""
# Standard Docker/Podman indicators
if os.path.exists("/.dockerenv"):
return True
# Podman uses /run/.containerenv
if os.path.exists("/run/.containerenv"):
return True
# Check cgroup for container runtime evidence (works for both Docker & Podman)
try:
with open("/proc/1/cgroup", "r") as f:
cgroup = f.read()
if "docker" in cgroup or "podman" in cgroup or "/lxc/" in cgroup:
return True
except OSError:
pass
return False
def get_container_exec_info() -> Optional[dict]:
"""Read container mode metadata from HERMES_HOME/.container-mode.
Returns a dict with keys: backend, container_name, exec_user, hermes_bin
or None if container mode is not active, we're already inside the
container, or HERMES_DEV=1 is set.
The .container-mode file is written by the NixOS activation script when
container.enable = true. It tells the host CLI to exec into the container
instead of running locally.
"""
if os.environ.get("HERMES_DEV") == "1":
return None
if _is_inside_container():
return None
container_mode_file = get_hermes_home() / ".container-mode"
try:
info = {}
with open(container_mode_file, "r") as f:
for line in f:
line = line.strip()
if "=" in line and not line.startswith("#"):
key, _, value = line.partition("=")
info[key.strip()] = value.strip()
except FileNotFoundError:
return None
# All other exceptions (PermissionError, malformed data, etc.) propagate
backend = info.get("backend", "docker")
container_name = info.get("container_name", "hermes-agent")
exec_user = info.get("exec_user", "hermes")
hermes_bin = info.get("hermes_bin", "/data/current-package/bin/hermes")
return {
"backend": backend,
"container_name": container_name,
"exec_user": exec_user,
"hermes_bin": hermes_bin,
}
# =============================================================================
# Config paths
# =============================================================================

View file

@ -528,6 +528,113 @@ def _resolve_last_cli_session() -> Optional[str]:
return None
def _probe_container(cmd: list, backend: str, via_sudo: bool = False):
"""Run a container inspect probe, returning the CompletedProcess.
Catches TimeoutExpired specifically for a human-readable message;
all other exceptions propagate naturally.
"""
try:
return subprocess.run(cmd, capture_output=True, text=True, timeout=15)
except subprocess.TimeoutExpired:
label = f"sudo {backend}" if via_sudo else backend
print(
f"Error: timed out waiting for {label} to respond.\n"
f"The {backend} daemon may be unresponsive or starting up.",
file=sys.stderr,
)
sys.exit(1)
def _exec_in_container(container_info: dict, cli_args: list):
"""Replace the current process with a command inside the managed container.
Probes whether sudo is needed (rootful containers), then os.execvp
into the container. On success the Python process is replaced entirely
and the container's exit code becomes the process exit code (OS semantics).
On failure, OSError propagates naturally.
Args:
container_info: dict with backend, container_name, exec_user, hermes_bin
cli_args: the original CLI arguments (everything after 'hermes')
"""
import shutil
backend = container_info["backend"]
container_name = container_info["container_name"]
exec_user = container_info["exec_user"]
hermes_bin = container_info["hermes_bin"]
runtime = shutil.which(backend)
if not runtime:
print(f"Error: {backend} not found on PATH. Cannot route to container.",
file=sys.stderr)
sys.exit(1)
# Rootful containers (NixOS systemd service) are invisible to unprivileged
# users — Podman uses per-user namespaces, Docker needs group access.
# Probe whether the runtime can see the container; if not, try via sudo.
sudo_path = None
probe = _probe_container(
[runtime, "inspect", "--format", "ok", container_name], backend,
)
if probe.returncode != 0:
sudo_path = shutil.which("sudo")
if sudo_path:
probe2 = _probe_container(
[sudo_path, "-n", runtime, "inspect", "--format", "ok", container_name],
backend, via_sudo=True,
)
if probe2.returncode != 0:
print(
f"Error: container '{container_name}' not found via {backend}.\n"
f"\n"
f"The container is likely running as root. Your user cannot see it\n"
f"because {backend} uses per-user namespaces. Grant passwordless\n"
f"sudo for {backend} — the -n (non-interactive) flag is required\n"
f"because a password prompt would hang or break piped commands.\n"
f"\n"
f"On NixOS:\n"
f"\n"
f' security.sudo.extraRules = [{{\n'
f' users = [ "{os.getenv("USER", "your-user")}" ];\n'
f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n'
f' }}];\n'
f"\n"
f"Or run: sudo hermes {' '.join(cli_args)}",
file=sys.stderr,
)
sys.exit(1)
else:
print(
f"Error: container '{container_name}' not found via {backend}.\n"
f"The container may be running under root. Try: sudo hermes {' '.join(cli_args)}",
file=sys.stderr,
)
sys.exit(1)
is_tty = sys.stdin.isatty()
tty_flags = ["-it"] if is_tty else ["-i"]
env_flags = []
for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"):
val = os.environ.get(var)
if val:
env_flags.extend(["-e", f"{var}={val}"])
cmd_prefix = [sudo_path, "-n", runtime] if sudo_path else [runtime]
exec_cmd = (
cmd_prefix + ["exec"]
+ tty_flags
+ ["-u", exec_user]
+ env_flags
+ [container_name, hermes_bin]
+ cli_args
)
os.execvp(exec_cmd[0], exec_cmd)
def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
"""Resolve a session name (title) or ID to a session ID.
@ -5667,9 +5774,22 @@ Examples:
# Pre-process argv so unquoted multi-word session names after -c / -r
# are merged into a single token before argparse sees them.
# e.g. ``hermes -c Pokemon Agent Dev`` → ``hermes -c 'Pokemon Agent Dev'``
# ── Container-aware routing ────────────────────────────────────────
# When NixOS container mode is active, route ALL subcommands into
# the managed container. This MUST run before parse_args() so that
# --help, unrecognised flags, and every subcommand are forwarded
# transparently instead of being intercepted by argparse on the host.
from hermes_cli.config import get_container_exec_info
container_info = get_container_exec_info()
if container_info:
_exec_in_container(container_info, sys.argv[1:])
# Unreachable: os.execvp never returns on success (process is replaced)
# and raises OSError on failure (which propagates as a traceback).
sys.exit(1)
_processed_argv = _coalesce_session_name_args(sys.argv[1:])
args = parser.parse_args(_processed_argv)
# Handle --version flag
if args.version:
cmd_version(args)