mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
feat(nix): container-aware CLI — auto-route into managed container (#7543)
* feat(nix): container-aware CLI — auto-route all subcommands into managed container
When container.enable = true, the host `hermes` CLI transparently execs
every subcommand into the managed Docker/Podman container. A symlink
bridge (~/.hermes -> /var/lib/hermes/.hermes) unifies state between host
and container so sessions, config, and memories are shared.
CLI changes:
- Global routing before subcommand dispatch (all commands forwarded)
- docker exec with -u exec_user, env passthrough (TERM, COLORTERM,
LANG, LC_ALL), TTY-aware flags
- Retry with spinner on failure (TTY: 5s, non-TTY: 10s silent)
- Hard fail instead of silent fallback
- HERMES_DEV=1 env var bypasses routing for development
- No routing messages (invisible to user)
NixOS module changes:
- container.hostUsers option: lists users who get ~/.hermes symlink
and automatic hermes group membership
- Activation script creates symlink bridge (with backup of existing
~/.hermes dirs), writes exec_user to .container-mode
- Cleanup on disable: removes symlinks + .container-mode + stops service
- Warning when hostUsers set without addToSystemPackages
* fix: address review — reuse sudo var, add chown -h on symlink update
- hermes_cli/main.py: reuse the existing `sudo` variable instead of
redundant `shutil.which("sudo")` call that could return None
- nix/nixosModules.nix: add missing `chown -h` when updating an
existing symlink target so ownership stays consistent with the
fresh-create and backup-replace branches
* fix: address remaining review items from cursor bugbot
- hermes_cli/main.py: move container routing BEFORE parse_args() so
--help, unrecognised flags, and all subcommands are forwarded
transparently into the container instead of being intercepted by
argparse on the host (high severity)
- nix/nixosModules.nix: resolve home dirs via
config.users.users.${user}.home instead of hardcoding /home/${user},
supporting users with custom home directories (medium severity)
- nix/nixosModules.nix: gate hostUsers group membership on
container.enable so setting hostUsers without container mode doesn't
silently add users to the hermes group (low severity)
* fix: simplify container routing — execvp, no retries, let it crash
- Replace subprocess.run retry loop with os.execvp (no idle parent process)
- Extract _probe_container helper for sudo detection with 15s timeout
- Narrow exception handling: FileNotFoundError only in get_container_exec_info,
catch TimeoutExpired specifically, remove silent except Exception: pass
- Collapse needs_sudo + sudo into single sudo_path variable
- Simplify NixOS symlink creation from 4 branches to 2
- Gate NixOS sudoers hint with "On NixOS:" prefix
- Full test rewrite: 18 tests covering execvp, sudo probe, timeout, permissions
---------
Co-authored-by: Hermes Agent <hermes@nousresearch.com>
This commit is contained in:
parent
5c2ecdec49
commit
cab814af15
6 changed files with 983 additions and 1 deletions
|
|
@@ -143,6 +143,73 @@ def managed_error(action: str = "modify configuration"):
|
|||
print(format_managed_message(action), file=sys.stderr)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Container-aware CLI (NixOS container mode)
|
||||
# =============================================================================
|
||||
|
||||
def _is_inside_container() -> bool:
    """Report whether this process appears to run inside a container.

    Three signals are consulted, in order:

    1. ``/.dockerenv`` exists (Docker marker file).
    2. ``/run/.containerenv`` exists (Podman marker file).
    3. ``/proc/1/cgroup`` mentions ``docker``, ``podman`` or ``/lxc/``.

    An unreadable or missing cgroup file is treated as "no evidence".
    """
    marker_files = ("/.dockerenv", "/run/.containerenv")
    if any(os.path.exists(marker) for marker in marker_files):
        return True

    # Fall back to PID 1's cgroup membership, which names the runtime on
    # hosts where neither marker file is present.
    try:
        with open("/proc/1/cgroup", "r") as handle:
            cgroup_text = handle.read()
    except OSError:
        return False

    runtime_tags = ("docker", "podman", "/lxc/")
    return any(tag in cgroup_text for tag in runtime_tags)
|
||||
|
||||
|
||||
def get_container_exec_info() -> Optional[dict]:
    """Read container mode metadata from HERMES_HOME/.container-mode.

    The file is a list of ``key=value`` lines. Blank lines, lines starting
    with ``#`` and lines without ``=`` are ignored. Keys and values are
    stripped of surrounding whitespace.

    Returns:
        A dict with keys ``backend``, ``container_name``, ``exec_user`` and
        ``hermes_bin``. A key that is missing from the file *or present with
        an empty value* falls back to its default, so callers never receive
        an empty backend, container name, user or binary path.
        ``None`` is returned when HERMES_DEV=1 is set, when we are already
        inside a container, or when the ``.container-mode`` file does not
        exist.

    Raises:
        OSError: any failure to read the file other than it being absent
            (e.g. PermissionError) propagates to the caller.
    """
    # Development escape hatch: never route when HERMES_DEV=1.
    if os.environ.get("HERMES_DEV") == "1":
        return None

    # Already in a container: routing again would recurse.
    if _is_inside_container():
        return None

    container_mode_file = get_hermes_home() / ".container-mode"

    info = {}
    try:
        with open(container_mode_file, "r") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                key, sep, value = line.partition("=")
                if sep:
                    info[key.strip()] = value.strip()
    except FileNotFoundError:
        # No marker file means container mode is not active.
        return None

    defaults = {
        "backend": "docker",
        "container_name": "hermes-agent",
        "exec_user": "hermes",
        "hermes_bin": "/data/current-package/bin/hermes",
    }
    # ``or`` (not a .get default) so that "backend=" with an empty value
    # also falls back instead of producing an unusable empty string.
    return {key: info.get(key) or default for key, default in defaults.items()}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Config paths
|
||||
# =============================================================================
|
||||
|
|
|
|||
|
|
@@ -528,6 +528,113 @@ def _resolve_last_cli_session() -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def _probe_container(cmd: list, backend: str, via_sudo: bool = False, *,
                     timeout: float = 15):
    """Run a container inspect probe, returning the CompletedProcess.

    Args:
        cmd: full argv of the probe command to run.
        backend: container runtime name, used only in the timeout message.
        via_sudo: whether ``cmd`` goes through sudo; only affects the label
            shown in the timeout message.
        timeout: seconds to wait for the probe before giving up
            (keyword-only, defaults to 15).

    Returns:
        The ``subprocess.CompletedProcess`` with captured text stdout/stderr.
        A non-zero return code is NOT an error here; callers inspect it.

    Catches TimeoutExpired specifically to print a human-readable message
    and exit with status 1; all other exceptions propagate naturally.
    """
    try:
        return subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        label = f"sudo {backend}" if via_sudo else backend
        print(
            f"Error: timed out waiting for {label} to respond.\n"
            f"The {backend} daemon may be unresponsive or starting up.",
            file=sys.stderr,
        )
        sys.exit(1)
|
||||
|
||||
|
||||
def _exec_in_container(container_info: dict, cli_args: list):
    """Replace the current process with a command inside the managed container.

    Probes whether sudo is needed (rootful containers), then os.execvp
    into the container. On success the Python process is replaced entirely
    and the container's exit code becomes the process exit code (OS semantics).
    On failure, OSError propagates naturally.

    Exits with status 1 (after printing to stderr) when the runtime binary
    is not on PATH, or when the container cannot be seen either directly or
    via passwordless sudo.

    Args:
        container_info: dict with backend, container_name, exec_user, hermes_bin
        cli_args: the original CLI arguments (everything after 'hermes')
    """
    import shlex
    import shutil

    backend = container_info["backend"]
    container_name = container_info["container_name"]
    exec_user = container_info["exec_user"]
    hermes_bin = container_info["hermes_bin"]

    runtime = shutil.which(backend)
    if not runtime:
        print(f"Error: {backend} not found on PATH. Cannot route to container.",
              file=sys.stderr)
        sys.exit(1)

    # Shell-quoted form of the arguments for the "sudo hermes ..." hints, so
    # the suggested command is copy-pasteable even when an argument contains
    # spaces or shell metacharacters.
    quoted_args = " ".join(shlex.quote(str(arg)) for arg in cli_args)

    # Rootful containers (NixOS systemd service) are invisible to unprivileged
    # users — Podman uses per-user namespaces, Docker needs group access.
    # Probe whether the runtime can see the container; if not, try via sudo.
    sudo_path = None
    probe = _probe_container(
        [runtime, "inspect", "--format", "ok", container_name], backend,
    )
    if probe.returncode != 0:
        sudo_path = shutil.which("sudo")
        if sudo_path:
            # -n: never prompt; a password prompt would hang piped commands.
            probe2 = _probe_container(
                [sudo_path, "-n", runtime, "inspect", "--format", "ok", container_name],
                backend, via_sudo=True,
            )
            if probe2.returncode != 0:
                print(
                    f"Error: container '{container_name}' not found via {backend}.\n"
                    f"\n"
                    f"The container is likely running as root. Your user cannot see it\n"
                    f"because {backend} uses per-user namespaces. Grant passwordless\n"
                    f"sudo for {backend} — the -n (non-interactive) flag is required\n"
                    f"because a password prompt would hang or break piped commands.\n"
                    f"\n"
                    f"On NixOS:\n"
                    f"\n"
                    f' security.sudo.extraRules = [{{\n'
                    f' users = [ "{os.getenv("USER", "your-user")}" ];\n'
                    f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n'
                    f' }}];\n'
                    f"\n"
                    f"Or run: sudo hermes {quoted_args}",
                    file=sys.stderr,
                )
                sys.exit(1)
        else:
            print(
                f"Error: container '{container_name}' not found via {backend}.\n"
                f"The container may be running under root. Try: sudo hermes {quoted_args}",
                file=sys.stderr,
            )
            sys.exit(1)

    # Allocate a pseudo-TTY only for a fully interactive session. With stdin
    # on a terminal but stdout piped/redirected (``hermes ... | grep``), -t
    # would push output through a pty: CRLF line endings and stderr merged
    # into stdout, corrupting the pipeline.
    is_tty = sys.stdin.isatty() and sys.stdout.isatty()
    tty_flags = ["-it"] if is_tty else ["-i"]

    # Forward only terminal/locale variables that are set and non-empty.
    env_flags = []
    for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"):
        val = os.environ.get(var)
        if val:
            env_flags.extend(["-e", f"{var}={val}"])

    cmd_prefix = [sudo_path, "-n", runtime] if sudo_path else [runtime]
    exec_cmd = (
        cmd_prefix + ["exec"]
        + tty_flags
        + ["-u", exec_user]
        + env_flags
        + [container_name, hermes_bin]
        + cli_args
    )

    # exec replaces the process image without flushing Python-level buffers;
    # flush explicitly so nothing already written is lost.
    sys.stdout.flush()
    sys.stderr.flush()

    os.execvp(exec_cmd[0], exec_cmd)
|
||||
|
||||
|
||||
def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
|
||||
"""Resolve a session name (title) or ID to a session ID.
|
||||
|
||||
|
|
@@ -5667,9 +5774,22 @@ Examples:
|
|||
# Pre-process argv so unquoted multi-word session names after -c / -r
|
||||
# are merged into a single token before argparse sees them.
|
||||
# e.g. ``hermes -c Pokemon Agent Dev`` → ``hermes -c 'Pokemon Agent Dev'``
|
||||
# ── Container-aware routing ────────────────────────────────────────
|
||||
# When NixOS container mode is active, route ALL subcommands into
|
||||
# the managed container. This MUST run before parse_args() so that
|
||||
# --help, unrecognised flags, and every subcommand are forwarded
|
||||
# transparently instead of being intercepted by argparse on the host.
|
||||
from hermes_cli.config import get_container_exec_info
|
||||
container_info = get_container_exec_info()
|
||||
if container_info:
|
||||
_exec_in_container(container_info, sys.argv[1:])
|
||||
# Unreachable: os.execvp never returns on success (process is replaced)
|
||||
# and raises OSError on failure (which propagates as a traceback).
|
||||
sys.exit(1)
|
||||
|
||||
_processed_argv = _coalesce_session_name_args(sys.argv[1:])
|
||||
args = parser.parse_args(_processed_argv)
|
||||
|
||||
|
||||
# Handle --version flag
|
||||
if args.version:
|
||||
cmd_version(args)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue