mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-17 04:31:55 +00:00
feat(nix): container-aware CLI — auto-route all subcommands into managed container
When container.enable = true, the host `hermes` CLI transparently execs every subcommand into the managed Docker/Podman container. A symlink bridge (~/.hermes -> /var/lib/hermes/.hermes) unifies state between host and container so sessions, config, and memories are shared. CLI changes: - Global routing before subcommand dispatch (all commands forwarded) - docker exec with -u exec_user, env passthrough (TERM, COLORTERM, LANG, LC_ALL), TTY-aware flags - Retry with spinner on failure (TTY: 5s, non-TTY: 10s silent) - Hard fail instead of silent fallback - HERMES_DEV=1 env var bypasses routing for development - No routing messages (invisible to user) NixOS module changes: - container.hostUsers option: lists users who get ~/.hermes symlink and automatic hermes group membership - Activation script creates symlink bridge (with backup of existing ~/.hermes dirs), writes exec_user to .container-mode - Cleanup on disable: removes symlinks + .container-mode + stops service - Warning when hostUsers set without addToSystemPackages
This commit is contained in:
parent
3065e69dc5
commit
79e8cd1233
5 changed files with 695 additions and 1 deletions
|
|
@ -141,6 +141,72 @@ def managed_error(action: str = "modify configuration"):
|
|||
print(format_managed_message(action), file=sys.stderr)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Container-aware CLI (NixOS container mode)
|
||||
# =============================================================================
|
||||
|
||||
def _is_inside_container() -> bool:
|
||||
"""Detect if we're already running inside a Docker/Podman container."""
|
||||
# Standard Docker/Podman indicators
|
||||
if os.path.exists("/.dockerenv"):
|
||||
return True
|
||||
# Podman uses /run/.containerenv
|
||||
if os.path.exists("/run/.containerenv"):
|
||||
return True
|
||||
# Check cgroup for container runtime evidence (works for both Docker & Podman)
|
||||
try:
|
||||
with open("/proc/1/cgroup", "r") as f:
|
||||
cgroup = f.read()
|
||||
if "docker" in cgroup or "podman" in cgroup or "/lxc/" in cgroup:
|
||||
return True
|
||||
except (OSError, IOError):
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
def get_container_exec_info() -> Optional[dict]:
|
||||
"""Read container mode metadata from HERMES_HOME/.container-mode.
|
||||
|
||||
Returns a dict with keys: backend, container_name, exec_user, hermes_bin
|
||||
or None if container mode is not active, we're already inside the
|
||||
container, or HERMES_DEV=1 is set.
|
||||
|
||||
The .container-mode file is written by the NixOS activation script when
|
||||
container.enable = true. It tells the host CLI to exec into the container
|
||||
instead of running locally.
|
||||
"""
|
||||
if os.environ.get("HERMES_DEV") == "1":
|
||||
return None
|
||||
|
||||
if _is_inside_container():
|
||||
return None
|
||||
|
||||
container_mode_file = get_hermes_home() / ".container-mode"
|
||||
|
||||
try:
|
||||
info = {}
|
||||
with open(container_mode_file, "r") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if "=" in line and not line.startswith("#"):
|
||||
key, _, value = line.partition("=")
|
||||
info[key.strip()] = value.strip()
|
||||
|
||||
backend = info.get("backend", "docker")
|
||||
container_name = info.get("container_name", "hermes-agent")
|
||||
exec_user = info.get("exec_user", "hermes")
|
||||
hermes_bin = info.get("hermes_bin", "/data/current-package/bin/hermes")
|
||||
|
||||
return {
|
||||
"backend": backend,
|
||||
"container_name": container_name,
|
||||
"exec_user": exec_user,
|
||||
"hermes_bin": hermes_bin,
|
||||
}
|
||||
except (OSError, IOError):
|
||||
return None
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Config paths
|
||||
# =============================================================================
|
||||
|
|
|
|||
|
|
@ -528,6 +528,133 @@ def _resolve_last_cli_session() -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def _exec_in_container(container_info: dict, cli_args: list):
|
||||
"""Route a CLI invocation into the managed container.
|
||||
|
||||
Uses subprocess.run so we can detect docker-level failures (container
|
||||
not running, user not found, etc.) and retry. On the happy path the
|
||||
exit code from the containerised hermes is propagated directly.
|
||||
|
||||
Failure behaviour:
|
||||
- TTY: spinner for up to 5s, then hard fail (exit 1)
|
||||
- Non-TTY: silent retry for 10s, then exit 126
|
||||
|
||||
Args:
|
||||
container_info: dict with backend, container_name, exec_user, hermes_bin
|
||||
cli_args: the original CLI arguments (everything after 'hermes')
|
||||
"""
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
backend = container_info["backend"]
|
||||
container_name = container_info["container_name"]
|
||||
exec_user = container_info["exec_user"]
|
||||
hermes_bin = container_info["hermes_bin"]
|
||||
|
||||
runtime = shutil.which(backend)
|
||||
if not runtime:
|
||||
print(f"Error: {backend} not found on PATH. Cannot route to container.",
|
||||
file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# The NixOS systemd service runs containers as root. Docker users
|
||||
# typically have group-based socket access, but Podman rootful
|
||||
# containers require sudo. Probe whether the runtime can see the
|
||||
# container; if not, retry via sudo.
|
||||
needs_sudo = False
|
||||
probe = subprocess.run(
|
||||
[runtime, "inspect", "--format", "ok", container_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if probe.returncode != 0:
|
||||
sudo = shutil.which("sudo")
|
||||
if sudo:
|
||||
probe2 = subprocess.run(
|
||||
[sudo, "-n", runtime, "inspect", "--format", "ok", container_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if probe2.returncode == 0:
|
||||
needs_sudo = True
|
||||
else:
|
||||
print(
|
||||
f"Error: container '{container_name}' not found via {backend}.\n"
|
||||
f"\n"
|
||||
f"The NixOS service runs the container as root. Your user cannot\n"
|
||||
f"see it because {backend} uses per-user namespaces.\n"
|
||||
f"\n"
|
||||
f"Fix: grant passwordless sudo for {backend}:\n"
|
||||
f"\n"
|
||||
f' security.sudo.extraRules = [{{\n'
|
||||
f' users = [ "{os.getenv("USER", "your-user")}" ];\n'
|
||||
f' commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n'
|
||||
f' }}];\n'
|
||||
f"\n"
|
||||
f"Or run: sudo hermes {' '.join(cli_args)}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
else:
|
||||
print(
|
||||
f"Error: container '{container_name}' not found via {backend}.\n"
|
||||
f"The container may be running under root. Try: sudo hermes {' '.join(cli_args)}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
is_tty = sys.stdin.isatty()
|
||||
tty_flags = ["-it"] if is_tty else ["-i"]
|
||||
|
||||
# Forward terminal environment variables
|
||||
env_flags = []
|
||||
for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"):
|
||||
val = os.environ.get(var)
|
||||
if val:
|
||||
env_flags.extend(["-e", f"{var}={val}"])
|
||||
|
||||
cmd_prefix = [shutil.which("sudo"), "-n", runtime] if needs_sudo else [runtime]
|
||||
exec_cmd = (
|
||||
cmd_prefix + ["exec"]
|
||||
+ tty_flags
|
||||
+ ["-u", exec_user]
|
||||
+ env_flags
|
||||
+ [container_name, hermes_bin]
|
||||
+ cli_args
|
||||
)
|
||||
|
||||
max_retries = 5 if is_tty else 10
|
||||
for attempt in range(max_retries):
|
||||
result = subprocess.run(exec_cmd)
|
||||
if result.returncode == 0:
|
||||
sys.exit(0)
|
||||
|
||||
# Exit code 125/126/127 from docker exec = container-level failure
|
||||
# (not running, user not found, command not found). Retry these.
|
||||
if result.returncode not in (125, 126, 127):
|
||||
# Hermes itself exited non-zero — propagate as-is
|
||||
sys.exit(result.returncode)
|
||||
|
||||
# Container-level failure — retry
|
||||
if attempt < max_retries - 1:
|
||||
if is_tty and attempt == 0:
|
||||
print("Waiting for container...", end="", flush=True,
|
||||
file=sys.stderr)
|
||||
elif is_tty:
|
||||
print(".", end="", flush=True, file=sys.stderr)
|
||||
time.sleep(1)
|
||||
else:
|
||||
if is_tty:
|
||||
print(file=sys.stderr) # newline after dots
|
||||
print(
|
||||
f"Error: container '{container_name}' is not reachable "
|
||||
f"via {backend}. Is the hermes-agent service running?",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(1)
|
||||
else:
|
||||
sys.exit(126)
|
||||
|
||||
|
||||
def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
|
||||
"""Resolve a session name (title) or ID to a session ID.
|
||||
|
||||
|
|
@ -5633,7 +5760,21 @@ Examples:
|
|||
# e.g. ``hermes -c Pokemon Agent Dev`` → ``hermes -c 'Pokemon Agent Dev'``
|
||||
_processed_argv = _coalesce_session_name_args(sys.argv[1:])
|
||||
args = parser.parse_args(_processed_argv)
|
||||
|
||||
|
||||
# ── Container-aware routing ────────────────────────────────────────
|
||||
# When NixOS container mode is active, route ALL subcommands into
|
||||
# the managed container. This runs before any subcommand dispatch.
|
||||
try:
|
||||
from hermes_cli.config import get_container_exec_info
|
||||
container_info = get_container_exec_info()
|
||||
if container_info:
|
||||
_exec_in_container(container_info, sys.argv[1:])
|
||||
sys.exit(1) # exec failed if we reach here
|
||||
except SystemExit:
|
||||
raise # Re-raise sys.exit from _exec_in_container
|
||||
except Exception:
|
||||
pass # Container routing unavailable, proceed locally
|
||||
|
||||
# Handle --version flag
|
||||
if args.version:
|
||||
cmd_version(args)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue