def _is_container() -> bool:
    """Return True when permission tightening should be skipped for a container.

    When Hermes runs in a container with volume-mounted config files, forcing
    0o600 permissions breaks multi-process setups where the gateway and
    dashboard run as different UIDs or the volume mount requires broader
    permissions.

    Checks, in order:

    1. ``HERMES_CONTAINER`` / ``HERMES_SKIP_CHMOD`` environment variables.
       Any non-empty value enables the opt-out *except* the conventional
       "off" spellings (``0``, ``false``, ``no``, ``off``), so that
       ``HERMES_SKIP_CHMOD=0`` does not accidentally disable chmod.
    2. Container marker files: ``/.dockerenv`` (Docker) and
       ``/run/.containerenv`` (Podman).
    3. ``docker`` / ``lxc`` / ``kubepods`` appearing in ``/proc/1/cgroup``.

    Returns:
        ``True`` if any check matches, ``False`` otherwise (including when
        ``/proc/1/cgroup`` cannot be read, e.g. on non-Linux systems).
    """
    # Explicit opt-out via environment. Values are compared case-insensitively
    # after stripping whitespace; explicit "off" spellings do not count.
    disabled_values = {"", "0", "false", "no", "off"}
    for var_name in ("HERMES_CONTAINER", "HERMES_SKIP_CHMOD"):
        if os.environ.get(var_name, "").strip().lower() not in disabled_values:
            return True

    # Marker files dropped into the container's filesystem by the runtime.
    if any(os.path.exists(marker) for marker in ("/.dockerenv", "/run/.containerenv")):
        return True

    # cgroup-based detection.
    # NOTE(review): on cgroup-v2 hosts /proc/1/cgroup may contain none of
    # these tokens — confirm the marker files above are sufficient there.
    try:
        with open("/proc/1/cgroup", "r", encoding="utf-8", errors="replace") as f:
            cgroup_content = f.read()
    except OSError:
        # /proc missing or unreadable: treat as "not a container".
        return False
    return any(token in cgroup_content for token in ("docker", "lxc", "kubepods"))
+ original_mode = None + if env_path.exists(): + try: + original_mode = stat.S_IMODE(env_path.stat().st_mode) + except OSError: + pass try: with os.fdopen(fd, 'w', **write_kw) as f: f.writelines(lines) f.flush() os.fsync(f.fileno()) os.replace(tmp_path, env_path) + # Restore original permissions before _secure_file may tighten them. + if original_mode is not None: + try: + os.chmod(env_path, original_mode) + except OSError: + pass except BaseException: try: os.unlink(tmp_path) @@ -2916,13 +2957,6 @@ def save_env_value(key: str, value: str): os.environ[key] = value - # Restrict .env permissions to owner-only (contains API keys) - if not _IS_WINDOWS: - try: - os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR) - except OSError: - pass - def remove_env_value(key: str) -> bool: """Remove a key from ~/.hermes/.env and os.environ. @@ -2951,12 +2985,23 @@ def remove_env_value(key: str) -> bool: if found: fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_') + # Preserve original permissions so Docker volume mounts aren't clobbered. 
def _preserve_file_mode(path: Path) -> "int | None":
    """Capture the permission bits of *path*, or ``None`` if unavailable.

    Args:
        path: File whose mode should be captured. ``str`` paths are accepted
            as well as ``Path`` objects.

    Returns:
        The permission bits (``stat.S_IMODE`` of ``st_mode``), or ``None``
        when the file does not exist or cannot be stat'ed.
    """
    # A single stat call: a missing file raises FileNotFoundError (an OSError
    # subclass), so no separate exists() check — and no race between the two.
    try:
        return stat.S_IMODE(os.stat(path).st_mode)
    except OSError:
        return None


def _restore_file_mode(path: Path, mode: "int | None") -> None:
    """Re-apply *mode* to *path* after an atomic replace.

    ``tempfile.mkstemp`` creates files with 0o600 (owner-only). After
    ``os.replace`` swaps the temp file into place the target inherits
    those restrictive permissions, breaking Docker / NAS volume mounts
    that rely on broader permissions set by the user. Calling this
    right after ``os.replace`` restores the original permissions.

    Args:
        path: File that was just replaced.
        mode: Permission bits previously captured for *path*; ``None``
            means there was nothing to preserve and the call is a no-op.

    Best-effort: a failing ``chmod`` is logged at debug level and never
    raised, so it cannot turn a successful write into an error.
    """
    if mode is None:
        return
    try:
        os.chmod(path, mode)
    except OSError as exc:
        logging.getLogger(__name__).debug(
            "Could not restore mode %o on %s: %s", mode, path, exc
        )