fix: preserve file permissions on atomic writes (Docker/NAS fix) (#10618)

atomic_yaml_write() and atomic_json_write() used tempfile.mkstemp()
which creates files with 0o600 (owner-only). After os.replace(), the
original file's permissions were destroyed. Combined with _secure_file()
forcing 0o600, this broke Docker/NAS setups where volume-mounted config
files need broader permissions (e.g. 0o666).

Changes:
- atomic_yaml_write/atomic_json_write: capture original permissions
  before write, restore after os.replace()
- _secure_file: skip permission tightening in container environments
  (detected via /.dockerenv, /proc/1/cgroup, or HERMES_SKIP_CHMOD env)
- save_env_value: preserve original .env permissions, remove redundant
  third os.chmod call
- remove_env_value: same permission preservation

On desktop installs, _secure_file() still tightens to 0o600 as before.
In containers, the user's original permissions are respected.

Reported by Cedric Weber (Docker/Portainer on NAS).
This commit is contained in:
Teknium 2026-04-15 19:52:46 -07:00 committed by GitHub
parent cc6e8941db
commit df714add9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 85 additions and 8 deletions

View file

@ -241,13 +241,41 @@ def _secure_dir(path):
pass pass
def _is_container() -> bool:
"""Detect if we're running inside a Docker/Podman/LXC container.
When Hermes runs in a container with volume-mounted config files, forcing
0o600 permissions breaks multi-process setups where the gateway and
dashboard run as different UIDs or the volume mount requires broader
permissions.
"""
# Explicit opt-out
if os.environ.get("HERMES_CONTAINER") or os.environ.get("HERMES_SKIP_CHMOD"):
return True
# Docker / Podman marker file
if os.path.exists("/.dockerenv"):
return True
# LXC / cgroup-based detection
try:
with open("/proc/1/cgroup", "r") as f:
cgroup_content = f.read()
if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content:
return True
except (OSError, IOError):
pass
return False
def _secure_file(path): def _secure_file(path):
"""Set file to owner-only read/write (0600). No-op on Windows. """Set file to owner-only read/write (0600). No-op on Windows.
Skipped in managed mode the NixOS activation script sets Skipped in managed mode the NixOS activation script sets
group-readable permissions (0640) on config files. group-readable permissions (0640) on config files.
Skipped in containers Docker/Podman volume mounts often need broader
permissions. Set HERMES_SKIP_CHMOD=1 to force-skip on other systems.
""" """
if is_managed(): if is_managed() or _is_container():
return return
try: try:
if os.path.exists(str(path)): if os.path.exists(str(path)):
@ -2900,12 +2928,25 @@ def save_env_value(key: str, value: str):
lines.append(f"{key}={value}\n") lines.append(f"{key}={value}\n")
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_') fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
if env_path.exists():
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try: try:
with os.fdopen(fd, 'w', **write_kw) as f: with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(lines) f.writelines(lines)
f.flush() f.flush()
os.fsync(f.fileno()) os.fsync(f.fileno())
os.replace(tmp_path, env_path) os.replace(tmp_path, env_path)
# Restore original permissions before _secure_file may tighten them.
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException: except BaseException:
try: try:
os.unlink(tmp_path) os.unlink(tmp_path)
@ -2916,13 +2957,6 @@ def save_env_value(key: str, value: str):
os.environ[key] = value os.environ[key] = value
# Restrict .env permissions to owner-only (contains API keys)
if not _IS_WINDOWS:
try:
os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
def remove_env_value(key: str) -> bool: def remove_env_value(key: str) -> bool:
"""Remove a key from ~/.hermes/.env and os.environ. """Remove a key from ~/.hermes/.env and os.environ.
@ -2951,12 +2985,23 @@ def remove_env_value(key: str) -> bool:
if found: if found:
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_') fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try: try:
with os.fdopen(fd, 'w', **write_kw) as f: with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(new_lines) f.writelines(new_lines)
f.flush() f.flush()
os.fsync(f.fileno()) os.fsync(f.fileno())
os.replace(tmp_path, env_path) os.replace(tmp_path, env_path)
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException: except BaseException:
try: try:
os.unlink(tmp_path) os.unlink(tmp_path)

View file

@ -3,6 +3,7 @@
import json import json
import logging import logging
import os import os
import stat
import tempfile import tempfile
from pathlib import Path from pathlib import Path
from typing import Any, Union from typing import Any, Union
@ -31,6 +32,31 @@ def env_var_enabled(name: str, default: str = "") -> bool:
return is_truthy_value(os.getenv(name, default), default=False) return is_truthy_value(os.getenv(name, default), default=False)
def _preserve_file_mode(path: Path) -> "int | None":
"""Capture the permission bits of *path* if it exists, else ``None``."""
try:
return stat.S_IMODE(path.stat().st_mode) if path.exists() else None
except OSError:
return None
def _restore_file_mode(path: Path, mode: "int | None") -> None:
"""Re-apply *mode* to *path* after an atomic replace.
``tempfile.mkstemp`` creates files with 0o600 (owner-only). After
``os.replace`` swaps the temp file into place the target inherits
those restrictive permissions, breaking Docker / NAS volume mounts
that rely on broader permissions set by the user. Calling this
right after ``os.replace`` restores the original permissions.
"""
if mode is None:
return
try:
os.chmod(path, mode)
except OSError:
pass
def atomic_json_write( def atomic_json_write(
path: Union[str, Path], path: Union[str, Path],
data: Any, data: Any,
@ -54,6 +80,8 @@ def atomic_json_write(
path = Path(path) path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
original_mode = _preserve_file_mode(path)
fd, tmp_path = tempfile.mkstemp( fd, tmp_path = tempfile.mkstemp(
dir=str(path.parent), dir=str(path.parent),
prefix=f".{path.stem}_", prefix=f".{path.stem}_",
@ -71,6 +99,7 @@ def atomic_json_write(
f.flush() f.flush()
os.fsync(f.fileno()) os.fsync(f.fileno())
os.replace(tmp_path, path) os.replace(tmp_path, path)
_restore_file_mode(path, original_mode)
except BaseException: except BaseException:
# Intentionally catch BaseException so temp-file cleanup still runs for # Intentionally catch BaseException so temp-file cleanup still runs for
# KeyboardInterrupt/SystemExit before re-raising the original signal. # KeyboardInterrupt/SystemExit before re-raising the original signal.
@ -106,6 +135,8 @@ def atomic_yaml_write(
path = Path(path) path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True) path.parent.mkdir(parents=True, exist_ok=True)
original_mode = _preserve_file_mode(path)
fd, tmp_path = tempfile.mkstemp( fd, tmp_path = tempfile.mkstemp(
dir=str(path.parent), dir=str(path.parent),
prefix=f".{path.stem}_", prefix=f".{path.stem}_",
@ -119,6 +150,7 @@ def atomic_yaml_write(
f.flush() f.flush()
os.fsync(f.fileno()) os.fsync(f.fileno())
os.replace(tmp_path, path) os.replace(tmp_path, path)
_restore_file_mode(path, original_mode)
except BaseException: except BaseException:
# Match atomic_json_write: cleanup must also happen for process-level # Match atomic_json_write: cleanup must also happen for process-level
# interruptions before we re-raise them. # interruptions before we re-raise them.