fix: preserve file permissions on atomic writes (Docker/NAS fix) (#10618)

atomic_yaml_write() and atomic_json_write() used tempfile.mkstemp()
which creates files with 0o600 (owner-only). After os.replace(), the
original file's permissions were destroyed. Combined with _secure_file()
forcing 0o600, this broke Docker/NAS setups where volume-mounted config
files need broader permissions (e.g. 0o666).

Changes:
- atomic_yaml_write/atomic_json_write: capture original permissions
  before write, restore after os.replace()
- _secure_file: skip permission tightening in container environments
  (detected via /.dockerenv, /proc/1/cgroup, or HERMES_SKIP_CHMOD env)
- save_env_value: preserve original .env permissions, remove redundant
  third os.chmod call
- remove_env_value: same permission preservation

On desktop installs, _secure_file() still tightens to 0o600 as before.
In containers, the user's original permissions are respected.

Reported by Cedric Weber (Docker/Portainer on NAS).
This commit is contained in:
Teknium 2026-04-15 19:52:46 -07:00 committed by GitHub
parent cc6e8941db
commit df714add9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 85 additions and 8 deletions

View file

@ -241,13 +241,41 @@ def _secure_dir(path):
pass
def _is_container() -> bool:
"""Detect if we're running inside a Docker/Podman/LXC container.
When Hermes runs in a container with volume-mounted config files, forcing
0o600 permissions breaks multi-process setups where the gateway and
dashboard run as different UIDs or the volume mount requires broader
permissions.
"""
# Explicit opt-out
if os.environ.get("HERMES_CONTAINER") or os.environ.get("HERMES_SKIP_CHMOD"):
return True
# Docker / Podman marker file
if os.path.exists("/.dockerenv"):
return True
# LXC / cgroup-based detection
try:
with open("/proc/1/cgroup", "r") as f:
cgroup_content = f.read()
if "docker" in cgroup_content or "lxc" in cgroup_content or "kubepods" in cgroup_content:
return True
except (OSError, IOError):
pass
return False
def _secure_file(path):
"""Set file to owner-only read/write (0600). No-op on Windows.
Skipped in managed mode the NixOS activation script sets
group-readable permissions (0640) on config files.
Skipped in containers Docker/Podman volume mounts often need broader
permissions. Set HERMES_SKIP_CHMOD=1 to force-skip on other systems.
"""
if is_managed():
if is_managed() or _is_container():
return
try:
if os.path.exists(str(path)):
@ -2900,12 +2928,25 @@ def save_env_value(key: str, value: str):
lines.append(f"{key}={value}\n")
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
if env_path.exists():
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
# Restore original permissions before _secure_file may tighten them.
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)
@ -2916,13 +2957,6 @@ def save_env_value(key: str, value: str):
os.environ[key] = value
# Restrict .env permissions to owner-only (contains API keys)
if not _IS_WINDOWS:
try:
os.chmod(env_path, stat.S_IRUSR | stat.S_IWUSR)
except OSError:
pass
def remove_env_value(key: str) -> bool:
"""Remove a key from ~/.hermes/.env and os.environ.
@ -2951,12 +2985,23 @@ def remove_env_value(key: str) -> bool:
if found:
fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
# Preserve original permissions so Docker volume mounts aren't clobbered.
original_mode = None
try:
original_mode = stat.S_IMODE(env_path.stat().st_mode)
except OSError:
pass
try:
with os.fdopen(fd, 'w', **write_kw) as f:
f.writelines(new_lines)
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, env_path)
if original_mode is not None:
try:
os.chmod(env_path, original_mode)
except OSError:
pass
except BaseException:
try:
os.unlink(tmp_path)

View file

@ -3,6 +3,7 @@
import json
import logging
import os
import stat
import tempfile
from pathlib import Path
from typing import Any, Union
@ -31,6 +32,31 @@ def env_var_enabled(name: str, default: str = "") -> bool:
return is_truthy_value(os.getenv(name, default), default=False)
def _preserve_file_mode(path: Path) -> "int | None":
"""Capture the permission bits of *path* if it exists, else ``None``."""
try:
return stat.S_IMODE(path.stat().st_mode) if path.exists() else None
except OSError:
return None
def _restore_file_mode(path: Path, mode: "int | None") -> None:
"""Re-apply *mode* to *path* after an atomic replace.
``tempfile.mkstemp`` creates files with 0o600 (owner-only). After
``os.replace`` swaps the temp file into place the target inherits
those restrictive permissions, breaking Docker / NAS volume mounts
that rely on broader permissions set by the user. Calling this
right after ``os.replace`` restores the original permissions.
"""
if mode is None:
return
try:
os.chmod(path, mode)
except OSError:
pass
def atomic_json_write(
path: Union[str, Path],
data: Any,
@ -54,6 +80,8 @@ def atomic_json_write(
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
original_mode = _preserve_file_mode(path)
fd, tmp_path = tempfile.mkstemp(
dir=str(path.parent),
prefix=f".{path.stem}_",
@ -71,6 +99,7 @@ def atomic_json_write(
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, path)
_restore_file_mode(path, original_mode)
except BaseException:
# Intentionally catch BaseException so temp-file cleanup still runs for
# KeyboardInterrupt/SystemExit before re-raising the original signal.
@ -106,6 +135,8 @@ def atomic_yaml_write(
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
original_mode = _preserve_file_mode(path)
fd, tmp_path = tempfile.mkstemp(
dir=str(path.parent),
prefix=f".{path.stem}_",
@ -119,6 +150,7 @@ def atomic_yaml_write(
f.flush()
os.fsync(f.fileno())
os.replace(tmp_path, path)
_restore_file_mode(path, original_mode)
except BaseException:
# Match atomic_json_write: cleanup must also happen for process-level
# interruptions before we re-raise them.