fix(utils): copy fallback for atomic replace across devices (#43852)

Fallback from `os.replace` on EXDEV/EBUSY using copy+fsync+unlink while preserving symlink target semantics and metadata.
This commit is contained in:
Teknium 2026-06-13 14:50:05 -07:00 committed by GitHub
parent 817f392311
commit bf8effad02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 139 additions and 2 deletions

View file

@ -12,6 +12,7 @@ the codebase were migrated to the helper; these tests pin that invariant.
"""
from __future__ import annotations
import errno
import json
import os
import sys
@ -158,3 +159,113 @@ def test_atomic_replace_broken_symlink_creates_target(tmp_path: Path) -> None:
assert link.is_symlink(), "symlink must be preserved"
assert missing.exists(), "real target should now exist"
assert missing.read_text(encoding="utf-8") == "created-through-link\n"
# ─── EXDEV / EBUSY copy fallback ───────────────────────────────────────────
@pytest.mark.parametrize("fail_errno", [errno.EXDEV, errno.EBUSY])
def test_atomic_replace_copy_fallback(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch, fail_errno: int
) -> None:
target = tmp_path / "config.yaml"
target.write_text("old\n", encoding="utf-8")
tmp = _write_tmp(tmp_path, "new\n")
def fail_replace(src: str, dst: str) -> None:
raise OSError(fail_errno, os.strerror(fail_errno), src, None, dst)
monkeypatch.setattr("utils.os.replace", fail_replace)
assert Path(atomic_replace(tmp, target)) == target
assert target.read_text(encoding="utf-8") == "new\n"
assert not tmp.exists()
def test_atomic_replace_copy_fallback_preserves_symlink(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
real = tmp_path / "real.yaml"
link = tmp_path / "link.yaml"
real.write_text("old\n", encoding="utf-8")
link.symlink_to(real)
tmp = _write_tmp(tmp_path, "new\n")
def fail_replace(src: str, dst: str) -> None:
raise OSError(errno.EXDEV, os.strerror(errno.EXDEV), src, None, dst)
monkeypatch.setattr("utils.os.replace", fail_replace)
assert Path(atomic_replace(tmp, link)) == real
assert link.is_symlink()
assert real.read_text(encoding="utf-8") == "new\n"
assert not tmp.exists()
def test_atomic_replace_copy_fallback_preserves_metadata(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
if os.name != "posix":
pytest.skip("POSIX-only")
target = tmp_path / "config.yaml"
target.write_text("old\n", encoding="utf-8")
os.chmod(target, 0o600)
tmp = _write_tmp(tmp_path, "new\n")
os.chmod(tmp, 0o644)
def fail_replace(src: str, dst: str) -> None:
raise OSError(errno.EBUSY, os.strerror(errno.EBUSY), src, None, dst)
monkeypatch.setattr("utils.os.replace", fail_replace)
atomic_replace(tmp, target)
assert target.read_text(encoding="utf-8") == "new\n"
assert target.stat().st_mode & 0o777 == 0o644
def test_atomic_replace_other_oserror_propagates(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
target = tmp_path / "config.yaml"
target.write_text("old\n", encoding="utf-8")
tmp = _write_tmp(tmp_path, "new\n")
def fail_replace(src: str, dst: str) -> None:
raise OSError(errno.EACCES, os.strerror(errno.EACCES), src, None, dst)
monkeypatch.setattr("utils.os.replace", fail_replace)
with pytest.raises(OSError) as excinfo:
atomic_replace(tmp, target)
assert excinfo.value.errno == errno.EACCES
assert target.read_text(encoding="utf-8") == "old\n"
assert tmp.exists()
def test_atomic_replace_real_cross_device(tmp_path: Path) -> None:
shm = Path("/dev/shm")
if os.name != "posix" or not os.access(shm, os.W_OK):
pytest.skip("requires writable /dev/shm")
import shutil as _shutil
import uuid as _uuid
other_fs_dir = shm / f"hermes-exdev-test-{_uuid.uuid4().hex[:8]}"
other_fs_dir.mkdir()
try:
real = other_fs_dir / "config.yaml"
real.write_text("old\n", encoding="utf-8")
if os.stat(real).st_dev == os.stat(tmp_path).st_dev:
pytest.skip("/dev/shm is not a separate filesystem here")
link = tmp_path / "config.yaml"
link.symlink_to(real)
tmp = _write_tmp(tmp_path, "new\n")
assert Path(atomic_replace(tmp, link)) == real
assert link.is_symlink()
assert real.read_text(encoding="utf-8") == "new\n"
assert not tmp.exists()
finally:
_shutil.rmtree(other_fs_dir, ignore_errors=True)

View file

@ -1,8 +1,10 @@
"""Shared utility functions for hermes-agent."""
import errno
import json
import logging
import os
import shutil
import stat
import tempfile
from pathlib import Path
@ -71,14 +73,38 @@ def atomic_replace(tmp_path: Union[str, Path], target: Union[str, Path]) -> str:
This helper resolves the symlink first so ``os.replace`` writes to
the real file in-place while the symlink survives. For non-symlink
and non-existent paths the behavior is identical to a plain
``os.replace`` call.
``os.replace`` call unless the rename fails with ``EXDEV`` or ``EBUSY``;
those cases fall back to copy/fsync/unlink for cross-device, bind-mount,
and busy-file deployments.
Returns the resolved real path used for the replace, so callers that
need to re-apply permissions can target it instead of the symlink.
"""
target_str = str(target)
real_path = os.path.realpath(target_str) if os.path.islink(target_str) else target_str
os.replace(str(tmp_path), real_path)
tmp_str = str(tmp_path)
try:
os.replace(tmp_str, real_path)
except OSError as exc:
if exc.errno not in (errno.EXDEV, errno.EBUSY):
raise
logger.debug(
"atomic_replace: %s -> %s failed with %s; falling back to copy",
tmp_str,
real_path,
errno.errorcode.get(exc.errno, exc.errno),
)
shutil.copyfile(tmp_str, real_path)
try:
shutil.copystat(tmp_str, real_path)
except OSError:
pass
try:
with open(real_path, "rb") as f:
os.fsync(f.fileno())
except OSError:
pass
os.unlink(tmp_str)
return real_path