fix: normalize checkpoint manager home-relative paths

Adds _normalize_path() helper that calls expanduser().resolve() to
properly handle tilde paths (e.g. ~/.hermes, ~/.config).  Previously
Path.resolve() alone treated ~ as a literal directory name, producing
invalid paths like /root/~/.hermes.

Also improves _run_git() error handling to distinguish missing working
directories from missing git executable, and adds pre-flight directory
validation.

Cherry-picked from PR #7898 by faishal882.
Fixes #7807
This commit is contained in:
faishal 2026-04-11 14:48:51 -07:00 committed by Teknium
parent ee39e88b03
commit 90352b2adf
2 changed files with 150 additions and 18 deletions

View file

@ -100,7 +100,7 @@ def _validate_file_path(file_path: str, working_dir: str) -> Optional[str]:
if os.path.isabs(file_path):
return f"File path must be relative, got absolute path: {file_path!r}"
# Resolve and check containment within working_dir
abs_workdir = Path(working_dir).resolve()
abs_workdir = _normalize_path(working_dir)
resolved = (abs_workdir / file_path).resolve()
try:
resolved.relative_to(abs_workdir)
@ -113,18 +113,24 @@ def _validate_file_path(file_path: str, working_dir: str) -> Optional[str]:
# Shadow repo helpers
# ---------------------------------------------------------------------------
def _normalize_path(path_value: str) -> Path:
"""Return a canonical absolute path for checkpoint operations."""
return Path(path_value).expanduser().resolve()
def _shadow_repo_path(working_dir: str) -> Path:
    """Deterministic shadow repo path: sha256(abs_path)[:16].

    The working directory is normalized with ``_normalize_path`` before
    hashing, so spellings that normalize to the same absolute path map to the
    same shadow repository.

    Args:
        working_dir: Directory whose checkpoints the shadow repo stores.

    Returns:
        ``CHECKPOINT_BASE`` joined with the first 16 hex characters of the
        SHA-256 digest of the normalized absolute path.
    """
    abs_path = str(_normalize_path(working_dir))
    dir_hash = hashlib.sha256(abs_path.encode()).hexdigest()[:16]
    return CHECKPOINT_BASE / dir_hash
def _git_env(shadow_repo: Path, working_dir: str) -> dict:
"""Build env dict that redirects git to the shadow repo."""
normalized_working_dir = _normalize_path(working_dir)
env = os.environ.copy()
env["GIT_DIR"] = str(shadow_repo)
env["GIT_WORK_TREE"] = str(Path(working_dir).resolve())
env["GIT_WORK_TREE"] = str(normalized_working_dir)
env.pop("GIT_INDEX_FILE", None)
env.pop("GIT_NAMESPACE", None)
env.pop("GIT_ALTERNATE_OBJECT_DIRECTORIES", None)
@ -144,7 +150,17 @@ def _run_git(
exits while preserving the normal ``ok = (returncode == 0)`` contract.
Example: ``git diff --cached --quiet`` returns 1 when changes exist.
"""
env = _git_env(shadow_repo, working_dir)
normalized_working_dir = _normalize_path(working_dir)
if not normalized_working_dir.exists():
msg = f"working directory not found: {normalized_working_dir}"
logger.error("Git command skipped: %s (%s)", " ".join(["git"] + list(args)), msg)
return False, "", msg
if not normalized_working_dir.is_dir():
msg = f"working directory is not a directory: {normalized_working_dir}"
logger.error("Git command skipped: %s (%s)", " ".join(["git"] + list(args)), msg)
return False, "", msg
env = _git_env(shadow_repo, str(normalized_working_dir))
cmd = ["git"] + list(args)
allowed_returncodes = allowed_returncodes or set()
try:
@ -154,7 +170,7 @@ def _run_git(
text=True,
timeout=timeout,
env=env,
cwd=str(Path(working_dir).resolve()),
cwd=str(normalized_working_dir),
)
ok = result.returncode == 0
stdout = result.stdout.strip()
@ -169,9 +185,14 @@ def _run_git(
msg = f"git timed out after {timeout}s: {' '.join(cmd)}"
logger.error(msg, exc_info=True)
return False, "", msg
except FileNotFoundError:
logger.error("Git executable not found: %s", " ".join(cmd), exc_info=True)
return False, "", "git not found"
except FileNotFoundError as exc:
missing_target = getattr(exc, "filename", None)
if missing_target == "git":
logger.error("Git executable not found: %s", " ".join(cmd), exc_info=True)
return False, "", "git not found"
msg = f"working directory not found: {normalized_working_dir}"
logger.error("Git command failed before execution: %s (%s)", " ".join(cmd), msg, exc_info=True)
return False, "", msg
except Exception as exc:
logger.error("Unexpected git error running %s: %s", " ".join(cmd), exc, exc_info=True)
return False, "", str(exc)
@ -198,7 +219,7 @@ def _init_shadow_repo(shadow_repo: Path, working_dir: str) -> Optional[str]:
)
(shadow_repo / "HERMES_WORKDIR").write_text(
str(Path(working_dir).resolve()) + "\n", encoding="utf-8"
str(_normalize_path(working_dir)) + "\n", encoding="utf-8"
)
logger.debug("Initialised checkpoint repo at %s for %s", shadow_repo, working_dir)
@ -273,7 +294,7 @@ class CheckpointManager:
if not self._git_available:
return False
abs_dir = str(Path(working_dir).resolve())
abs_dir = str(_normalize_path(working_dir))
# Skip root, home, and other overly broad directories
if abs_dir in ("/", str(Path.home())):
@ -298,7 +319,7 @@ class CheckpointManager:
Returns a list of dicts with keys: hash, short_hash, timestamp, reason,
files_changed, insertions, deletions. Most recent first.
"""
abs_dir = str(Path(working_dir).resolve())
abs_dir = str(_normalize_path(working_dir))
shadow = _shadow_repo_path(abs_dir)
if not (shadow / "HEAD").exists():
@ -360,7 +381,7 @@ class CheckpointManager:
if hash_err:
return {"success": False, "error": hash_err}
abs_dir = str(Path(working_dir).resolve())
abs_dir = str(_normalize_path(working_dir))
shadow = _shadow_repo_path(abs_dir)
if not (shadow / "HEAD").exists():
@ -418,7 +439,7 @@ class CheckpointManager:
if hash_err:
return {"success": False, "error": hash_err}
abs_dir = str(Path(working_dir).resolve())
abs_dir = str(_normalize_path(working_dir))
# Validate file_path to prevent path traversal outside the working dir
if file_path:
@ -474,7 +495,7 @@ class CheckpointManager:
(directory containing .git, pyproject.toml, package.json, etc.).
Falls back to the file's parent directory.
"""
path = Path(file_path).resolve()
path = _normalize_path(file_path)
if path.is_dir():
candidate = path
else: