diff --git a/tests/tools/test_approval.py b/tests/tools/test_approval.py index b37d57555fb..62359601ea8 100644 --- a/tests/tools/test_approval.py +++ b/tests/tools/test_approval.py @@ -738,6 +738,64 @@ class TestSensitiveInPlaceEditPattern: assert key is None +class TestWindowsAbsolutePathFolding: + """Windows absolute home / Hermes-home prefixes must fold to ~/ and + ~/.hermes/ in dangerous-command detection. + + Regression: on native Windows the home prefix uses backslash separators + (``C:\\Users\\alice\\.ssh\\authorized_keys``). Detection stripped backslash + escapes *before* folding, dissolving those separators, so writes to startup, + SSH, and Hermes config/env files returned "safe" without an approval prompt. + The OS-specific ``Path.home()`` / ``get_hermes_home()`` tests above only + exercise this branch on a Windows host; these monkeypatch a Windows-style + HOME/HERMES_HOME so the fold is verified on the POSIX CI runner too.""" + + def test_windows_home_bashrc_folds(self, monkeypatch): + monkeypatch.setenv("HOME", r"C:\Users\tester") + dangerous, key, _ = detect_dangerous_command( + r"echo 'pwned' > C:\Users\tester\.bashrc" + ) + assert dangerous is True + assert key is not None + + def test_windows_home_ssh_authorized_keys_multiseg_folds(self, monkeypatch): + # The multi-segment suffix (\.ssh\authorized_keys) must also have its + # separators normalized, not just the home prefix. + monkeypatch.setenv("HOME", r"C:\Users\tester") + dangerous, key, _ = detect_dangerous_command( + r"cat key >> C:\Users\tester\.ssh\authorized_keys" + ) + assert dangerous is True + assert key is not None + + def test_windows_home_forward_slash_folds(self, monkeypatch): + monkeypatch.setenv("HOME", r"C:\Users\tester") + dangerous, key, _ = detect_dangerous_command( + "cat key >> C:/Users/tester/.ssh/authorized_keys" + ) + assert dangerous is True + assert key is not None + + def test_windows_hermes_home_config_folds(self, monkeypatch): + # Hermes home nests under the user home on Windows; it must fold before + # the user-home rewrite eats its prefix. + monkeypatch.setenv("HOME", r"C:\Users\tester") + monkeypatch.setenv("HERMES_HOME", r"C:\Users\tester\.hermes") + dangerous, key, _ = detect_dangerous_command( + r"sed -i 's/manual/off/' C:\Users\tester\.hermes\config.yaml" + ) + assert dangerous is True + assert key is not None + + def test_windows_unrelated_path_not_flagged(self, monkeypatch): + monkeypatch.setenv("HOME", r"C:\Users\tester") + dangerous, key, _ = detect_dangerous_command( + r"cp report.txt C:\Users\tester\notes.txt" + ) + assert dangerous is False + assert key is None + + class TestProjectSensitiveTeePattern: def test_tee_to_local_dotenv_requires_approval(self): dangerous, key, desc = detect_dangerous_command("printenv | tee .env.local") diff --git a/tools/approval.py b/tools/approval.py index 116cf80ddb8..e26aa423f5f 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -10,6 +10,7 @@ This module is the single source of truth for the dangerous command system: import contextvars import fnmatch +import functools import logging import os import re @@ -571,87 +572,136 @@ def _normalize_command_for_detection(command: str) -> str: command = command.replace('\x00', '') # Normalize Unicode (fullwidth Latin, halfwidth Katakana, etc.) command = unicodedata.normalize('NFKC', command) + # Fold absolute home / active-profile-home prefixes into their canonical + # ~/ and ~/.hermes/ forms so static user-sensitive patterns catch + # /home/alice/.bashrc and C:\Users\alice\.bashrc the same way they catch + # ~/.bashrc. Resolve at detection time (not via an import-time snapshot) so + # it tracks HOME / HERMES_HOME even when those are set after this module is + # imported — as the hermetic test conftest and profile/session launchers do. + # + # This MUST run before the backslash-escape strip below: on Windows the home + # prefix is separated by backslashes (C:\Users\alice\...), which that strip + # would otherwise dissolve (-> C:Usersalice) and make the fold impossible. + # The fold matches either separator, so POSIX paths are unaffected by order. + # + # Fold the (more specific) Hermes home first: on Windows it nests under the + # user home (C:\Users\alice\AppData\...\hermes), so folding the user home + # first would eat the prefix the Hermes-home fold needs. + command = _rewrite_resolved_hermes_home(command) + command = _rewrite_resolved_user_home(command) # Strip shell backslash-escapes: r\m → rm. Prevents \-injection bypass. command = re.sub(r'\\([^\n])', r'\1', command) # Strip empty-string literals that split tokens: r''m → rm, r"\"m → rm. command = re.sub(r"''|\"\"", '', command) - # Fold the current user's resolved absolute home path into ~/ at detection - # time so static user-sensitive patterns catch /home/alice/.bashrc the same - # way they catch ~/.bashrc. Do not snapshot this at import time: tests and - # profile/session launchers can set HOME after this module is imported. - command = _rewrite_resolved_user_home(command) - # Fold the resolved absolute active-profile home path into the canonical - # ~/.hermes/ form so the Hermes config/env patterns catch it. In Docker and - # gateway deployments the agent often references the resolved absolute path - # directly (e.g. `sed -i ... /home/hermes/.hermes/config.yaml`) rather than - # ~, $HOME, or $HERMES_HOME. Done at detection time (not via an import-time - # pattern snapshot) so it tracks the live HERMES_HOME even when that is set - # after this module is imported — as the hermetic test conftest does. - command = _rewrite_resolved_hermes_home(command) + return command + + +# Shell metacharacters, quotes, and whitespace that terminate a filesystem +# path token on a command line. Used to bound the path tail we normalize. +_PATH_TOKEN_STOP = r"""\s'"`;|&<>()""" +# One path segment (no separators, no terminators) preceded by a separator. +_PATH_TAIL = r"(?P(?:[/\\][^/\\" + _PATH_TOKEN_STOP + r"]*)+)" + + +@functools.lru_cache(maxsize=64) +def _home_prefix_fold_regex(path: str): + """Compile a regex matching *path* used as an absolute directory prefix. + + The home components are matched with either separator (``/`` or ``\\``) + between them, followed by the rest of the path token (the ``tail`` group), + so a Windows native path (``C:\\Users\\alice\\.ssh\\authorized_keys``), its + forward-slash form, and mixed-separator forms all fold — and the tail's + backslashes get normalized to ``/`` by the caller so multi-segment static + patterns (``~/.ssh/authorized_keys``) still match. The trailing tail is + required (``+``), so a bare home with no path under it is not folded. + + Returns ``None`` for an unset or degenerate path — one with fewer than two + components below the root — so a stray HOME / HERMES_HOME such as ``/``, + ``C:\\`` or ``""`` cannot rewrite unrelated filesystem prefixes. Cached + because the resolved home is stable across calls on this hot path. + """ + if not path: + return None + components = [c for c in re.split(r"[/\\]+", path) if c] + # Require at least two non-empty components below the root. For POSIX this + # mirrors the historical ``count("/") >= 2`` guard (``/home/alice`` folds, + # ``/home`` does not); for Windows it rejects a bare drive root (``C:\\``) + # while accepting a real home (``C:\\Users\\alice``). + if len(components) < 2: + return None + body = r"[/\\]+".join(re.escape(c) for c in components) + # Optional leading root separator (POSIX ``/`` or UNC ``\\``); a Windows + # drive letter is captured as the first component. + return re.compile(r"[/\\]*" + body + _PATH_TAIL) + + +def _fold_home_prefixes(command: str, paths, replacement: str) -> str: + """Fold each resolved home *path* prefix in *command* to *replacement*. + + *replacement* has no trailing separator (``~`` / ``~/.hermes``); the matched + path tail (with its backslashes normalized to ``/``) supplies it. Longest + candidate first so a deeper home (e.g. an explicit HOME under USERPROFILE) + folds before a shorter overlapping one that would otherwise clobber it. + """ + seen: set[str] = set() + for path in sorted((p for p in paths if p), key=len, reverse=True): + if path in seen: + continue + seen.add(path) + pattern = _home_prefix_fold_regex(path) + if pattern is not None: + command = pattern.sub( + lambda m: replacement + m.group("tail").replace("\\", "/"), + command, + ) return command def _rewrite_resolved_user_home(command: str) -> str: """Rewrite the current user's absolute home prefix to ``~/``. - Resolves HOME at detection time, including its symlink-resolved form, so - terminal commands targeting absolute home paths are checked by the same - static patterns as tilde and $HOME forms. No-op when HOME is unset or - degenerate. + Resolves the home at detection time — its expanduser form, symlink-resolved + form, and an explicitly set ``HOME`` — so absolute home paths are checked by + the same static patterns as tilde and ``$HOME`` forms. ``HOME`` is consulted + directly because Windows' ``os.path.expanduser`` resolves ``~`` from + ``USERPROFILE`` and ignores ``HOME``, unlike POSIX. Matches both POSIX + (``/home/alice``) and Windows (``C:\\Users\\alice`` or ``C:/Users/alice``) + separators. No-op when the home is unset or degenerate. """ try: home = os.path.expanduser("~") candidates = [ - home.rstrip("/"), - os.path.realpath(home).rstrip("/"), + home, + os.path.realpath(home), + os.environ.get("HOME", ""), ] except Exception: return command - seen: set[str] = set() - for path in candidates: - if not path or path in seen: - continue - seen.add(path) - # Require an absolute path below root so a bad HOME cannot rewrite the - # whole filesystem namespace. - normalized = path.rstrip("/") - if not normalized.startswith("/") or normalized.count("/") < 2: - continue - command = command.replace(normalized + "/", "~/") - return command + return _fold_home_prefixes(command, candidates, "~") def _rewrite_resolved_hermes_home(command: str) -> str: """Rewrite the resolved absolute Hermes home prefix to ``~/.hermes/``. Resolves the active ``HERMES_HOME`` at call time (and its symlink-resolved - form) and replaces an occurrence of ``/`` in *command* with + form) and folds an occurrence of ``/`` in *command* into ``~/.hermes/`` so the static ``_HERMES_CONFIG_PATH`` / ``_HERMES_ENV_PATH`` - patterns match. No-op when the path can't be resolved or doesn't appear. + patterns match. In Docker and gateway deployments the agent often references + the resolved absolute path directly (e.g. ``sed -i ... + /home/hermes/.hermes/config.yaml``) rather than ``~``, ``$HOME``, or + ``$HERMES_HOME``. Matches both POSIX and Windows separators. No-op when the + path can't be resolved or doesn't appear. """ try: from hermes_constants import get_hermes_home home = get_hermes_home().expanduser() candidates = [ - str(home).rstrip("/"), - str(home.resolve(strict=False)).rstrip("/"), + str(home), + str(home.resolve(strict=False)), ] except Exception: return command - seen: set[str] = set() - for path in candidates: - if not path or path in seen: - continue - seen.add(path) - # Guard against a degenerate HERMES_HOME (e.g. "/" or "") rewriting - # unrelated paths: require an absolute path with at least one non-root - # component. The active profile home is always a real directory like - # /home/hermes/.hermes or a per-test tempdir, never a bare root. - normalized = path.rstrip("/") - if not normalized.startswith("/") or normalized.count("/") < 2: - continue - command = command.replace(normalized + "/", "~/.hermes/") - return command + return _fold_home_prefixes(command, candidates, "~/.hermes") def detect_dangerous_command(command: str) -> tuple: