From fc918867b2bcc311ba8992b73b519d7c49626f3e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 7 May 2026 17:51:57 -0700 Subject: [PATCH] fix(windows): quote cache paths in bash + augment PATH so rg/bash resolve on first launch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three interrelated bugs from teknium1's first interactive chat on Windows: 1. **Snapshot/cwd file paths unquoted in bash command strings.** The session bootstrap and per-command wrapper interpolated ``self._snapshot_path`` / ``self._cwd_file`` unquoted into bash commands like ``export -p > C:/Users/ryanc/.../hermes-snap-xxx.sh``. Git Bash's MSYS2 layer handles ``C:/...`` paths correctly ONLY when quoted; unquoted, the colon and forward-slash get glob-parsed and the redirect targets a bogus path. Symptom: every terminal command emitted two ``C:/Users/.../hermes-snap-*.sh (No such file or directory)`` lines that bled into stdout (``stderr=STDOUT`` on the local backend) and corrupted file contents when the agent wrote to scratch paths via the terminal tool. Fix: ``shlex.quote()`` every interpolation of ``_snapshot_path`` and ``_cwd_file`` in base.py — no-op on POSIX (the paths contain no shell-metachars), critical on Windows. 2. **Stale PATH on first hermes launch after install.** ``install.ps1`` adds the PortableGit ``cmd`` / ``bin`` / ``usr\bin`` directories to the Windows **User** PATH via ``SetEnvironmentVariable(..., "User")``. That write propagates to newly *spawned* processes only — already-running shells (including the one the user types ``hermes`` into immediately after install) retain their old PATH. So hermes starts with a PATH that doesn't include bash, rg, grep, ssh — and ``search_files`` reports "rg/find not available" when the user clearly just installed them. Fix: new ``_augment_path_with_known_tools()`` helper called from ``configure_windows_stdio()`` on startup. Prepends the Hermes-managed Git directories + the WinGet Links directory (where ripgrep lands) to ``os.environ['PATH']`` if they exist on disk but aren't already in PATH. Subsequent subprocess calls (including bash spawns via ``_find_bash()``) inherit the augmented PATH and find everything. No-op on POSIX and when the directories don't exist. 3. **Root cause of "file content corruption".** #1 was the proximate cause. Errors like ``C:/Users/.../hermes-snap-xxx.sh: No such file or directory`` were emitted on stderr by the failed redirect, captured into stdout via ``stderr=subprocess.STDOUT``, and if the agent used terminal commands like ``cat > file`` the leaked error bytes became part of the file. Fixing #1 eliminates this entirely. ## Tests All 77 Windows-compat tests still pass on Linux (POSIX path is shlex.quote('/tmp/foo.sh') → '/tmp/foo.sh' — unchanged). ## Not addressed here (would need a bigger design) - Python file tools (``write_file``, ``read_file``) and the bash-backed terminal tool see DIFFERENT views of ``/tmp`` on Windows. Python treats ``/tmp`` as ``C:\tmp`` (drive-relative), Git Bash's MSYS2 treats it as a virtual mount to the PortableGit install's ``tmp\``. Would need a translation shim in the Python tools to resolve bash-virtual paths to their native-Windows equivalents. Workaround for users today: use absolute native paths (``C:\Users\you\...``) instead of ``/tmp/...`` when crossing between terminal and Python file tools. --- hermes_cli/stdio.py | 72 ++++++++++++++++++++++++++++++++++++++ tools/environments/base.py | 36 +++++++++++++------ 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/hermes_cli/stdio.py b/hermes_cli/stdio.py index e010304d98..51c3f7ba53 100644 --- a/hermes_cli/stdio.py +++ b/hermes_cli/stdio.py @@ -127,6 +127,17 @@ def configure_windows_stdio() -> bool: if _default_editor and not os.environ.get("EDITOR") and not os.environ.get("VISUAL"): os.environ["EDITOR"] = _default_editor + # Augment PATH with the Hermes-managed Git install directories so + # subprocess calls (bash, rg, grep, etc.) resolve even in sessions + # that started before the User PATH broadcast reached them. When + # install.ps1 adds these to User PATH via SetEnvironmentVariable, + # already-running shells don't see the change — which means hermes + # launched from the install session won't find rg / bash / grep + # even though they're "installed". Prepending the known paths here + # closes that gap. No-op when the paths don't exist (e.g. system-Git + # install without Hermes-managed PortableGit). + _augment_path_with_known_tools() + # Flip the console code page first so that any subprocess that # inherits the console (e.g. a launched shell) also sees CP_UTF8. _flip_console_code_page_to_utf8() @@ -178,3 +189,64 @@ def _default_windows_editor() -> str: # On the extreme off-chance notepad is missing (WinPE, Nano Server), fall # back to nothing and let prompt_toolkit's silent no-op do its thing. return "" + + + +def _augment_path_with_known_tools() -> None: + """Prepend well-known Hermes-managed tool directories to os.environ['PATH']. + + Fixes the "User PATH was just updated but my process can't see it" gap on + Windows. When install.ps1 runs, it adds entries like + ``%LOCALAPPDATA%\\hermes\\git\\bin`` to the User PATH via + ``SetEnvironmentVariable(..., "User")``. That write propagates to newly + *spawned* processes only — already-running shells (including the one the + user invokes ``hermes`` from right after install) retain their old PATH. + + Any subprocess Hermes spawns — bash, ``rg``, ``grep``, ``npm`` — inherits + that stale PATH and reports commands as missing even though they're on + disk. Symptom: ``search_files`` reports "rg/find not available" when + the user clearly just installed ripgrep. + + Patch-up strategy: add the known Hermes-managed tool directories to our + PATH at startup so subprocess calls resolve correctly. No-op on POSIX + and when the directories don't exist. The User PATH broadcast still + happens in the background for future shells; this just smooths over + the first-launch gap. + """ + if not is_windows(): + return + + import shutil as _shutil + + local_appdata = os.environ.get("LOCALAPPDATA", "") + if not local_appdata: + return + + # Known tool dirs installed by scripts/install.ps1. Kept in sync with + # the PATH entries that installer adds to User scope — the two lists + # should match so this prefill fully mirrors what a fresh shell would + # see on next launch. + candidate_dirs = [ + os.path.join(local_appdata, "hermes", "git", "cmd"), + os.path.join(local_appdata, "hermes", "git", "bin"), + os.path.join(local_appdata, "hermes", "git", "usr", "bin"), + # Hermes venv Scripts directory — host of the hermes.exe shim itself, + # also where any pip-installed console scripts land. Usually already + # on PATH when the user invokes hermes, but harmless to include. + os.path.join(local_appdata, "hermes", "hermes-agent", "venv", "Scripts"), + # WinGet packages directory — where ``winget install`` drops CLI + # shims by default (ripgrep lands here as rg.exe). Covers the case + # of a system-Git install + ripgrep-via-winget that isn't yet on + # the spawning shell's PATH. + os.path.join(local_appdata, "Microsoft", "WinGet", "Links"), + ] + + existing = os.environ.get("PATH", "") + existing_lower = {p.lower() for p in existing.split(os.pathsep) if p} + prepend = [] + for d in candidate_dirs: + if os.path.isdir(d) and d.lower() not in existing_lower: + prepend.append(d) + + if prepend: + os.environ["PATH"] = os.pathsep.join([*prepend, existing]) diff --git a/tools/environments/base.py b/tools/environments/base.py index f0264ba3c9..2420b06b1b 100644 --- a/tools/environments/base.py +++ b/tools/environments/base.py @@ -339,15 +339,24 @@ class BaseEnvironment(ABC): # change the working directory (e.g. bashrc `cd ~`). Without this, # pwd -P captures the profile's directory, not terminal.cwd. _quoted_cwd = shlex.quote(self.cwd) + # Quote the snapshot / cwd-file paths so Git Bash on Windows handles + # ``C:/Users/...``-shaped paths without glob-splitting the colon or + # tripping on drive letters. On POSIX this is a no-op (no colons / + # special chars in a /tmp path). Previously unquoted interpolation + # caused ``C:/Users/.../hermes-snap-*.sh: No such file or directory`` + # errors on Windows, leaking via stderr (merged into stdout on Linux + # backends) into every terminal-tool response. + _quoted_snap = shlex.quote(self._snapshot_path) + _quoted_cwd_file = shlex.quote(self._cwd_file) bootstrap = ( - f"export -p > {self._snapshot_path}\n" - f"declare -f | grep -vE '^_[^_]' >> {self._snapshot_path}\n" - f"alias -p >> {self._snapshot_path}\n" - f"echo 'shopt -s expand_aliases' >> {self._snapshot_path}\n" - f"echo 'set +e' >> {self._snapshot_path}\n" - f"echo 'set +u' >> {self._snapshot_path}\n" + f"export -p > {_quoted_snap}\n" + f"declare -f | grep -vE '^_[^_]' >> {_quoted_snap}\n" + f"alias -p >> {_quoted_snap}\n" + f"echo 'shopt -s expand_aliases' >> {_quoted_snap}\n" + f"echo 'set +e' >> {_quoted_snap}\n" + f"echo 'set +u' >> {_quoted_snap}\n" f"builtin cd {_quoted_cwd} 2>/dev/null || true\n" - f"pwd -P > {self._cwd_file} 2>/dev/null || true\n" + f"pwd -P > {_quoted_cwd_file} 2>/dev/null || true\n" f"printf '\\n{self._cwd_marker}%s{self._cwd_marker}\\n' \"$(pwd -P)\"\n" ) try: @@ -389,6 +398,13 @@ class BaseEnvironment(ABC): re-dumps env vars, and emits CWD markers.""" escaped = command.replace("'", "'\\''") + # Quote the snapshot / cwd-file paths so Git Bash on Windows handles + # ``C:/Users/...``-shaped paths without glob-splitting the colon or + # tripping on drive letters. POSIX paths are unaffected. See + # :meth:`init_session` for the same fix on the bootstrap block. + _quoted_snap = shlex.quote(self._snapshot_path) + _quoted_cwd_file = shlex.quote(self._cwd_file) + parts = [] # Source snapshot (env vars from previous commands). @@ -399,7 +415,7 @@ class BaseEnvironment(ABC): # silent here, but the redirect is harmless. if self._snapshot_ready: parts.append( - f"source {self._snapshot_path} >/dev/null 2>&1 || true" + f"source {_quoted_snap} >/dev/null 2>&1 || true" ) # Preserve bare ``~`` expansion, but rewrite ``~/...`` through @@ -414,10 +430,10 @@ class BaseEnvironment(ABC): # Re-dump env vars to snapshot (last-writer-wins for concurrent calls) if self._snapshot_ready: - parts.append(f"export -p > {self._snapshot_path} 2>/dev/null || true") + parts.append(f"export -p > {_quoted_snap} 2>/dev/null || true") # Write CWD to file (local reads this) and stdout marker (remote parses this) - parts.append(f"pwd -P > {self._cwd_file} 2>/dev/null || true") + parts.append(f"pwd -P > {_quoted_cwd_file} 2>/dev/null || true") # Use a distinct line for the marker. The leading \n ensures # the marker starts on its own line even if the command doesn't # end with a newline (e.g. printf 'exact'). We'll strip this