From fc918867b2bcc311ba8992b73b519d7c49626f3e Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Thu, 7 May 2026 17:51:57 -0700
Subject: [PATCH] fix(windows): quote cache paths in bash + augment PATH so
 rg/bash resolve on first launch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three interrelated bugs from teknium1's first interactive chat on Windows:

1. **Snapshot/cwd file paths unquoted in bash command strings.**  The session
   bootstrap and per-command wrapper interpolated
   ``self._snapshot_path`` / ``self._cwd_file`` unquoted into bash commands
   like ``export -p > C:/Users/ryanc/.../hermes-snap-xxx.sh``.  Git Bash only
   resolves such a path reliably when it reaches the shell as one quoted
   word; unquoted, shell-special characters in the Windows temp path
   (spaces, backslashes, ``~``) are word-split or unescaped and the redirect
   targets a bogus path.  Symptom: every terminal command emitted two
   ``C:/Users/.../hermes-snap-*.sh (No such file or directory)`` lines that
   bled into stdout (``stderr=STDOUT`` on the local backend) and corrupted
   file contents when the agent wrote to scratch paths via the terminal
   tool.  Fix: ``shlex.quote()`` every interpolation of ``_snapshot_path``
   and ``_cwd_file`` in base.py — no-op on POSIX (the paths contain no
   shell-metachars), critical on Windows.  (``shlex.quote()`` returns
   strings made only of word characters and ``@%+=:,./-`` unchanged, so a
   drive colon or forward slash alone does not trigger quoting.)

2. **Stale PATH on first hermes launch after install.**  ``install.ps1``
   adds the PortableGit ``cmd`` / ``bin`` / ``usr\bin`` directories to the
   Windows **User** PATH via ``SetEnvironmentVariable(..., "User")``.  That
   write propagates to newly *spawned* processes only — already-running
   shells (including the one the user types ``hermes`` into immediately
   after install) retain their old PATH.  So hermes starts with a PATH that
   doesn't include bash, rg, grep, ssh — and ``search_files`` reports
   "rg/find not available" when the user clearly just installed them.

   Fix: new ``_augment_path_with_known_tools()`` helper called from
   ``configure_windows_stdio()`` on startup.  Prepends the Hermes-managed
   Git directories, the Hermes venv ``Scripts`` directory, and the WinGet
   Links directory (where ripgrep lands) to ``os.environ['PATH']`` if they
   exist on disk but aren't already in PATH.  Subsequent subprocess calls
   (including bash spawns via ``_find_bash()``) inherit the augmented PATH
   and find everything.  No-op on POSIX and when the directories don't exist.

3. **Root cause of "file content corruption".**  #1 was the proximate cause.
   Errors like ``C:/Users/.../hermes-snap-xxx.sh: No such file or directory``
   were emitted on stderr by the failed redirect, captured into stdout via
   ``stderr=subprocess.STDOUT``, and if the agent used terminal commands
   like ``cat > file`` the leaked error bytes became part of the file.
   Fixing #1 eliminates this entirely.

## Tests

All 77 Windows-compat tests still pass on Linux (POSIX path is
shlex.quote('/tmp/foo.sh') → '/tmp/foo.sh' — unchanged).

## Not addressed here (would need a bigger design)

- Python file tools (``write_file``, ``read_file``) and the bash-backed
  terminal tool see DIFFERENT views of ``/tmp`` on Windows.  Python treats
  ``/tmp`` as ``C:\tmp`` (drive-relative), Git Bash's MSYS2 treats it as
  a virtual mount to the PortableGit install's ``tmp\``.  Would need a
  translation shim in the Python tools to resolve bash-virtual paths to
  their native-Windows equivalents.  Workaround for users today: use
  absolute native paths (``C:\Users\you\...``) instead of ``/tmp/...``
  when crossing between terminal and Python file tools.
---
 hermes_cli/stdio.py        | 72 ++++++++++++++++++++++++++++++++++++++
 tools/environments/base.py | 36 +++++++++++++------
 2 files changed, 98 insertions(+), 10 deletions(-)

diff --git a/hermes_cli/stdio.py b/hermes_cli/stdio.py
index e010304d98..51c3f7ba53 100644
--- a/hermes_cli/stdio.py
+++ b/hermes_cli/stdio.py
@@ -127,6 +127,17 @@ def configure_windows_stdio() -> bool:
     if _default_editor and not os.environ.get("EDITOR") and not os.environ.get("VISUAL"):
         os.environ["EDITOR"] = _default_editor
 
+    # Augment PATH with the Hermes-managed Git install directories so
+    # subprocess calls (bash, rg, grep, etc.) resolve even in sessions
+    # that started before the User PATH broadcast reached them.  When
+    # install.ps1 adds these to User PATH via SetEnvironmentVariable,
+    # already-running shells don't see the change — which means hermes
+    # launched from the install session won't find rg / bash / grep
+    # even though they're "installed".  Prepending the known paths here
+    # closes that gap.  No-op when the paths don't exist (e.g. system-Git
+    # install without Hermes-managed PortableGit).
+    _augment_path_with_known_tools()
+
     # Flip the console code page first so that any subprocess that
     # inherits the console (e.g. a launched shell) also sees CP_UTF8.
     _flip_console_code_page_to_utf8()
@@ -178,3 +189,64 @@ def _default_windows_editor() -> str:
     # On the extreme off-chance notepad is missing (WinPE, Nano Server), fall
     # back to nothing and let prompt_toolkit's silent no-op do its thing.
     return ""
+
+
+
+def _augment_path_with_known_tools() -> None:
+    """Prepend well-known Hermes-managed tool directories to os.environ['PATH'].
+
+    Fixes the "User PATH was just updated but my process can't see it" gap on
+    Windows.  When install.ps1 runs, it adds entries like
+    ``%LOCALAPPDATA%\\hermes\\git\\bin`` to the User PATH via
+    ``SetEnvironmentVariable(..., "User")``.  That write propagates to newly
+    *spawned* processes only — already-running shells (including the one the
+    user invokes ``hermes`` from right after install) retain their old PATH.
+
+    Any subprocess Hermes spawns — bash, ``rg``, ``grep``, ``npm`` — inherits
+    that stale PATH and reports commands as missing even though they're on
+    disk.  Symptom: ``search_files`` reports "rg/find not available" when
+    the user clearly just installed ripgrep.
+
+    Patch-up strategy: prepend each candidate directory that exists on disk
+    and is not already on PATH (compared case-insensitively, ignoring
+    trailing separators) so subprocess calls resolve correctly.  No-op on
+    POSIX, when ``LOCALAPPDATA`` is unset, or when no candidate qualifies.
+    Only this process's ``os.environ`` is changed, never the User PATH.
+    """
+    if not is_windows():
+        return
+
+    local_appdata = os.environ.get("LOCALAPPDATA", "")
+    if not local_appdata:
+        return
+
+    # Known tool dirs installed by scripts/install.ps1.  Kept in sync with
+    # the PATH entries that installer adds to User scope — the two lists
+    # should match so this prefill fully mirrors what a fresh shell would
+    # see on next launch.
+    candidate_dirs = [
+        os.path.join(local_appdata, "hermes", "git", "cmd"),
+        os.path.join(local_appdata, "hermes", "git", "bin"),
+        os.path.join(local_appdata, "hermes", "git", "usr", "bin"),
+        # Hermes venv Scripts directory — host of the hermes.exe shim itself,
+        # also where any pip-installed console scripts land.  Usually already
+        # on PATH when the user invokes hermes, but harmless to include.
+        os.path.join(local_appdata, "hermes", "hermes-agent", "venv", "Scripts"),
+        # WinGet Links directory — where ``winget install`` drops CLI
+        # shims by default (ripgrep lands here as rg.exe).  Covers the case
+        # of a system-Git install + ripgrep-via-winget that isn't yet on
+        # the spawning shell's PATH.
+        os.path.join(local_appdata, "Microsoft", "WinGet", "Links"),
+    ]
+
+    def _norm(path: str) -> str:
+        # Fold case and drop trailing separators so ``C:\Tools\`` == ``c:\tools``.
+        return os.path.normcase(os.path.normpath(path))
+
+    existing = os.environ.get("PATH", "")
+    seen = {_norm(p) for p in existing.split(os.pathsep) if p}
+    prepend = [d for d in candidate_dirs if os.path.isdir(d) and _norm(d) not in seen]
+
+    if prepend:
+        # Leave out an empty old value so PATH gets no trailing separator.
+        os.environ["PATH"] = os.pathsep.join([*prepend, existing] if existing else prepend)
diff --git a/tools/environments/base.py b/tools/environments/base.py
index f0264ba3c9..2420b06b1b 100644
--- a/tools/environments/base.py
+++ b/tools/environments/base.py
@@ -339,15 +339,24 @@ class BaseEnvironment(ABC):
         # change the working directory (e.g. bashrc `cd ~`).  Without this,
         # pwd -P captures the profile's directory, not terminal.cwd.
         _quoted_cwd = shlex.quote(self.cwd)
+        # Shell-quote the snapshot / cwd-file paths before interpolating them.
+        # ``shlex.quote`` returns strings made only of word chars and
+        # ``@%+=:,./-`` unchanged (plain ``/tmp/...`` paths are untouched) and
+        # single-quotes the rest, e.g. spaces or ``~`` in a Windows temp path.
+        # Unquoted interpolation caused ``C:/Users/.../hermes-snap-*.sh: No
+        # such file or directory`` errors on Windows, leaking via stderr (merged
+        # into stdout on the local backend) into every terminal-tool response.
+        _quoted_snap = shlex.quote(self._snapshot_path)
+        _quoted_cwd_file = shlex.quote(self._cwd_file)
         bootstrap = (
-            f"export -p > {self._snapshot_path}\n"
-            f"declare -f | grep -vE '^_[^_]' >> {self._snapshot_path}\n"
-            f"alias -p >> {self._snapshot_path}\n"
-            f"echo 'shopt -s expand_aliases' >> {self._snapshot_path}\n"
-            f"echo 'set +e' >> {self._snapshot_path}\n"
-            f"echo 'set +u' >> {self._snapshot_path}\n"
+            f"export -p > {_quoted_snap}\n"
+            f"declare -f | grep -vE '^_[^_]' >> {_quoted_snap}\n"
+            f"alias -p >> {_quoted_snap}\n"
+            f"echo 'shopt -s expand_aliases' >> {_quoted_snap}\n"
+            f"echo 'set +e' >> {_quoted_snap}\n"
+            f"echo 'set +u' >> {_quoted_snap}\n"
             f"builtin cd {_quoted_cwd} 2>/dev/null || true\n"
-            f"pwd -P > {self._cwd_file} 2>/dev/null || true\n"
+            f"pwd -P > {_quoted_cwd_file} 2>/dev/null || true\n"
             f"printf '\\n{self._cwd_marker}%s{self._cwd_marker}\\n' \"$(pwd -P)\"\n"
         )
         try:
@@ -389,6 +398,13 @@ class BaseEnvironment(ABC):
         re-dumps env vars, and emits CWD markers."""
         escaped = command.replace("'", "'\\''")
 
+        # Shell-quote the snapshot / cwd-file paths so a Windows temp path with
+        # spaces, ``~`` or other shell-special characters reaches Git Bash as a
+        # single word.  ``shlex.quote`` returns plain POSIX paths unchanged.
+        # See :meth:`init_session` for the same fix on the bootstrap block.
+        _quoted_snap = shlex.quote(self._snapshot_path)
+        _quoted_cwd_file = shlex.quote(self._cwd_file)
+
         parts = []
 
         # Source snapshot (env vars from previous commands).
@@ -399,7 +415,7 @@ class BaseEnvironment(ABC):
         # silent here, but the redirect is harmless.
         if self._snapshot_ready:
             parts.append(
-                f"source {self._snapshot_path} >/dev/null 2>&1 || true"
+                f"source {_quoted_snap} >/dev/null 2>&1 || true"
             )
 
         # Preserve bare ``~`` expansion, but rewrite ``~/...`` through
@@ -414,10 +430,10 @@ class BaseEnvironment(ABC):
 
         # Re-dump env vars to snapshot (last-writer-wins for concurrent calls)
         if self._snapshot_ready:
-            parts.append(f"export -p > {self._snapshot_path} 2>/dev/null || true")
+            parts.append(f"export -p > {_quoted_snap} 2>/dev/null || true")
 
         # Write CWD to file (local reads this) and stdout marker (remote parses this)
-        parts.append(f"pwd -P > {self._cwd_file} 2>/dev/null || true")
+        parts.append(f"pwd -P > {_quoted_cwd_file} 2>/dev/null || true")
         # Use a distinct line for the marker. The leading \n ensures
         # the marker starts on its own line even if the command doesn't
         # end with a newline (e.g. printf 'exact'). We'll strip this