fix(file-ops): follow terminal env's live cwd in _exec instead of init-time cached cwd (#11912)

ShellFileOperations captured the terminal env's cwd at __init__ time and used that stale value for every subsequent _exec() call. When the user ran `cd` via the terminal tool, `env.cwd` updated but `ops.cwd` did not. Relative paths passed to patch_replace / read_file / write_file / search then targeted the ORIGINAL directory instead of the current one. Observed symptom in agent sessions: terminal: cd .worktrees/my-branch patch hermes_cli/main.py <old> <new> → returns {"success": true} with a plausible unified diff → but `git diff` in the worktree shows nothing → the patch landed in the main repo's checkout of main.py instead The diff looked legitimate because patch_replace computes it from the IN-MEMORY content vs new_content, not by re-reading the file. The write itself DID succeed — it just wrote to the wrong directory's copy of the same-named file. Fix: _exec() now resolves cwd from live sources in this order: 1. Explicit `cwd` arg (if provided by the caller) 2. Live `self.env.cwd` (tracks `cd` commands run via terminal) 3. Init-time `self.cwd` (fallback when env has no cwd attribute) Includes a 5-test regression suite covering: - cd followed by relative read follows live cwd - the exact reported bug: patch_replace with relative path after cd - explicit cwd= arg still wins over env.cwd - env without cwd attribute falls back to init-time cwd - patch_replace success reflects real file state (safety rail) Co-authored-by: teknium1 <teknium@nousresearch.com>
2026-04-25 00:51:20 +00:00 · 2026-04-17 19:26:40 -07:00 · 2026-04-17 19:26:40 -07:00 · d7ef562a05
commit d7ef562a05
parent 47010e0757
2 changed files with 211 additions and 6 deletions
--- a/tests/tools/test_file_ops_cwd_tracking.py
+++ b/tests/tools/test_file_ops_cwd_tracking.py
@ -0,0 +1,178 @@
+"""Regression tests for cwd-staleness in ShellFileOperations.
+
+The bug: ShellFileOperations captured the terminal env's cwd at __init__
+time and used that stale value for every subsequent _exec() call.  When
+a user ran ``cd`` via the terminal tool, ``env.cwd`` updated but
+``ops.cwd`` did not.  Relative paths passed to patch/read/write/search
+then targeted the wrong directory — typically the session's start dir
+instead of the current working directory.
+
+Observed symptom: patch_replace() returned ``success=True`` with a
+plausible diff, but the user's ``git diff`` showed no change (because
+the patch landed in a different directory's copy of the same file).
+
+Fix: _exec() now prefers the LIVE ``env.cwd`` over the init-time
+``self.cwd``.  Explicit ``cwd`` arg to _exec still wins over both.
+"""
+
+from __future__ import annotations
+
+import os
+import tempfile
+
+import pytest
+
+from tools.file_operations import ShellFileOperations
+
+
+class _FakeEnv:
+    """Minimal terminal env that tracks cwd across execute() calls.
+
+    Matches the real ``BaseEnvironment`` contract: ``cwd`` attribute plus
+    an ``execute(command, cwd=...)`` method whose return dict carries
+    ``output`` and ``returncode``.  Commands are executed in a real
+    subdirectory so file system effects match production.
+    """
+
+    def __init__(self, start_cwd: str):
+        self.cwd = start_cwd
+        self.calls: list[dict] = []
+
+    def execute(self, command: str, cwd: str = None, **kwargs) -> dict:
+        import subprocess
+        self.calls.append({"command": command, "cwd": cwd})
+        # Simulate cd by updating self.cwd (the real env does the same
+        # via _extract_cwd_from_output after a successful command)
+        if command.strip().startswith("cd "):
+            new = command.strip()[3:].strip()
+            self.cwd = new
+            return {"output": "", "returncode": 0}
+        # Actually run the command — handle stdin via subprocess
+        stdin_data = kwargs.get("stdin_data")
+        proc = subprocess.run(
+            ["bash", "-c", command],
+            cwd=cwd or self.cwd,
+            input=stdin_data,
+            capture_output=True,
+            text=True,
+        )
+        return {
+            "output": proc.stdout + proc.stderr,
+            "returncode": proc.returncode,
+        }
+
+
+class TestShellFileOpsCwdTracking:
+    """_exec() must use live env.cwd, not the init-time cached cwd."""
+
+    def test_exec_follows_env_cwd_after_cd(self, tmp_path):
+        dir_a = tmp_path / "a"
+        dir_b = tmp_path / "b"
+        dir_a.mkdir()
+        dir_b.mkdir()
+        (dir_a / "target.txt").write_text("content-a\n")
+        (dir_b / "target.txt").write_text("content-b\n")
+
+        env = _FakeEnv(start_cwd=str(dir_a))
+        ops = ShellFileOperations(env, cwd=str(dir_a))
+        assert ops.cwd == str(dir_a)  # init-time
+
+        # Simulate the user running `cd b` in terminal
+        env.execute(f"cd {dir_b}")
+        assert env.cwd == str(dir_b)
+        assert ops.cwd == str(dir_a), "ops.cwd is still init-time (fallback only)"
+
+        # Reading a relative path must now hit dir_b, not dir_a
+        result = ops._exec("cat target.txt")
+        assert result.exit_code == 0
+        assert "content-b" in result.stdout, (
+            f"Expected dir_b content, got {result.stdout!r}. "
+            "Stale ops.cwd leaked through — _exec must prefer env.cwd."
+        )
+
+    def test_patch_replace_targets_live_cwd_not_init_cwd(self, tmp_path):
+        """The exact bug reported: patch lands in wrong dir after cd."""
+        dir_a = tmp_path / "main"
+        dir_b = tmp_path / "worktree"
+        dir_a.mkdir()
+        dir_b.mkdir()
+        (dir_a / "t.txt").write_text("shared text\n")
+        (dir_b / "t.txt").write_text("shared text\n")
+
+        env = _FakeEnv(start_cwd=str(dir_a))
+        ops = ShellFileOperations(env, cwd=str(dir_a))
+
+        # Emulate user cd'ing into the worktree
+        env.execute(f"cd {dir_b}")
+        assert env.cwd == str(dir_b)
+
+        # Patch with a RELATIVE path — must target the worktree, not main
+        result = ops.patch_replace("t.txt", "shared text\n", "PATCHED\n")
+        assert result.success is True
+
+        assert (dir_b / "t.txt").read_text() == "PATCHED\n", (
+            "patch must land in the live-cwd dir (worktree)"
+        )
+        assert (dir_a / "t.txt").read_text() == "shared text\n", (
+            "patch must NOT land in the init-time dir (main)"
+        )
+
+    def test_explicit_cwd_arg_still_wins(self, tmp_path):
+        """An explicit cwd= arg to _exec must override both env.cwd and self.cwd."""
+        dir_a = tmp_path / "a"
+        dir_b = tmp_path / "b"
+        dir_c = tmp_path / "c"
+        for d in (dir_a, dir_b, dir_c):
+            d.mkdir()
+        (dir_a / "target.txt").write_text("from-a\n")
+        (dir_b / "target.txt").write_text("from-b\n")
+        (dir_c / "target.txt").write_text("from-c\n")
+
+        env = _FakeEnv(start_cwd=str(dir_a))
+        ops = ShellFileOperations(env, cwd=str(dir_a))
+        env.execute(f"cd {dir_b}")
+
+        # Explicit cwd=dir_c should win over env.cwd (dir_b) and self.cwd (dir_a)
+        result = ops._exec("cat target.txt", cwd=str(dir_c))
+        assert "from-c" in result.stdout
+
+    def test_env_without_cwd_attribute_falls_back_to_self_cwd(self, tmp_path):
+        """Backends without a cwd attribute still work via init-time cwd."""
+        dir_a = tmp_path / "fixed"
+        dir_a.mkdir()
+        (dir_a / "target.txt").write_text("fixed-content\n")
+
+        class _NoCwdEnv:
+            def execute(self, command, cwd=None, **kwargs):
+                import subprocess
+                proc = subprocess.run(["bash", "-c", command], cwd=cwd,
+                                      capture_output=True, text=True)
+                return {"output": proc.stdout, "returncode": proc.returncode}
+
+        env = _NoCwdEnv()
+        ops = ShellFileOperations(env, cwd=str(dir_a))
+        result = ops._exec("cat target.txt")
+        assert result.exit_code == 0
+        assert "fixed-content" in result.stdout
+
+    def test_patch_returns_success_only_when_file_actually_written(self, tmp_path):
+        """Safety rail: patch_replace success must reflect the real file state.
+
+        This test doesn't trigger the bug directly (it would require manual
+        corruption of the write), but it pins the invariant: when
+        patch_replace returns success=True, the file on disk matches the
+        intended content.  If a future write_file change ever regresses,
+        this test catches it.
+        """
+        target = tmp_path / "file.txt"
+        target.write_text("old content\n")
+
+        env = _FakeEnv(start_cwd=str(tmp_path))
+        ops = ShellFileOperations(env, cwd=str(tmp_path))
+
+        result = ops.patch_replace(str(target), "old content\n", "new content\n")
+        assert result.success is True
+        assert result.error is None
+        assert target.read_text() == "new content\n", (
+            "patch_replace claimed success but file wasn't written correctly"
+        )
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@ -330,11 +330,26 @@ class ShellFileOperations(FileOperations):
    def __init__(self, terminal_env, cwd: str = None):
        """
        Initialize file operations with a terminal environment.
-        
+
        Args:
            terminal_env: Any object with execute(command, cwd) method.
                         Returns {"output": str, "returncode": int}
-            cwd: Working directory (defaults to env's cwd or current directory)
+            cwd: Optional explicit fallback cwd when the terminal env has
+                 no cwd attribute (rare — most backends track cwd live).
+
+        Note:
+            Every _exec() call prefers the LIVE ``terminal_env.cwd`` over
+            ``self.cwd`` so ``cd`` commands run via the terminal tool are
+            picked up immediately.  ``self.cwd`` is only used as a fallback
+            when the env has no cwd at all — it is NOT the authoritative
+            cwd, despite being settable at init time.
+
+            Historical bug (fixed): prior versions of this class used the
+            init-time cwd for every _exec() call, which caused relative
+            paths passed to patch/read/write to target the wrong directory
+            after the user ran ``cd`` in the terminal.  Patches would
+            claim success and return a plausible diff but land in the
+            original directory, producing apparent silent failures.
        """
        self.env = terminal_env
        # Determine cwd from various possible sources.
@ -343,25 +358,37 @@ class ShellFileOperations(FileOperations):
        # If nothing provides a cwd, use "/" as a safe universal default.
        self.cwd = cwd or getattr(terminal_env, 'cwd', None) or \
                   getattr(getattr(terminal_env, 'config', None), 'cwd', None) or "/"
-        
+
        # Cache for command availability checks
        self._command_cache: Dict[str, bool] = {}
    
    def _exec(self, command: str, cwd: str = None, timeout: int = None,
              stdin_data: str = None) -> ExecuteResult:
        """Execute command via terminal backend.
-        
+
        Args:
            stdin_data: If provided, piped to the process's stdin instead of
                        embedding in the command string. Bypasses ARG_MAX.
+
+        Cwd resolution order (critical — see class docstring):
+          1. Explicit ``cwd`` arg (if provided)
+          2. Live ``self.env.cwd`` (tracks ``cd`` commands run via terminal)
+          3. Init-time ``self.cwd`` (fallback when env has no cwd attribute)
+
+        This ordering ensures relative paths in file operations follow the
+        terminal's current directory — not the directory this file_ops was
+        originally created in.  See test_file_ops_cwd_tracking.py.
        """
        kwargs = {}
        if timeout:
            kwargs['timeout'] = timeout
        if stdin_data is not None:
            kwargs['stdin_data'] = stdin_data
-        
-        result = self.env.execute(command, cwd=cwd or self.cwd, **kwargs)
+
+        # Resolve cwd from the live env so `cd` commands are picked up.
+        # Fall through to init-time self.cwd only if the env doesn't track cwd.
+        effective_cwd = cwd or getattr(self.env, 'cwd', None) or self.cwd
+        result = self.env.execute(command, cwd=effective_cwd, **kwargs)
        return ExecuteResult(
            stdout=result.get("output", ""),
            exit_code=result.get("returncode", 0)