fix(file-ops): make rg/grep search error guard reachable and preserve partial matches (#39858)

The error guard in _search_with_rg/_search_with_grep was unreachable and, if it had fired, would have discarded valid results. Two root causes: 1. Unreachable. Both methods pipe the search through `| head` with no pipefail, so the pipeline reported head's exit code (0), masking rg/grep's error code (2). The guard never fired. Worse, because _exec merges stderr into stdout (stderr=subprocess.STDOUT), the error text was then parsed as bogus match lines instead of being surfaced — the user got garbage matches with no indication the search failed. 2. Latent results-dropping. The original `not result.stdout.strip()` check was always False on error (error text lives in stdout), and the `hasattr(result, 'stderr')` branch was dead code (ExecuteResult has no stderr field). A naive broadening to `exit_code == 2` would have nuked real matches whenever rg/grep also hit a non-fatal error (e.g. one unreadable file in a tree that otherwise matched), which both tools signal with exit 2. Fix: - Prefix the piped command with `set -o pipefail` so rg/grep's real exit status propagates. rg exits 0 on a truncating head; grep exits 141 (SIGPIPE), so the strict `== 2` guard ignores truncated-success. - Add _split_tool_diagnostics() to separate tool diagnostics from match output by tool prefix and output shape. Diagnostics never become matches; on a hard error they are the message to surface. - Only surface an error when exit==2 AND no usable match payload remains, so partial errors keep their real matches. Tests: tests/tools/test_search_error_guard.py drives both methods through the real local backend (hard error surfaced, partial error keeps matches, truncation no false error, files_only/count exclude diagnostics) plus unit coverage for the splitter. Supersedes #39710.
2026-07-24 16:54:43 +00:00 · 2026-06-05 17:44:52 -07:00 · 2026-06-05 17:44:52 -07:00 · ea266f43e9
commit ea266f43e9
parent 66a6b9c930
2 changed files with 266 additions and 18 deletions
--- a/tests/tools/test_search_error_guard.py
+++ b/tests/tools/test_search_error_guard.py
@ -0,0 +1,160 @@
+"""Regression tests for the rg/grep error guard in content search.
+
+The guard in ``_search_with_rg`` / ``_search_with_grep`` had two defects on
+``origin/main`` (see PR replacing #39710):
+
+1. **Unreachable on a hard error.** Both methods pipe the search through
+   ``| head`` with no ``pipefail``, so the pipeline reported head's exit code
+   (0), masking rg/grep's error code (2). The guard never fired, and the
+   error text — merged into stdout by ``_exec`` (``stderr=subprocess.STDOUT``)
+   — was parsed as bogus match lines instead of being surfaced.
+
+2. **Would have nuked partial results if it ever did fire.** A broad
+   ``exit_code == 2`` check discards real matches whenever rg/grep also hit a
+   non-fatal error (e.g. one unreadable file in a tree that otherwise
+   matched), which both tools signal with exit 2.
+
+The fix adds ``set -o pipefail`` so the real exit code propagates, splits
+tool diagnostics from match output by *shape*, and only surfaces an error
+when exit==2 AND no usable match payload remains.
+
+These tests drive the real methods through the real local terminal backend.
+"""
+
+import os
+import shutil
+
+import pytest
+
+from tools.file_operations import (
+    ShellFileOperations,
+    _split_tool_diagnostics,
+)
+from tools.environments.local import LocalEnvironment
+
+
+def _ops(root):
+    return ShellFileOperations(LocalEnvironment(cwd=str(root)), cwd=str(root))
+
+
+@pytest.fixture
+def match_tree(tmp_path):
+    """A tree with several files all containing 'needle'."""
+    for i in range(5):
+        (tmp_path / f"f{i}.txt").write_text(f"needle line {i}\n")
+    return tmp_path
+
+
+@pytest.fixture
+def partial_error_tree(tmp_path):
+    """A tree with matches plus one unreadable file (forces exit 2 + matches)."""
+    for i in range(4):
+        (tmp_path / f"f{i}.txt").write_text(f"needle line {i}\n")
+    sub = tmp_path / "sub"
+    sub.mkdir()
+    locked = sub / "locked.txt"
+    locked.write_text("needle in locked\n")
+    os.chmod(locked, 0o000)
+    yield tmp_path
+    os.chmod(locked, 0o755)  # let pytest clean up tmp_path
+
+
+# Run every test once per available backend method.
+_METHODS = ["_search_with_grep"]
+if shutil.which("rg"):
+    _METHODS.append("_search_with_rg")
+
+
+def _search(ops, method, pattern, path, **kw):
+    fn = getattr(ops, method)
+    return fn(pattern, str(path), kw.get("file_glob"), kw.get("limit", 50),
+              kw.get("offset", 0), kw.get("output_mode", "content"),
+              kw.get("context", 0))
+
+
+@pytest.mark.parametrize("method", _METHODS)
+class TestSearchErrorGuard:
+    def test_happy_path_returns_matches(self, method, match_tree):
+        res = _search(_ops(match_tree), method, "needle", match_tree)
+        assert res.error is None
+        assert len(res.matches) == 5
+
+    def test_hard_error_is_surfaced(self, method, match_tree):
+        # An invalid regex makes rg/grep exit 2 with only diagnostics in
+        # stdout. The guard MUST surface it — not return empty matches.
+        res = _search(_ops(match_tree), method, "[", match_tree)
+        assert res.error is not None, "search error was silently swallowed"
+        assert "Search failed" in res.error
+        assert not res.matches
+
+    def test_partial_error_keeps_matches(self, method, partial_error_tree):
+        # rg/grep exit 2 because of the unreadable file, but the readable
+        # files matched. Those matches must be preserved, not discarded.
+        res = _search(_ops(partial_error_tree), method, "needle", partial_error_tree)
+        assert res.error is None, f"partial error wrongly surfaced: {res.error!r}"
+        assert len(res.matches) >= 4
+
+    def test_no_match_is_empty_not_error(self, method, match_tree):
+        res = _search(_ops(match_tree), method, "zzznomatchzzz", match_tree)
+        assert res.error is None
+        assert not res.matches
+
+    def test_truncation_no_false_error(self, method, tmp_path):
+        # head truncates a large result set. With pipefail, grep exits 141
+        # (SIGPIPE) on truncation; the strict `== 2` guard must ignore it.
+        big = tmp_path / "big.txt"
+        big.write_text("".join(f"needle {i}\n" for i in range(3000)))
+        res = _search(_ops(tmp_path), method, "needle", tmp_path, limit=5)
+        assert res.error is None, f"truncated success wrongly errored: {res.error!r}"
+        assert len(res.matches) == 5
+
+    def test_files_only_excludes_diagnostics(self, method, partial_error_tree):
+        # files_only mode must not list a diagnostic line as a fake file path.
+        res = _search(_ops(partial_error_tree), method, "needle",
+                      partial_error_tree, output_mode="files_only")
+        assert res.error is None
+        assert res.files, "expected matching files"
+        assert all("Permission denied" not in f and "locked.txt" not in f
+                   for f in res.files), f"diagnostic leaked into files: {res.files}"
+
+    def test_count_mode_with_partial_error(self, method, partial_error_tree):
+        res = _search(_ops(partial_error_tree), method, "needle",
+                      partial_error_tree, output_mode="count")
+        assert res.error is None
+        assert res.total_count >= 4
+
+
+class TestSplitToolDiagnostics:
+    """Unit coverage for the shape-based diagnostic/payload splitter."""
+
+    def test_pure_error_has_empty_payload(self):
+        out = "rg: regex parse error:\n    (?:[)\n       ^\nerror: unclosed character class\n"
+        diagnostics, payload = _split_tool_diagnostics(out)
+        assert payload.strip() == ""
+        assert "regex parse error" in diagnostics
+
+    def test_partial_error_separates_matches(self):
+        out = ("rg: sub/locked.txt: Permission denied (os error 13)\n"
+               "a.txt:1:needle here\nb.txt:2:needle there\n")
+        diagnostics, payload = _split_tool_diagnostics(out)
+        assert "Permission denied" in diagnostics
+        assert "a.txt:1:needle here" in payload
+        assert "b.txt:2:needle there" in payload
+        assert "Permission denied" not in payload
+
+    def test_files_only_is_payload(self):
+        diagnostics, payload = _split_tool_diagnostics("src/a.py\nsrc/b.py\n")
+        assert diagnostics == ""
+        assert payload == "src/a.py\nsrc/b.py"
+
+    def test_count_lines_are_payload(self):
+        diagnostics, payload = _split_tool_diagnostics("src/a.py:3\nsrc/b.py:1\n")
+        assert diagnostics == ""
+        assert "src/a.py:3" in payload
+
+    def test_context_lines_and_separator_are_payload(self):
+        out = "a.py:5:hit\na.py-6-after\n--\nb.py:9:hit\n"
+        diagnostics, payload = _split_tool_diagnostics(out)
+        assert diagnostics == ""
+        assert "--" in payload
+        assert "a.py-6-after" in payload
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@ -285,6 +285,63 @@ class ExecuteResult:
    exit_code: int = 0


+def _split_tool_diagnostics(output: str) -> tuple[str, str]:
+    """Separate rg/grep diagnostic lines from real match output.
+
+    ``_exec`` runs commands with ``stderr=subprocess.STDOUT``, so error and
+    warning text from ``rg``/``grep`` is interleaved with match lines in a
+    single stream. Diagnostics must not be parsed as matches, and on a hard
+    failure they are the error message to surface.
+
+    Returns ``(diagnostics, payload)`` where ``payload`` contains only lines
+    that look like real search output — a match line (``file:line:content``),
+    a files-only path, a count line, or a context line/separator. Everything
+    else (tool-prefixed errors, rg's multi-line ``regex parse error`` block
+    with its indented carets, blank lines) is folded into ``diagnostics``.
+
+    Classifying by *shape* rather than by error prefix is what lets the
+    exit-2 guard distinguish a pure failure (no usable payload → surface the
+    error) from a partial failure (some files matched, one was unreadable →
+    keep the matches). It also means error text can never be mis-parsed as a
+    match, a latent bug that predates the exit-code fix.
+    """
+    diagnostics: list[str] = []
+    payload: list[str] = []
+    for line in output.split('\n'):
+        if not line.strip():
+            continue
+        # Tool diagnostics always carry the "<tool>: " prefix (e.g.
+        # "rg: <file>: Permission denied", "grep: Invalid regular
+        # expression", "rg: regex parse error:"). Check this first: a real
+        # match path can legitimately contain "-<digit>" (e.g. a tmp dir like
+        # ".../pytest-686/..."), which the shape regex would otherwise treat
+        # as a match line.
+        stripped = line.lstrip()
+        if stripped.startswith("rg: ") or stripped.startswith("grep: "):
+            diagnostics.append(line)
+            continue
+        # Otherwise classify by output shape. rg's regex-parse-error block
+        # also emits an indented caret line and a trailing "error: ..." line
+        # with no tool prefix; neither matches a search-output shape, so they
+        # fall through to diagnostics.
+        #   match / count : "<path>:<...>"   (has a colon; rg -c uses path:count)
+        #   files_only    : "<path>"         (no whitespace, no leading colon)
+        #   context line  : "<path>-<line>-" or the "--" group separator
+        if line == "--" or _SEARCH_OUTPUT_RE.match(line):
+            payload.append(line)
+        else:
+            diagnostics.append(line)
+    return '\n'.join(diagnostics), '\n'.join(payload)
+
+
+# A real rg/grep output line starts with a path token and is followed by a
+# ``:`` (match/count), a ``-`` (context), or nothing (files_only). Tool
+# diagnostics ("rg: ...", "grep: ...", "error: ...", indented carets) never
+# match because the path token forbids whitespace and a leading tool prefix
+# like "rg" is followed by ": " (space) which the negated class rejects.
+_SEARCH_OUTPUT_RE = re.compile(r'^([A-Za-z]:)?[^\s:][^\n]*?[:\-]\d|^[^\s:][^\s]*$')
+
+
 def _parse_search_context_line(line: str) -> tuple[str, int, str] | None:
    """Parse grep/rg context output in ``path-line-content`` format.

@ -2038,24 +2095,40 @@ class ShellFileOperations(FileOperations):
        fetch_limit = limit + offset + 200 if context > 0 else limit + offset
        cmd_parts.extend(["|", "head", "-n", str(fetch_limit)])
        
-        cmd = " ".join(cmd_parts)
+        # `set -o pipefail` so rg's exit status propagates through `| head`.
+        # Without it the pipeline reports head's status (0), masking rg's
+        # error code (2) and making the guard below unreachable. rg handles a
+        # truncating head cleanly (exit 0 on SIGPIPE), so pipefail does not
+        # introduce false errors on a successful-but-truncated search.
+        cmd = "set -o pipefail; " + " ".join(cmd_parts)
        result = self._exec(cmd, timeout=60)
-        
-        # rg exit codes: 0=matches found, 1=no matches, 2=error
-        if result.exit_code == 2 and not result.stdout.strip():
-            error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error"
+
+        # _exec merges stderr into stdout (stderr=subprocess.STDOUT), so rg's
+        # diagnostic lines ("rg: <file>: <error>", "rg: regex parse error:")
+        # are interleaved with match output. Split them out: diagnostics must
+        # not be parsed as matches, and on a hard error they ARE the message.
+        diagnostics, payload = _split_tool_diagnostics(result.stdout)
+
+        # rg exit codes: 0=matches found, 1=no matches, 2=error. rg returns 2
+        # even on partial errors (e.g. one unreadable file in a tree that
+        # otherwise matched), so only surface an error when exit==2 AND no
+        # usable match payload remains. Otherwise we keep the real matches.
+        if result.exit_code == 2 and not payload.strip():
+            error_msg = diagnostics.strip() or result.stdout.strip() or "Search error"
            return SearchResult(error=f"Search failed: {error_msg}", total_count=0)
-        
+
+        # Parse the diagnostic-free payload so error text never becomes a match.
+        stdout = payload
        # Parse results based on output mode
        if output_mode == "files_only":
-            all_files = [f for f in result.stdout.strip().split('\n') if f]
+            all_files = [f for f in stdout.strip().split('\n') if f]
            total = len(all_files)
            page = all_files[offset:offset + limit]
            return SearchResult(files=page, total_count=total)
        
        elif output_mode == "count":
            counts = {}
-            for line in result.stdout.strip().split('\n'):
+            for line in stdout.strip().split('\n'):
                if ':' in line:
                    parts = line.rsplit(':', 1)
                    if len(parts) == 2:
@ -2074,7 +2147,7 @@ class ShellFileOperations(FileOperations):
            # so naive split(":") breaks. Use regex to handle both platforms.
            _match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$')
            matches = []
-            for line in result.stdout.strip().split('\n'):
+            for line in stdout.strip().split('\n'):
                if not line or line == "--":
                    continue
                
@ -2138,23 +2211,38 @@ class ShellFileOperations(FileOperations):
        fetch_limit = limit + offset + (200 if context > 0 else 0)
        cmd_parts.extend(["|", "head", "-n", str(fetch_limit)])
        
-        cmd = " ".join(cmd_parts)
+        # `set -o pipefail` so grep's exit status propagates through `| head`
+        # (without it the pipeline reports head's 0, masking grep's error 2).
+        # A truncating head makes grep exit 141 (SIGPIPE) on an otherwise
+        # successful search; the strict `== 2` guard below ignores that, so
+        # pipefail does not turn truncated results into false errors.
+        cmd = "set -o pipefail; " + " ".join(cmd_parts)
        result = self._exec(cmd, timeout=60)
-        
-        # grep exit codes: 0=matches found, 1=no matches, 2=error
-        if result.exit_code == 2 and not result.stdout.strip():
-            error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error"
+
+        # _exec merges stderr into stdout, so grep's diagnostic lines
+        # ("grep: <file>: <error>") are interleaved with matches. Split them
+        # out so they're never parsed as matches and so a hard error has a
+        # clean message.
+        diagnostics, payload = _split_tool_diagnostics(result.stdout)
+
+        # grep exit codes: 0=matches found, 1=no matches, 2=error. grep
+        # returns 2 on partial errors (e.g. an unreadable file) even when
+        # other files matched, so only surface an error when exit==2 AND no
+        # usable match payload remains.
+        if result.exit_code == 2 and not payload.strip():
+            error_msg = diagnostics.strip() or result.stdout.strip() or "Search error"
            return SearchResult(error=f"Search failed: {error_msg}", total_count=0)
-        
+
+        stdout = payload
        if output_mode == "files_only":
-            all_files = [f for f in result.stdout.strip().split('\n') if f]
+            all_files = [f for f in stdout.strip().split('\n') if f]
            total = len(all_files)
            page = all_files[offset:offset + limit]
            return SearchResult(files=page, total_count=total)
        
        elif output_mode == "count":
            counts = {}
-            for line in result.stdout.strip().split('\n'):
+            for line in stdout.strip().split('\n'):
                if ':' in line:
                    parts = line.rsplit(':', 1)
                    if len(parts) == 2:
@ -2172,7 +2260,7 @@ class ShellFileOperations(FileOperations):
            # so naive split(":") breaks. Use regex to handle both platforms.
            _match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$')
            matches = []
-            for line in result.stdout.strip().split('\n'):
+            for line in stdout.strip().split('\n'):
                if not line or line == "--":
                    continue