diff --git a/tests/tools/test_search_error_guard.py b/tests/tools/test_search_error_guard.py new file mode 100644 index 00000000000..aa76dba6cc3 --- /dev/null +++ b/tests/tools/test_search_error_guard.py @@ -0,0 +1,160 @@ +"""Regression tests for the rg/grep error guard in content search. + +The guard in ``_search_with_rg`` / ``_search_with_grep`` had two defects on +``origin/main`` (see PR replacing #39710): + +1. **Unreachable on a hard error.** Both methods pipe the search through + ``| head`` with no ``pipefail``, so the pipeline reported head's exit code + (0), masking rg/grep's error code (2). The guard never fired, and the + error text — merged into stdout by ``_exec`` (``stderr=subprocess.STDOUT``) + — was parsed as bogus match lines instead of being surfaced. + +2. **Would have nuked partial results if it ever did fire.** A broad + ``exit_code == 2`` check discards real matches whenever rg/grep also hit a + non-fatal error (e.g. one unreadable file in a tree that otherwise + matched), which both tools signal with exit 2. + +The fix adds ``set -o pipefail`` so the real exit code propagates, splits +tool diagnostics from match output by *shape*, and only surfaces an error +when exit==2 AND no usable match payload remains. + +These tests drive the real methods through the real local terminal backend. +""" + +import os +import shutil + +import pytest + +from tools.file_operations import ( + ShellFileOperations, + _split_tool_diagnostics, +) +from tools.environments.local import LocalEnvironment + + +def _ops(root): + return ShellFileOperations(LocalEnvironment(cwd=str(root)), cwd=str(root)) + + +@pytest.fixture +def match_tree(tmp_path): + """A tree with several files all containing 'needle'.""" + for i in range(5): + (tmp_path / f"f{i}.txt").write_text(f"needle line {i}\n") + return tmp_path + + +@pytest.fixture +def partial_error_tree(tmp_path): + """A tree with matches plus one unreadable file (forces exit 2 + matches).""" + for i in range(4): + (tmp_path / f"f{i}.txt").write_text(f"needle line {i}\n") + sub = tmp_path / "sub" + sub.mkdir() + locked = sub / "locked.txt" + locked.write_text("needle in locked\n") + os.chmod(locked, 0o000) + yield tmp_path + os.chmod(locked, 0o755) # let pytest clean up tmp_path + + +# Run every test once per available backend method. +_METHODS = ["_search_with_grep"] +if shutil.which("rg"): + _METHODS.append("_search_with_rg") + + +def _search(ops, method, pattern, path, **kw): + fn = getattr(ops, method) + return fn(pattern, str(path), kw.get("file_glob"), kw.get("limit", 50), + kw.get("offset", 0), kw.get("output_mode", "content"), + kw.get("context", 0)) + + +@pytest.mark.parametrize("method", _METHODS) +class TestSearchErrorGuard: + def test_happy_path_returns_matches(self, method, match_tree): + res = _search(_ops(match_tree), method, "needle", match_tree) + assert res.error is None + assert len(res.matches) == 5 + + def test_hard_error_is_surfaced(self, method, match_tree): + # An invalid regex makes rg/grep exit 2 with only diagnostics in + # stdout. The guard MUST surface it — not return empty matches. + res = _search(_ops(match_tree), method, "[", match_tree) + assert res.error is not None, "search error was silently swallowed" + assert "Search failed" in res.error + assert not res.matches + + def test_partial_error_keeps_matches(self, method, partial_error_tree): + # rg/grep exit 2 because of the unreadable file, but the readable + # files matched. Those matches must be preserved, not discarded. + res = _search(_ops(partial_error_tree), method, "needle", partial_error_tree) + assert res.error is None, f"partial error wrongly surfaced: {res.error!r}" + assert len(res.matches) >= 4 + + def test_no_match_is_empty_not_error(self, method, match_tree): + res = _search(_ops(match_tree), method, "zzznomatchzzz", match_tree) + assert res.error is None + assert not res.matches + + def test_truncation_no_false_error(self, method, tmp_path): + # head truncates a large result set. With pipefail, grep exits 141 + # (SIGPIPE) on truncation; the strict `== 2` guard must ignore it. + big = tmp_path / "big.txt" + big.write_text("".join(f"needle {i}\n" for i in range(3000))) + res = _search(_ops(tmp_path), method, "needle", tmp_path, limit=5) + assert res.error is None, f"truncated success wrongly errored: {res.error!r}" + assert len(res.matches) == 5 + + def test_files_only_excludes_diagnostics(self, method, partial_error_tree): + # files_only mode must not list a diagnostic line as a fake file path. + res = _search(_ops(partial_error_tree), method, "needle", + partial_error_tree, output_mode="files_only") + assert res.error is None + assert res.files, "expected matching files" + assert all("Permission denied" not in f and "locked.txt" not in f + for f in res.files), f"diagnostic leaked into files: {res.files}" + + def test_count_mode_with_partial_error(self, method, partial_error_tree): + res = _search(_ops(partial_error_tree), method, "needle", + partial_error_tree, output_mode="count") + assert res.error is None + assert res.total_count >= 4 + + +class TestSplitToolDiagnostics: + """Unit coverage for the shape-based diagnostic/payload splitter.""" + + def test_pure_error_has_empty_payload(self): + out = "rg: regex parse error:\n (?:[)\n ^\nerror: unclosed character class\n" + diagnostics, payload = _split_tool_diagnostics(out) + assert payload.strip() == "" + assert "regex parse error" in diagnostics + + def test_partial_error_separates_matches(self): + out = ("rg: sub/locked.txt: Permission denied (os error 13)\n" + "a.txt:1:needle here\nb.txt:2:needle there\n") + diagnostics, payload = _split_tool_diagnostics(out) + assert "Permission denied" in diagnostics + assert "a.txt:1:needle here" in payload + assert "b.txt:2:needle there" in payload + assert "Permission denied" not in payload + + def test_files_only_is_payload(self): + diagnostics, payload = _split_tool_diagnostics("src/a.py\nsrc/b.py\n") + assert diagnostics == "" + assert payload == "src/a.py\nsrc/b.py" + + def test_count_lines_are_payload(self): + diagnostics, payload = _split_tool_diagnostics("src/a.py:3\nsrc/b.py:1\n") + assert diagnostics == "" + assert "src/a.py:3" in payload + + def test_context_lines_and_separator_are_payload(self): + out = "a.py:5:hit\na.py-6-after\n--\nb.py:9:hit\n" + diagnostics, payload = _split_tool_diagnostics(out) + assert diagnostics == "" + assert "--" in payload + assert "a.py-6-after" in payload diff --git a/tools/file_operations.py b/tools/file_operations.py index b9c5bcc2840..9815673eec1 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -285,6 +285,63 @@ class ExecuteResult: exit_code: int = 0 +def _split_tool_diagnostics(output: str) -> tuple[str, str]: + """Separate rg/grep diagnostic lines from real match output. + + ``_exec`` runs commands with ``stderr=subprocess.STDOUT``, so error and + warning text from ``rg``/``grep`` is interleaved with match lines in a + single stream. Diagnostics must not be parsed as matches, and on a hard + failure they are the error message to surface. + + Returns ``(diagnostics, payload)`` where ``payload`` contains only lines + that look like real search output — a match line (``file:line:content``), + a files-only path, a count line, or a context line/separator. Everything + else (tool-prefixed errors, rg's multi-line ``regex parse error`` block + with its indented carets, blank lines) is folded into ``diagnostics``. + + Classifying by *shape* rather than by error prefix is what lets the + exit-2 guard distinguish a pure failure (no usable payload → surface the + error) from a partial failure (some files matched, one was unreadable → + keep the matches). It also means error text can never be mis-parsed as a + match, a latent bug that predates the exit-code fix. + """ + diagnostics: list[str] = [] + payload: list[str] = [] + for line in output.split('\n'): + if not line.strip(): + continue + # Tool diagnostics always carry the ": " prefix (e.g. + # "rg: : Permission denied", "grep: Invalid regular + # expression", "rg: regex parse error:"). Check this first: a real + # match path can legitimately contain "-" (e.g. a tmp dir like + # ".../pytest-686/..."), which the shape regex would otherwise treat + # as a match line. + stripped = line.lstrip() + if stripped.startswith("rg: ") or stripped.startswith("grep: "): + diagnostics.append(line) + continue + # Otherwise classify by output shape. rg's regex-parse-error block + # also emits an indented caret line and a trailing "error: ..." line + # with no tool prefix; neither matches a search-output shape, so they + # fall through to diagnostics. + # match / count : ":<...>" (has a colon; rg -c uses path:count) + # files_only : "" (no whitespace, no leading colon) + # context line : "--" or the "--" group separator + if line == "--" or _SEARCH_OUTPUT_RE.match(line): + payload.append(line) + else: + diagnostics.append(line) + return '\n'.join(diagnostics), '\n'.join(payload) + + +# A real rg/grep output line starts with a path token and is followed by a +# ``:`` (match/count), a ``-`` (context), or nothing (files_only). Tool +# diagnostics ("rg: ...", "grep: ...", "error: ...", indented carets) never +# match because the path token forbids whitespace and a leading tool prefix +# like "rg" is followed by ": " (space) which the negated class rejects. +_SEARCH_OUTPUT_RE = re.compile(r'^([A-Za-z]:)?[^\s:][^\n]*?[:\-]\d|^[^\s:][^\s]*$') + + def _parse_search_context_line(line: str) -> tuple[str, int, str] | None: """Parse grep/rg context output in ``path-line-content`` format. @@ -2038,24 +2095,40 @@ class ShellFileOperations(FileOperations): fetch_limit = limit + offset + 200 if context > 0 else limit + offset cmd_parts.extend(["|", "head", "-n", str(fetch_limit)]) - cmd = " ".join(cmd_parts) + # `set -o pipefail` so rg's exit status propagates through `| head`. + # Without it the pipeline reports head's status (0), masking rg's + # error code (2) and making the guard below unreachable. rg handles a + # truncating head cleanly (exit 0 on SIGPIPE), so pipefail does not + # introduce false errors on a successful-but-truncated search. + cmd = "set -o pipefail; " + " ".join(cmd_parts) result = self._exec(cmd, timeout=60) - - # rg exit codes: 0=matches found, 1=no matches, 2=error - if result.exit_code == 2 and not result.stdout.strip(): - error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error" + + # _exec merges stderr into stdout (stderr=subprocess.STDOUT), so rg's + # diagnostic lines ("rg: : ", "rg: regex parse error:") + # are interleaved with match output. Split them out: diagnostics must + # not be parsed as matches, and on a hard error they ARE the message. + diagnostics, payload = _split_tool_diagnostics(result.stdout) + + # rg exit codes: 0=matches found, 1=no matches, 2=error. rg returns 2 + # even on partial errors (e.g. one unreadable file in a tree that + # otherwise matched), so only surface an error when exit==2 AND no + # usable match payload remains. Otherwise we keep the real matches. + if result.exit_code == 2 and not payload.strip(): + error_msg = diagnostics.strip() or result.stdout.strip() or "Search error" return SearchResult(error=f"Search failed: {error_msg}", total_count=0) - + + # Parse the diagnostic-free payload so error text never becomes a match. + stdout = payload # Parse results based on output mode if output_mode == "files_only": - all_files = [f for f in result.stdout.strip().split('\n') if f] + all_files = [f for f in stdout.strip().split('\n') if f] total = len(all_files) page = all_files[offset:offset + limit] return SearchResult(files=page, total_count=total) elif output_mode == "count": counts = {} - for line in result.stdout.strip().split('\n'): + for line in stdout.strip().split('\n'): if ':' in line: parts = line.rsplit(':', 1) if len(parts) == 2: @@ -2074,7 +2147,7 @@ class ShellFileOperations(FileOperations): # so naive split(":") breaks. Use regex to handle both platforms. _match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$') matches = [] - for line in result.stdout.strip().split('\n'): + for line in stdout.strip().split('\n'): if not line or line == "--": continue @@ -2138,23 +2211,38 @@ class ShellFileOperations(FileOperations): fetch_limit = limit + offset + (200 if context > 0 else 0) cmd_parts.extend(["|", "head", "-n", str(fetch_limit)]) - cmd = " ".join(cmd_parts) + # `set -o pipefail` so grep's exit status propagates through `| head` + # (without it the pipeline reports head's 0, masking grep's error 2). + # A truncating head makes grep exit 141 (SIGPIPE) on an otherwise + # successful search; the strict `== 2` guard below ignores that, so + # pipefail does not turn truncated results into false errors. + cmd = "set -o pipefail; " + " ".join(cmd_parts) result = self._exec(cmd, timeout=60) - - # grep exit codes: 0=matches found, 1=no matches, 2=error - if result.exit_code == 2 and not result.stdout.strip(): - error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error" + + # _exec merges stderr into stdout, so grep's diagnostic lines + # ("grep: : ") are interleaved with matches. Split them + # out so they're never parsed as matches and so a hard error has a + # clean message. + diagnostics, payload = _split_tool_diagnostics(result.stdout) + + # grep exit codes: 0=matches found, 1=no matches, 2=error. grep + # returns 2 on partial errors (e.g. an unreadable file) even when + # other files matched, so only surface an error when exit==2 AND no + # usable match payload remains. + if result.exit_code == 2 and not payload.strip(): + error_msg = diagnostics.strip() or result.stdout.strip() or "Search error" return SearchResult(error=f"Search failed: {error_msg}", total_count=0) - + + stdout = payload if output_mode == "files_only": - all_files = [f for f in result.stdout.strip().split('\n') if f] + all_files = [f for f in stdout.strip().split('\n') if f] total = len(all_files) page = all_files[offset:offset + limit] return SearchResult(files=page, total_count=total) elif output_mode == "count": counts = {} - for line in result.stdout.strip().split('\n'): + for line in stdout.strip().split('\n'): if ':' in line: parts = line.rsplit(':', 1) if len(parts) == 2: @@ -2172,7 +2260,7 @@ class ShellFileOperations(FileOperations): # so naive split(":") breaks. Use regex to handle both platforms. _match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$') matches = [] - for line in result.stdout.strip().split('\n'): + for line in stdout.strip().split('\n'): if not line or line == "--": continue