diff --git a/tests/tools/test_search_error_guard.py b/tests/tools/test_search_error_guard.py index aa76dba6cc3..e045c8c3d52 100644 --- a/tests/tools/test_search_error_guard.py +++ b/tests/tools/test_search_error_guard.py @@ -28,6 +28,7 @@ import pytest from tools.file_operations import ( ShellFileOperations, + _pattern_has_regex_newline, _split_tool_diagnostics, ) from tools.environments.local import LocalEnvironment @@ -124,6 +125,63 @@ class TestSearchErrorGuard: assert res.total_count >= 4 +class TestSearchContentNewlineWarning: + def test_odd_backslash_n_is_detected_as_regex_newline(self): + assert _pattern_has_regex_newline(r"needle\n") + assert _pattern_has_regex_newline(r"needle\\\n") + + def test_even_backslash_n_is_literal_and_not_detected(self): + assert not _pattern_has_regex_newline(r"needle\\n") + assert not _pattern_has_regex_newline(r"needle\\\\n") + + def test_zero_matches_with_regex_newline_adds_warning_not_error(self, match_tree): + res = _ops(match_tree).search( + r"absent\npattern", + path=str(match_tree), + target="content", + context=2, + ) + + assert res.error is None + assert res.total_count == 0 + assert res.warning is not None + assert "0 results found" in res.warning + assert "-U/--multiline" in res.warning + + def test_actual_newline_pattern_adds_warning_not_error(self, match_tree): + res = _ops(match_tree).search( + "absent\npattern", + path=str(match_tree), + target="content", + ) + + assert res.error is None + assert res.total_count == 0 + assert res.warning is not None + + def test_search_with_matching_alternative_and_regex_newline_warns(self, match_tree): + res = _ops(match_tree).search( + r"needle|absent\npattern", + path=str(match_tree), + target="content", + ) + + assert res.error is None + assert res.total_count == 0 + assert res.warning is not None + + def test_literal_backslash_n_pattern_does_not_warn(self, match_tree): + res = _ops(match_tree).search( + r"absent\\npattern", + path=str(match_tree), + target="content", + ) + + 
assert res.error is None + assert res.total_count == 0 + assert res.warning is None + + class TestSplitToolDiagnostics: """Unit coverage for the shape-based diagnostic/payload splitter.""" diff --git a/tools/file_operations.py b/tools/file_operations.py index c9374a4eff9..78bdd8d63ca 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -242,6 +242,7 @@ class SearchResult: total_count: int = 0 truncated: bool = False limit_reason: Optional[str] = None + warning: Optional[str] = None error: Optional[str] = None # Densify content-mode matches into a path-grouped text block above this @@ -302,6 +303,8 @@ class SearchResult: result["truncated"] = True if self.limit_reason: result["limit_reason"] = self.limit_reason + if self.warning: + result["warning"] = self.warning if self.error: result["error"] = self.error return result @@ -719,6 +722,45 @@ def normalize_search_pagination(offset: Any = DEFAULT_SEARCH_OFFSET, return normalized_offset, normalized_limit +_REGEX_NEWLINE_ESCAPE_RE = re.compile(r"(?<!\\)(?:\\\\)*\\n") + + +def _pattern_has_regex_newline(pattern: str) -> bool: + """Return True when a content-search regex tries to match a newline. + + ``search_files`` runs rg/grep in line-oriented mode, not rg + ``-U``/``--multiline`` mode, so newline regexes cannot match across + lines. Detect both a literal newline already decoded into the tool + argument and a regex ``\n`` escape (odd number of backslashes before + ``n``). Even backslashes, e.g. ``\\n``, mean a literal backslash+n + search and should not warn. 
+ """ + return "\n" in pattern or bool(_REGEX_NEWLINE_ESCAPE_RE.search(pattern)) + + +def _is_line_oriented_newline_error(error: Optional[str]) -> bool: + """Return True for rg's hard error when multiline mode is required.""" + if not error: + return False + return "literal \"\\n\" is not allowed" in error and "--multiline" in error + + +def _maybe_warn_line_oriented_newline_pattern(result: SearchResult, pattern: str) -> SearchResult: + """Attach a newline-regex warning only when search found no usable results.""" + if result.total_count != 0 or not _pattern_has_regex_newline(pattern): + return result + if result.error and not _is_line_oriented_newline_error(result.error): + return result + result.error = None + result.warning = ( + "0 results found. Note: search_files content search is line-oriented " + "and does not run ripgrep with -U/--multiline, so `\\n` in the regex " + "does not match line breaks. Use context=N to inspect neighboring " + "lines, or escape as `\\\\n` when searching for a literal backslash+n." + ) + return result + + class ShellFileOperations(FileOperations): """ File operations implemented via shell commands. @@ -2117,17 +2159,19 @@ class ShellFileOperations(FileOperations): """Search for content inside files (grep-like).""" # Try ripgrep first (fast), fallback to grep (slower but works) if self._has_command('rg'): - return self._search_with_rg(pattern, path, file_glob, limit, offset, - output_mode, context) - elif self._has_command('grep'): - return self._search_with_grep(pattern, path, file_glob, limit, offset, + result = self._search_with_rg(pattern, path, file_glob, limit, offset, output_mode, context) + elif self._has_command('grep'): + result = self._search_with_grep(pattern, path, file_glob, limit, offset, + output_mode, context) else: # Neither rg nor grep available (Windows without Git Bash, etc.) return SearchResult( error="Content search requires ripgrep (rg) or grep. 
" "Install ripgrep: https://github.com/BurntSushi/ripgrep#installation" ) + + return _maybe_warn_line_oriented_newline_pattern(result, pattern) def _search_with_rg(self, pattern: str, path: str, file_glob: Optional[str], limit: int, offset: int, output_mode: str, context: int) -> SearchResult: diff --git a/tools/file_tools.py b/tools/file_tools.py index 42b6153cb95..1fc778e0d6c 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -1595,7 +1595,7 @@ PATCH_SCHEMA = { SEARCH_FILES_SCHEMA = { "name": "search_files", - "description": "Search file contents or find files by name. Use this instead of grep/rg/find/ls in terminal. Ripgrep-backed, faster than shell equivalents.\n\nContent search (target='content'): Regex search inside files. Output modes: full matches with line numbers, file paths only, or match counts. Content search is line-oriented: do not put \\n in regex patterns (even inside alternation); use context to inspect neighboring lines.\n\nFile search (target='files'): Find files by glob pattern (e.g., '*.py', '*config*'). Also use this instead of ls — results sorted by modification time.", + "description": "Search file contents or find files by name. Use this instead of grep/rg/find/ls in terminal. Ripgrep-backed, faster than shell equivalents.\n\nContent search (target='content'): Regex search inside files. Output modes: full matches with line numbers, file paths only, or match counts.\n\nFile search (target='files'): Find files by glob pattern (e.g., '*.py', '*config*'). Also use this instead of ls — results sorted by modification time.", "parameters": { "type": "object", "properties": {