fix: warn on line-oriented newline search patterns

This commit is contained in:
Andres Sommerhoff 2026-06-15 07:16:57 -04:00 committed by Teknium
parent eb9a002284
commit 97563ab821
3 changed files with 107 additions and 5 deletions

View file

@ -28,6 +28,7 @@ import pytest
from tools.file_operations import (
ShellFileOperations,
_pattern_has_regex_newline,
_split_tool_diagnostics,
)
from tools.environments.local import LocalEnvironment
@ -124,6 +125,63 @@ class TestSearchErrorGuard:
assert res.total_count >= 4
class TestSearchContentNewlineWarning:
    """Checks for the warning attached to content searches whose regex targets a newline."""

    @staticmethod
    def _content_search(tree, pattern, **extra):
        """Run a content-mode search for *pattern* with *tree* as the search path."""
        return _ops(tree).search(pattern, path=str(tree), target="content", **extra)

    def test_odd_backslash_n_is_detected_as_regex_newline(self):
        """One or three backslashes before ``n`` form a regex newline escape."""
        for candidate in (r"needle\n", r"needle\\\n"):
            assert _pattern_has_regex_newline(candidate)

    def test_even_backslash_n_is_literal_and_not_detected(self):
        """Two or four backslashes before ``n`` are escaped backslashes, not a newline."""
        for candidate in (r"needle\\n", r"needle\\\\n"):
            assert not _pattern_has_regex_newline(candidate)

    def test_zero_matches_with_regex_newline_adds_warning_not_error(self, match_tree):
        """A newline escape with no hits yields an explanatory warning and no error."""
        outcome = self._content_search(match_tree, r"absent\npattern", context=2)

        assert outcome.error is None
        assert outcome.total_count == 0
        assert outcome.warning is not None
        for fragment in ("0 results found", "-U/--multiline"):
            assert fragment in outcome.warning

    def test_actual_newline_pattern_adds_warning_not_error(self, match_tree):
        """A pattern holding a real newline character is handled like the escape."""
        outcome = self._content_search(match_tree, "absent\npattern")

        assert outcome.error is None
        assert outcome.total_count == 0
        assert outcome.warning is not None

    def test_search_with_matching_alternative_and_regex_newline_warns(self, match_tree):
        """A newline escape inside one alternation branch still produces the warning."""
        outcome = self._content_search(match_tree, r"needle|absent\npattern")

        assert outcome.error is None
        assert outcome.total_count == 0
        assert outcome.warning is not None

    def test_literal_backslash_n_pattern_does_not_warn(self, match_tree):
        """Searching for a literal backslash followed by ``n`` must stay warning-free."""
        outcome = self._content_search(match_tree, r"absent\\npattern")

        assert outcome.error is None
        assert outcome.total_count == 0
        assert outcome.warning is None
class TestSplitToolDiagnostics:
"""Unit coverage for the shape-based diagnostic/payload splitter."""

View file

@ -242,6 +242,7 @@ class SearchResult:
total_count: int = 0
truncated: bool = False
limit_reason: Optional[str] = None
warning: Optional[str] = None
error: Optional[str] = None
# Densify content-mode matches into a path-grouped text block above this
@ -302,6 +303,8 @@ class SearchResult:
result["truncated"] = True
if self.limit_reason:
result["limit_reason"] = self.limit_reason
if self.warning:
result["warning"] = self.warning
if self.error:
result["error"] = self.error
return result
@ -719,6 +722,45 @@ def normalize_search_pagination(offset: Any = DEFAULT_SEARCH_OFFSET,
return normalized_offset, normalized_limit
_REGEX_NEWLINE_ESCAPE_RE = re.compile(r"(?<!\\)(?:\\\\)*\\n")
def _pattern_has_regex_newline(pattern: str) -> bool:
"""Return True when a content-search regex tries to match a newline.
``search_files`` runs rg/grep in line-oriented mode, not rg
``-U``/``--multiline`` mode, so newline regexes cannot match across
lines. Detect both a literal newline already decoded into the tool
argument and a regex ``\n`` escape (odd number of backslashes before
``n``). Even backslashes, e.g. ``\\n``, mean a literal backslash+n
search and should not warn.
"""
return "\n" in pattern or bool(_REGEX_NEWLINE_ESCAPE_RE.search(pattern))
def _is_line_oriented_newline_error(error: Optional[str]) -> bool:
"""Return True for rg's hard error when multiline mode is required."""
if not error:
return False
return "literal \"\\n\" is not allowed" in error and "--multiline" in error
def _maybe_warn_line_oriented_newline_pattern(result: SearchResult, pattern: str) -> SearchResult:
    """Annotate an empty search result when the pattern targets a line break.

    The result is returned untouched when it has a non-zero ``total_count``,
    when the pattern contains no newline regex, or when it carries an error
    other than the line-oriented newline rejection. Otherwise the same object
    is updated in place: its error is cleared and an explanatory warning is
    stored on it.

    Args:
        result: Outcome of the content search; may be mutated.
        pattern: The regex that was searched for.

    Returns:
        The *result* object that was passed in.
    """
    if result.total_count != 0:
        return result
    if not _pattern_has_regex_newline(pattern):
        return result

    # A failure that is not the newline rejection must reach the caller as-is.
    unrelated_failure = bool(result.error) and not _is_line_oriented_newline_error(result.error)
    if unrelated_failure:
        return result

    explanation = (
        "0 results found. Note: search_files content search is line-oriented "
        "and does not run ripgrep with -U/--multiline, so `\\n` in the regex "
        "does not match line breaks. Use context=N to inspect neighboring "
        "lines, or escape as `\\\\n` when searching for a literal backslash+n."
    )
    result.warning = explanation
    result.error = None
    return result
class ShellFileOperations(FileOperations):
"""
File operations implemented via shell commands.
@ -2117,17 +2159,19 @@ class ShellFileOperations(FileOperations):
"""Search for content inside files (grep-like)."""
# Try ripgrep first (fast), fallback to grep (slower but works)
if self._has_command('rg'):
return self._search_with_rg(pattern, path, file_glob, limit, offset,
output_mode, context)
elif self._has_command('grep'):
return self._search_with_grep(pattern, path, file_glob, limit, offset,
result = self._search_with_rg(pattern, path, file_glob, limit, offset,
output_mode, context)
elif self._has_command('grep'):
result = self._search_with_grep(pattern, path, file_glob, limit, offset,
output_mode, context)
else:
# Neither rg nor grep available (Windows without Git Bash, etc.)
return SearchResult(
error="Content search requires ripgrep (rg) or grep. "
"Install ripgrep: https://github.com/BurntSushi/ripgrep#installation"
)
return _maybe_warn_line_oriented_newline_pattern(result, pattern)
def _search_with_rg(self, pattern: str, path: str, file_glob: Optional[str],
limit: int, offset: int, output_mode: str, context: int) -> SearchResult:

View file

@ -1595,7 +1595,7 @@ PATCH_SCHEMA = {
SEARCH_FILES_SCHEMA = {
"name": "search_files",
"description": "Search file contents or find files by name. Use this instead of grep/rg/find/ls in terminal. Ripgrep-backed, faster than shell equivalents.\n\nContent search (target='content'): Regex search inside files. Output modes: full matches with line numbers, file paths only, or match counts. Content search is line-oriented: do not put \\n in regex patterns (even inside alternation); use context to inspect neighboring lines.\n\nFile search (target='files'): Find files by glob pattern (e.g., '*.py', '*config*'). Also use this instead of ls — results sorted by modification time.",
"description": "Search file contents or find files by name. Use this instead of grep/rg/find/ls in terminal. Ripgrep-backed, faster than shell equivalents.\n\nContent search (target='content'): Regex search inside files. Output modes: full matches with line numbers, file paths only, or match counts.\n\nFile search (target='files'): Find files by glob pattern (e.g., '*.py', '*config*'). Also use this instead of ls — results sorted by modification time.",
"parameters": {
"type": "object",
"properties": {