fix(file_ops): resolve search_files path/line collision for hyphenated numeric filenames

This commit is contained in:
Yoimex 2026-04-25 08:43:03 +03:00 committed by Teknium
parent fbc477df71
commit c050ee6573
2 changed files with 100 additions and 13 deletions

View file

@ -8,7 +8,7 @@ Covers:
import pytest
from unittest.mock import MagicMock, patch
from tools.file_operations import ShellFileOperations
from tools.file_operations import ShellFileOperations, _parse_search_context_line
# =========================================================================
@ -204,3 +204,67 @@ class TestPaginationBounds:
rg_commands = [cmd for cmd in commands if cmd.startswith("rg --files")]
assert rg_commands
assert "| head -n 1" in rg_commands[0]
# =========================================================================
# Search context parsing
# =========================================================================
class TestSearchContextParsing:
def test_parse_search_context_line_prefers_rightmost_numeric_separator(self):
parsed = _parse_search_context_line("dir/file-12-name.py-8-context here")
assert parsed == ("dir/file-12-name.py", 8, "context here")
def test_search_with_rg_context_handles_filename_with_dash_digits(self):
env = MagicMock()
env.cwd = "/tmp"
ops = ShellFileOperations(env)
with patch.object(ops, "_exec") as mock_exec:
mock_exec.return_value = MagicMock(
exit_code=0,
stdout="dir/file-12-name.py-8-context here\n",
)
result = ops._search_with_rg(
"needle",
path=".",
file_glob=None,
limit=10,
offset=0,
output_mode="content",
context=1,
)
assert result.error is None
assert result.total_count == 1
assert result.matches[0].path == "dir/file-12-name.py"
assert result.matches[0].line_number == 8
assert result.matches[0].content == "context here"
def test_search_with_grep_context_handles_filename_with_dash_digits(self):
env = MagicMock()
env.cwd = "/tmp"
ops = ShellFileOperations(env)
with patch.object(ops, "_exec") as mock_exec:
mock_exec.return_value = MagicMock(
exit_code=0,
stdout="dir/file-12-name.py-8-context here\n",
)
result = ops._search_with_grep(
"needle",
path=".",
file_glob=None,
limit=10,
offset=0,
output_mode="content",
context=1,
)
assert result.error is None
assert result.total_count == 1
assert result.matches[0].path == "dir/file-12-name.py"
assert result.matches[0].line_number == 8
assert result.matches[0].content == "context here"

View file

@ -215,6 +215,31 @@ class ExecuteResult:
exit_code: int = 0
def _parse_search_context_line(line: str) -> tuple[str, int, str] | None:
"""Parse grep/rg context output in ``path-line-content`` format.
Context lines are ambiguous because filenames may legitimately contain
``-<digits>-`` segments. Prefer the rightmost numeric separator so a path
like ``dir/file-12-name.py-8-context`` resolves to
``dir/file-12-name.py`` line ``8`` instead of truncating at ``file``.
"""
if not line or line == "--":
return None
match = None
for candidate in re.finditer(r'-(\d+)-', line):
match = candidate
if match is None:
return None
path = line[:match.start()]
if not path:
return None
return path, int(match.group(1)), line[match.end():]
# =============================================================================
# Abstract Interface
# =============================================================================
@ -1185,7 +1210,6 @@ class ShellFileOperations(FileOperations):
# Note: on Windows, paths contain drive letters (e.g. C:\path),
# so naive split(":") breaks. Use regex to handle both platforms.
_match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$')
_ctx_re = re.compile(r'^([A-Za-z]:)?(.*?)-(\d+)-(.*)$')
matches = []
for line in result.stdout.strip().split('\n'):
if not line or line == "--":
@ -1204,12 +1228,12 @@ class ShellFileOperations(FileOperations):
# Try context line (dash-separated: file-line-content)
# Only attempt if context was requested to avoid false positives
if context > 0:
m = _ctx_re.match(line)
if m:
parsed = _parse_search_context_line(line)
if parsed:
matches.append(SearchMatch(
path=(m.group(1) or '') + m.group(2),
line_number=int(m.group(3)),
content=m.group(4)[:500]
path=parsed[0],
line_number=parsed[1],
content=parsed[2][:500]
))
total = len(matches)
@ -1284,7 +1308,6 @@ class ShellFileOperations(FileOperations):
# Note: on Windows, paths contain drive letters (e.g. C:\path),
# so naive split(":") breaks. Use regex to handle both platforms.
_match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$')
_ctx_re = re.compile(r'^([A-Za-z]:)?(.*?)-(\d+)-(.*)$')
matches = []
for line in result.stdout.strip().split('\n'):
if not line or line == "--":
@ -1300,12 +1323,12 @@ class ShellFileOperations(FileOperations):
continue
if context > 0:
m = _ctx_re.match(line)
if m:
parsed = _parse_search_context_line(line)
if parsed:
matches.append(SearchMatch(
path=(m.group(1) or '') + m.group(2),
line_number=int(m.group(3)),
content=m.group(4)[:500]
path=parsed[0],
line_number=parsed[1],
content=parsed[2][:500]
))