fix(file_ops): resolve search_files path/line collision for hyphenated numeric filenames

2026-05-07 02:51:50 +00:00 · 2026-04-25 08:43:03 +03:00 · 2026-04-25 08:43:03 +03:00 · c050ee6573
commit c050ee6573
parent fbc477df71
2 changed files with 100 additions and 13 deletions
--- a/tests/tools/test_file_operations_edge_cases.py
+++ b/tests/tools/test_file_operations_edge_cases.py
@ -8,7 +8,7 @@ Covers:
 import pytest
 from unittest.mock import MagicMock, patch
-from tools.file_operations import ShellFileOperations
+from tools.file_operations import ShellFileOperations, _parse_search_context_line
 # =========================================================================
@ -204,3 +204,67 @@ class TestPaginationBounds:
        rg_commands = [cmd for cmd in commands if cmd.startswith("rg --files")]
        assert rg_commands
        assert "| head -n 1" in rg_commands[0]
 # =========================================================================
 # Search context parsing
 # =========================================================================
 class TestSearchContextParsing:
    def test_parse_search_context_line_prefers_rightmost_numeric_separator(self):
        parsed = _parse_search_context_line("dir/file-12-name.py-8-context here")
        assert parsed == ("dir/file-12-name.py", 8, "context here")
    def test_search_with_rg_context_handles_filename_with_dash_digits(self):
        env = MagicMock()
        env.cwd = "/tmp"
        ops = ShellFileOperations(env)
        with patch.object(ops, "_exec") as mock_exec:
            mock_exec.return_value = MagicMock(
                exit_code=0,
                stdout="dir/file-12-name.py-8-context here\n",
            )
            result = ops._search_with_rg(
                "needle",
                path=".",
                file_glob=None,
                limit=10,
                offset=0,
                output_mode="content",
                context=1,
            )
        assert result.error is None
        assert result.total_count == 1
        assert result.matches[0].path == "dir/file-12-name.py"
        assert result.matches[0].line_number == 8
        assert result.matches[0].content == "context here"
    def test_search_with_grep_context_handles_filename_with_dash_digits(self):
        env = MagicMock()
        env.cwd = "/tmp"
        ops = ShellFileOperations(env)
        with patch.object(ops, "_exec") as mock_exec:
            mock_exec.return_value = MagicMock(
                exit_code=0,
                stdout="dir/file-12-name.py-8-context here\n",
            )
            result = ops._search_with_grep(
                "needle",
                path=".",
                file_glob=None,
                limit=10,
                offset=0,
                output_mode="content",
                context=1,
            )
        assert result.error is None
        assert result.total_count == 1
        assert result.matches[0].path == "dir/file-12-name.py"
        assert result.matches[0].line_number == 8
        assert result.matches[0].content == "context here"
--- a/tools/file_operations.py
+++ b/tools/file_operations.py
@ -215,6 +215,31 @@ class ExecuteResult:
    exit_code: int = 0
 def _parse_search_context_line(line: str) -> tuple[str, int, str] | None:
    """Parse grep/rg context output in ``path-line-content`` format.
    Context lines are ambiguous because filenames may legitimately contain
    ``-<digits>-`` segments. Prefer the rightmost numeric separator so a path
    like ``dir/file-12-name.py-8-context`` resolves to
    ``dir/file-12-name.py`` line ``8`` instead of truncating at ``file``.
    """
    if not line or line == "--":
        return None
    match = None
    for candidate in re.finditer(r'-(\d+)-', line):
        match = candidate
    if match is None:
        return None
    path = line[:match.start()]
    if not path:
        return None
    return path, int(match.group(1)), line[match.end():]
 # =============================================================================
 # Abstract Interface
 # =============================================================================
@ -1185,7 +1210,6 @@ class ShellFileOperations(FileOperations):
            # Note: on Windows, paths contain drive letters (e.g. C:\path),
            # so naive split(":") breaks. Use regex to handle both platforms.
            _match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$')
            _ctx_re = re.compile(r'^([A-Za-z]:)?(.*?)-(\d+)-(.*)$')
            matches = []
            for line in result.stdout.strip().split('\n'):
                if not line or line == "--":
@ -1204,12 +1228,12 @@ class ShellFileOperations(FileOperations):
                # Try context line (dash-separated: file-line-content)
                # Only attempt if context was requested to avoid false positives
                if context > 0:
-                    m = _ctx_re.match(line)
+                    parsed = _parse_search_context_line(line)
-                    if m:
+                    if parsed:
                        matches.append(SearchMatch(
-                            path=(m.group(1) or '') + m.group(2),
+                            path=parsed[0],
-                            line_number=int(m.group(3)),
+                            line_number=parsed[1],
-                            content=m.group(4)[:500]
+                            content=parsed[2][:500]
                        ))
            total = len(matches)
@ -1284,7 +1308,6 @@ class ShellFileOperations(FileOperations):
            # Note: on Windows, paths contain drive letters (e.g. C:\path),
            # so naive split(":") breaks. Use regex to handle both platforms.
            _match_re = re.compile(r'^([A-Za-z]:)?(.*?):(\d+):(.*)$')
            _ctx_re = re.compile(r'^([A-Za-z]:)?(.*?)-(\d+)-(.*)$')
            matches = []
            for line in result.stdout.strip().split('\n'):
                if not line or line == "--":
@ -1300,12 +1323,12 @@ class ShellFileOperations(FileOperations):
                    continue
                if context > 0:
-                    m = _ctx_re.match(line)
+                    parsed = _parse_search_context_line(line)
-                    if m:
+                    if parsed:
                        matches.append(SearchMatch(
-                            path=(m.group(1) or '') + m.group(2),
+                            path=parsed[0],
-                            line_number=int(m.group(3)),
+                            line_number=parsed[1],
-                            content=m.group(4)[:500]
+                            content=parsed[2][:500]
                        ))