feat: improve file search UX — fuzzy @ completions, mtime sorting, better suggestions (#9467)

Three improvements to file search based on user feedback:

1. Fuzzy @ completions (commands.py):
   - Bare @query now does project-wide fuzzy file search instead of
     prefix-only directory listing
   - Uses rg --files with 5-second cache for responsive completions
   - Scoring: exact name (100) > prefix (80) > substring (60) >
     path contains (40) > subsequence with boundary bonus (35/25)
   - Bare @ with no query shows recently modified files first

2. Mtime-sorted file search (file_operations.py):
   - _search_files_rg now uses --sortr=modified (rg 13+) to surface
     recently edited files first
   - Retries without --sortr when the sorted query returns no results,
     which covers older rg versions that reject the flag

3. Improved file-not-found suggestions (file_operations.py):
   - Replaced crude character-set overlap with ranked scoring:
     exact name (100) > same basename (90) > prefix (70) >
     substring (60) > reverse substring (40) >
     same extension plus character overlap (30)
   - search_files path-not-found now suggests up to five similarly
     named entries from the parent directory
This commit is contained in:
Teknium 2026-04-13 23:54:45 -07:00 committed by GitHub
parent c7e2fe655a
commit eb44abd6b1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 218 additions and 55 deletions

View file

@ -556,27 +556,54 @@ class ShellFileOperations(FileOperations):
def _suggest_similar_files(self, path: str) -> ReadResult:
"""Suggest similar files when the requested file is not found."""
# Get directory and filename
dir_path = os.path.dirname(path) or "."
filename = os.path.basename(path)
# List files in directory
ls_cmd = f"ls -1 {self._escape_shell_arg(dir_path)} 2>/dev/null | head -20"
basename_no_ext = os.path.splitext(filename)[0]
ext = os.path.splitext(filename)[1].lower()
lower_name = filename.lower()
# List files in the target directory
ls_cmd = f"ls -1 {self._escape_shell_arg(dir_path)} 2>/dev/null | head -50"
ls_result = self._exec(ls_cmd)
similar = []
scored: list = [] # (score, filepath) — higher is better
if ls_result.exit_code == 0 and ls_result.stdout.strip():
files = ls_result.stdout.strip().split('\n')
# Simple similarity: files that share some characters with the target
for f in files:
# Check if filenames share significant overlap
common = set(filename.lower()) & set(f.lower())
if len(common) >= len(filename) * 0.5: # 50% character overlap
similar.append(os.path.join(dir_path, f))
for f in ls_result.stdout.strip().split('\n'):
if not f:
continue
lf = f.lower()
score = 0
# Exact match (shouldn't happen, but guard)
if lf == lower_name:
score = 100
# Same base name, different extension (e.g. config.yml vs config.yaml)
elif os.path.splitext(f)[0].lower() == basename_no_ext.lower():
score = 90
# Target is prefix of candidate or vice-versa
elif lf.startswith(lower_name) or lower_name.startswith(lf):
score = 70
# Substring match (candidate contains query)
elif lower_name in lf:
score = 60
# Reverse substring (query contains candidate name)
elif lf in lower_name and len(lf) > 2:
score = 40
# Same extension with some overlap
elif ext and os.path.splitext(f)[1].lower() == ext:
common = set(lower_name) & set(lf)
if len(common) >= max(len(lower_name), len(lf)) * 0.4:
score = 30
if score > 0:
scored.append((score, os.path.join(dir_path, f)))
scored.sort(key=lambda x: -x[0])
similar = [fp for _, fp in scored[:5]]
return ReadResult(
error=f"File not found: {path}",
similar_files=similar[:5] # Limit to 5 suggestions
similar_files=similar
)
def read_file_raw(self, path: str) -> ReadResult:
@ -845,8 +872,33 @@ class ShellFileOperations(FileOperations):
# Validate that the path exists before searching
check = self._exec(f"test -e {self._escape_shell_arg(path)} && echo exists || echo not_found")
if "not_found" in check.stdout:
# Try to suggest nearby paths
parent = os.path.dirname(path) or "."
basename_query = os.path.basename(path)
hint_parts = [f"Path not found: {path}"]
# Check if parent directory exists and list similar entries
parent_check = self._exec(
f"test -d {self._escape_shell_arg(parent)} && echo yes || echo no"
)
if "yes" in parent_check.stdout and basename_query:
ls_result = self._exec(
f"ls -1 {self._escape_shell_arg(parent)} 2>/dev/null | head -20"
)
if ls_result.exit_code == 0 and ls_result.stdout.strip():
lower_q = basename_query.lower()
candidates = []
for entry in ls_result.stdout.strip().split('\n'):
if not entry:
continue
le = entry.lower()
if lower_q in le or le in lower_q or le.startswith(lower_q[:3]):
candidates.append(os.path.join(parent, entry))
if candidates:
hint_parts.append(
"Similar paths: " + ", ".join(candidates[:5])
)
return SearchResult(
error=f"Path not found: {path}. Verify the path exists (use 'terminal' to check).",
error=". ".join(hint_parts),
total_count=0
)
@ -912,7 +964,8 @@ class ShellFileOperations(FileOperations):
rg --files respects .gitignore and excludes hidden directories by
default, and uses parallel directory traversal for ~200x speedup
over find on wide trees.
over find on wide trees. Results are sorted by modification time
(most recently edited first) when the installed rg supports --sortr (rg >= 13.0).
"""
# rg --files -g uses glob patterns; wrap bare names so they match
# at any depth (equivalent to find -name).
@ -922,14 +975,25 @@ class ShellFileOperations(FileOperations):
glob_pattern = pattern
fetch_limit = limit + offset
cmd = (
f"rg --files -g {self._escape_shell_arg(glob_pattern)} "
# Try mtime-sorted first (rg 13+); fall back to unsorted if not supported.
cmd_sorted = (
f"rg --files --sortr=modified -g {self._escape_shell_arg(glob_pattern)} "
f"{self._escape_shell_arg(path)} 2>/dev/null "
f"| head -n {fetch_limit}"
)
result = self._exec(cmd, timeout=60)
result = self._exec(cmd_sorted, timeout=60)
all_files = [f for f in result.stdout.strip().split('\n') if f]
if not all_files:
# --sortr may have failed on older rg; retry without it.
cmd_plain = (
f"rg --files -g {self._escape_shell_arg(glob_pattern)} "
f"{self._escape_shell_arg(path)} 2>/dev/null "
f"| head -n {fetch_limit}"
)
result = self._exec(cmd_plain, timeout=60)
all_files = [f for f in result.stdout.strip().split('\n') if f]
page = all_files[offset:offset + limit]
return SearchResult(