mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: improve file search UX — fuzzy @ completions, mtime sorting, better suggestions (#9467)
Three improvements to file search based on user feedback:
1. Fuzzy @ completions (commands.py):
- Bare @query now does project-wide fuzzy file search instead of
prefix-only directory listing
- Uses rg --files with 5-second cache for responsive completions
- Scoring: exact name (100) > prefix (80) > substring (60) >
path contains (40) > subsequence with boundary bonus (35/25)
- Bare @ with no query shows recently modified files first
2. Mtime-sorted file search (file_operations.py):
- _search_files_rg now uses --sortr=modified (rg 13+) to surface
recently edited files first
- Falls back to unsorted on older rg versions
3. Improved file-not-found suggestions (file_operations.py):
- Replaced crude character-set overlap with ranked scoring:
same basename (90) > prefix (70) > substring (60) >
reverse substring (40) > same extension (30)
- search_files path-not-found now suggests similarly named entries
(files or directories) from the parent directory
This commit is contained in:
parent
c7e2fe655a
commit
eb44abd6b1
2 changed files with 218 additions and 55 deletions
|
|
@ -556,27 +556,54 @@ class ShellFileOperations(FileOperations):
|
|||
|
||||
def _suggest_similar_files(self, path: str) -> ReadResult:
|
||||
"""Suggest similar files when the requested file is not found."""
|
||||
# Get directory and filename
|
||||
dir_path = os.path.dirname(path) or "."
|
||||
filename = os.path.basename(path)
|
||||
|
||||
# List files in directory
|
||||
ls_cmd = f"ls -1 {self._escape_shell_arg(dir_path)} 2>/dev/null | head -20"
|
||||
basename_no_ext = os.path.splitext(filename)[0]
|
||||
ext = os.path.splitext(filename)[1].lower()
|
||||
lower_name = filename.lower()
|
||||
|
||||
# List files in the target directory
|
||||
ls_cmd = f"ls -1 {self._escape_shell_arg(dir_path)} 2>/dev/null | head -50"
|
||||
ls_result = self._exec(ls_cmd)
|
||||
|
||||
similar = []
|
||||
|
||||
scored: list = [] # (score, filepath) — higher is better
|
||||
if ls_result.exit_code == 0 and ls_result.stdout.strip():
|
||||
files = ls_result.stdout.strip().split('\n')
|
||||
# Simple similarity: files that share some characters with the target
|
||||
for f in files:
|
||||
# Check if filenames share significant overlap
|
||||
common = set(filename.lower()) & set(f.lower())
|
||||
if len(common) >= len(filename) * 0.5: # 50% character overlap
|
||||
similar.append(os.path.join(dir_path, f))
|
||||
|
||||
for f in ls_result.stdout.strip().split('\n'):
|
||||
if not f:
|
||||
continue
|
||||
lf = f.lower()
|
||||
score = 0
|
||||
|
||||
# Exact match (shouldn't happen, but guard)
|
||||
if lf == lower_name:
|
||||
score = 100
|
||||
# Same base name, different extension (e.g. config.yml vs config.yaml)
|
||||
elif os.path.splitext(f)[0].lower() == basename_no_ext.lower():
|
||||
score = 90
|
||||
# Target is prefix of candidate or vice-versa
|
||||
elif lf.startswith(lower_name) or lower_name.startswith(lf):
|
||||
score = 70
|
||||
# Substring match (candidate contains query)
|
||||
elif lower_name in lf:
|
||||
score = 60
|
||||
# Reverse substring (query contains candidate name)
|
||||
elif lf in lower_name and len(lf) > 2:
|
||||
score = 40
|
||||
# Same extension with some overlap
|
||||
elif ext and os.path.splitext(f)[1].lower() == ext:
|
||||
common = set(lower_name) & set(lf)
|
||||
if len(common) >= max(len(lower_name), len(lf)) * 0.4:
|
||||
score = 30
|
||||
|
||||
if score > 0:
|
||||
scored.append((score, os.path.join(dir_path, f)))
|
||||
|
||||
scored.sort(key=lambda x: -x[0])
|
||||
similar = [fp for _, fp in scored[:5]]
|
||||
|
||||
return ReadResult(
|
||||
error=f"File not found: {path}",
|
||||
similar_files=similar[:5] # Limit to 5 suggestions
|
||||
similar_files=similar
|
||||
)
|
||||
|
||||
def read_file_raw(self, path: str) -> ReadResult:
|
||||
|
|
@ -845,8 +872,33 @@ class ShellFileOperations(FileOperations):
|
|||
# Validate that the path exists before searching
|
||||
check = self._exec(f"test -e {self._escape_shell_arg(path)} && echo exists || echo not_found")
|
||||
if "not_found" in check.stdout:
|
||||
# Try to suggest nearby paths
|
||||
parent = os.path.dirname(path) or "."
|
||||
basename_query = os.path.basename(path)
|
||||
hint_parts = [f"Path not found: {path}"]
|
||||
# Check if parent directory exists and list similar entries
|
||||
parent_check = self._exec(
|
||||
f"test -d {self._escape_shell_arg(parent)} && echo yes || echo no"
|
||||
)
|
||||
if "yes" in parent_check.stdout and basename_query:
|
||||
ls_result = self._exec(
|
||||
f"ls -1 {self._escape_shell_arg(parent)} 2>/dev/null | head -20"
|
||||
)
|
||||
if ls_result.exit_code == 0 and ls_result.stdout.strip():
|
||||
lower_q = basename_query.lower()
|
||||
candidates = []
|
||||
for entry in ls_result.stdout.strip().split('\n'):
|
||||
if not entry:
|
||||
continue
|
||||
le = entry.lower()
|
||||
if lower_q in le or le in lower_q or le.startswith(lower_q[:3]):
|
||||
candidates.append(os.path.join(parent, entry))
|
||||
if candidates:
|
||||
hint_parts.append(
|
||||
"Similar paths: " + ", ".join(candidates[:5])
|
||||
)
|
||||
return SearchResult(
|
||||
error=f"Path not found: {path}. Verify the path exists (use 'terminal' to check).",
|
||||
error=". ".join(hint_parts),
|
||||
total_count=0
|
||||
)
|
||||
|
||||
|
|
@ -912,7 +964,8 @@ class ShellFileOperations(FileOperations):
|
|||
|
||||
rg --files respects .gitignore and excludes hidden directories by
|
||||
default, and uses parallel directory traversal for ~200x speedup
|
||||
over find on wide trees.
|
||||
over find on wide trees. Results are sorted by modification time
|
||||
(most recently edited first) when rg >= 13.0 supports --sortr.
|
||||
"""
|
||||
# rg --files -g uses glob patterns; wrap bare names so they match
|
||||
# at any depth (equivalent to find -name).
|
||||
|
|
@ -922,14 +975,25 @@ class ShellFileOperations(FileOperations):
|
|||
glob_pattern = pattern
|
||||
|
||||
fetch_limit = limit + offset
|
||||
cmd = (
|
||||
f"rg --files -g {self._escape_shell_arg(glob_pattern)} "
|
||||
# Try mtime-sorted first (rg 13+); fall back to unsorted if not supported.
|
||||
cmd_sorted = (
|
||||
f"rg --files --sortr=modified -g {self._escape_shell_arg(glob_pattern)} "
|
||||
f"{self._escape_shell_arg(path)} 2>/dev/null "
|
||||
f"| head -n {fetch_limit}"
|
||||
)
|
||||
result = self._exec(cmd, timeout=60)
|
||||
|
||||
result = self._exec(cmd_sorted, timeout=60)
|
||||
all_files = [f for f in result.stdout.strip().split('\n') if f]
|
||||
|
||||
if not all_files:
|
||||
# --sortr may have failed on older rg; retry without it.
|
||||
cmd_plain = (
|
||||
f"rg --files -g {self._escape_shell_arg(glob_pattern)} "
|
||||
f"{self._escape_shell_arg(path)} 2>/dev/null "
|
||||
f"| head -n {fetch_limit}"
|
||||
)
|
||||
result = self._exec(cmd_plain, timeout=60)
|
||||
all_files = [f for f in result.stdout.strip().split('\n') if f]
|
||||
|
||||
page = all_files[offset:offset + limit]
|
||||
|
||||
return SearchResult(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue