Merge pull request #14820 from NousResearch/bb/tui-at-fuzzy-match

fix(tui): @<name> fuzzy-matches filenames across the repo
This commit is contained in:
brooklyn! 2026-04-23 19:40:43 -05:00 committed by GitHub
commit 6fdbf2f2d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 385 additions and 5 deletions

View file

@ -1,22 +1,28 @@
"""Regression tests for the TUI gateway's `complete.path` handler. """Regression tests for the TUI gateway's `complete.path` handler.
Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder` Reported during the TUI v2 blitz retest:
with no colon yet) still surfaced files alongside directories in the - typing `@folder:` (and `@folder` with no colon yet) surfaced files
TUI composer, because the gateway-side completion lives in alongside directories the gateway-side completion lives in
`tui_gateway/server.py` and was never touched by the earlier fix to `tui_gateway/server.py` and was never touched by the earlier fix to
`hermes_cli/commands.py`. `hermes_cli/commands.py`.
- typing `@appChrome` required the full `@ui-tui/src/components/app`
path to find the file — users expect Cmd-P-style fuzzy basename
matching across the repo, not a strict directory prefix filter.
Covers: Covers:
- `@folder:` only yields directories - `@folder:` only yields directories
- `@file:` only yields regular files - `@file:` only yields regular files
- Bare `@folder` / `@file` (no colon) lists cwd directly - Bare `@folder` / `@file` (no colon) lists cwd directly
- Explicit prefix is preserved in the completion text - Explicit prefix is preserved in the completion text
- `@<name>` with no slash fuzzy-matches basenames anywhere in the tree
""" """
from __future__ import annotations from __future__ import annotations
from pathlib import Path from pathlib import Path
import pytest
from tui_gateway import server from tui_gateway import server
@ -33,6 +39,15 @@ def _items(word: str):
return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]] return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]]
@pytest.fixture(autouse=True)
def _reset_fuzzy_cache():
    """Empty the gateway's fuzzy file-listing cache around every test.

    Each test walks a fresh tmp dir; clearing before and after the test
    body keeps a listing cached for an earlier root from leaking through
    the TTL window into the next test.
    """
    server._fuzzy_cache.clear()
    yield
    server._fuzzy_cache.clear()
def test_at_folder_colon_only_dirs(tmp_path, monkeypatch): def test_at_folder_colon_only_dirs(tmp_path, monkeypatch):
monkeypatch.chdir(tmp_path) monkeypatch.chdir(tmp_path)
_fixture(tmp_path) _fixture(tmp_path)
@ -89,3 +104,176 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch):
for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"): for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"):
assert expected in texts, f"missing static ref {expected!r} in {texts!r}" assert expected in texts, f"missing static ref {expected!r} in {texts!r}"
# ── Fuzzy basename matching ──────────────────────────────────────────────
# Users shouldn't have to know the full path — typing `@appChrome` should
# find `ui-tui/src/components/appChrome.tsx`.
def _nested_fixture(tmp_path: Path):
(tmp_path / "readme.md").write_text("x")
(tmp_path / ".env").write_text("x")
(tmp_path / "ui-tui/src/components").mkdir(parents=True)
(tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x")
(tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x")
(tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x")
(tmp_path / "ui-tui/src/hooks").mkdir(parents=True)
(tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x")
(tmp_path / "tui_gateway").mkdir()
(tmp_path / "tui_gateway/server.py").write_text("x")
def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch):
    """A bare `@appChrome` (no slash) surfaces the nested file."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    expected = "@file:ui-tui/src/components/appChrome.tsx"
    entries = _items("@appChrome")
    texts = [entry[0] for entry in entries]
    assert expected in texts, texts

    # The picker renders `appChrome.tsx  ui-tui/src/components` on one
    # row: display carries the basename, meta the containing directory.
    _, display, meta = next(entry for entry in entries if entry[0] == expected)
    assert display == "appChrome.tsx"
    assert meta == "ui-tui/src/components"
def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch):
    """Better matches sort before weaker matches regardless of path depth."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    # One candidate per tier exercised here:
    #   exact       -> basename `server`, deliberately buried two dirs deep
    #   prefix      -> `server.py` at the root and under `tui_gateway/`
    #   subsequence -> `s_e_r_v_e_r.txt` at the root (no word starts with
    #                  `server`, and `server` is not a substring of it)
    (tmp_path / "pkg" / "deep").mkdir(parents=True)
    (tmp_path / "pkg" / "deep" / "server").write_text("x")
    (tmp_path / "server.py").write_text("x")
    (tmp_path / "s_e_r_v_e_r.txt").write_text("x")

    texts = [t for t, _, _ in _items("@server")]
    file_texts = [t for t in texts if t.startswith("@file:")]

    # The exact basename wins even though its path is the deepest. The two
    # `server.py` files tie on rank (same tier, same basename length), so
    # the shorter relative path breaks the tie. The subsequence-only hit
    # sorts last despite living at the root.
    assert file_texts == [
        "@file:pkg/deep/server",
        "@file:server.py",
        "@file:tui_gateway/server.py",
        "@file:s_e_r_v_e_r.txt",
    ], texts
def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch):
    """A camelCase word inside the basename is matched by its own prefix."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    # `Chrome` opens the second camelCase word of `appChrome.tsx`.
    target = "@file:ui-tui/src/components/appChrome.tsx"
    texts = [entry[0] for entry in _items("@Chrome")]
    assert target in texts, texts
def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch):
    """`@uCo` reaches `useCompletion.ts` through the subsequence tier."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    target = "@file:ui-tui/src/hooks/useCompletion.ts"
    texts = [entry[0] for entry in _items("@uCo")]
    assert target in texts, texts
def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch):
    """An explicit `@file:` prefix is kept as the completion tag."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    target = "@file:ui-tui/src/components/appChrome.tsx"
    texts = [entry[0] for entry in _items("@file:appChrome")]
    assert target in texts, texts
def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch):
    """A `/` in the query means the user is navigating: list the directory."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    texts = [entry[0] for entry in _items("@ui-tui/src/components/app")]

    # Directory-listing mode tags each entry `@file:` / `@folder:` and only
    # shows direct children of the named directory, so the file in the
    # sibling `hooks/` directory must not appear.
    lists_chrome = any("appChrome.tsx" in text for text in texts)
    lists_hook = any("useCompletion.ts" in text for text in texts)
    assert lists_chrome, texts
    assert not lists_hook, texts
def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch):
    """`@folder:<name>` keeps using the directory listing.

    The fuzzy scanner only collects files (git-tracked + untracked), so
    folder completion is left to the dir-listing path.
    """
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    texts = [entry[0] for entry in _items("@folder:ui")]

    # `ui-tui/` is a directory at the root, so the listing branch offers it.
    offers_ui_tui = any(text.startswith("@folder:ui-tui") for text in texts)
    assert offers_ui_tui, texts
def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch):
    """`.env` stays out of `@env` results but is offered for `@.env`."""
    monkeypatch.chdir(tmp_path)
    _nested_fixture(tmp_path)

    plain_texts = [text for text, _, _ in _items("@env")]
    assert all(".env" not in text for text in plain_texts)

    dotted_texts = [text for text, _, _ in _items("@.env")]
    assert any(text.endswith(".env") for text in dotted_texts)
def test_fuzzy_caps_results(tmp_path, monkeypatch):
    """Sixty matching files still yield exactly thirty completions."""
    monkeypatch.chdir(tmp_path)
    for index in range(60):
        tmp_path.joinpath(f"mod_{index:03d}.py").write_text("x")

    assert len(_items("@mod")) == 30
def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch):
    """Fuzzy completions are cwd-relative when cwd is a repo subdirectory.

    When the gateway runs from a subdirectory of a git repo, completion
    paths must resolve under that cwd, not under the repo root. Otherwise
    `@appChrome` typed inside `apps/web/` would suggest
    `@file:apps/web/src/appChrome.tsx`, and the agent (resolving from cwd)
    would look for `apps/web/apps/web/src/appChrome.tsx` and fail. Files
    outside the cwd subtree must not be offered at all, so the completion
    contract stays "paths are cwd-relative".
    """
    import shutil
    import subprocess

    # The scenario needs a real repository; skip rather than error on
    # machines without a git executable.
    if shutil.which("git") is None:
        pytest.skip("git executable not available")

    def git(*args: str) -> None:
        subprocess.run(["git", *args], cwd=tmp_path, check=True)

    git("init", "-q")
    git("config", "user.email", "test@example.com")
    git("config", "user.name", "test")
    # A global `commit.gpgsign=true` would make the commit below fail or
    # prompt for a key; force it off for this throwaway repo.
    git("config", "commit.gpgsign", "false")

    (tmp_path / "apps" / "web" / "src").mkdir(parents=True)
    (tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x")
    (tmp_path / "apps" / "api" / "src").mkdir(parents=True)
    (tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x")
    (tmp_path / "README.md").write_text("x")

    git("add", ".")
    git("commit", "-q", "-m", "init")

    # Run from `apps/web/`: completions should be relative to here, and
    # files outside this subtree (apps/api, README.md at the repo root)
    # shouldn't appear at all.
    monkeypatch.chdir(tmp_path / "apps" / "web")

    texts = [t for t, _, _ in _items("@appChrome")]
    assert "@file:src/appChrome.tsx" in texts, texts
    assert not any("apps/web/" in t for t in texts), texts

    server._fuzzy_cache.clear()
    other_texts = [t for t, _, _ in _items("@server")]
    assert not any("server.ts" in t for t in other_texts), other_texts

    server._fuzzy_cache.clear()
    readme_texts = [t for t, _, _ in _items("@README")]
    assert not any("README.md" in t for t in readme_texts), readme_texts

View file

@ -3256,6 +3256,162 @@ def _(rid, params: dict) -> dict:
# ── Methods: complete ───────────────────────────────────────────────── # ── Methods: complete ─────────────────────────────────────────────────
_FUZZY_CACHE_TTL_S = 5.0
_FUZZY_CACHE_MAX_FILES = 20000
_FUZZY_FALLBACK_EXCLUDES = frozenset(
{
".git",
".hg",
".svn",
".next",
".cache",
".venv",
"venv",
"node_modules",
"__pycache__",
"dist",
"build",
"target",
".mypy_cache",
".pytest_cache",
".ruff_cache",
}
)
_fuzzy_cache_lock = threading.Lock()
_fuzzy_cache: dict[str, tuple[float, list[str]]] = {}
def _list_repo_files(root: str) -> list[str]:
    """Return file paths relative to ``root``, using ``/`` separators.

    Primary source is ``git ls-files`` run from the repo top (resolved via
    ``rev-parse --show-toplevel``), which covers tracked + untracked files
    and honours the standard ignore rules. Each path is converted back to
    be relative to ``root``; files outside ``root`` (parent directories,
    sibling subtrees) are dropped so the picker stays scoped to what is
    reachable from the gateway's cwd.

    When git yields no usable paths (``root`` is not in a repo, git is
    missing or times out, or the listing under ``root`` is empty), a
    bounded ``os.walk(root)`` is used instead.

    The result is capped at ``_FUZZY_CACHE_MAX_FILES`` entries and cached
    per ``root`` for ``_FUZZY_CACHE_TTL_S`` seconds so rapid keystrokes
    don't respawn git processes. The cached list object itself is returned;
    callers must treat it as read-only.
    """
    with _fuzzy_cache_lock:
        cached = _fuzzy_cache.get(root)
        if cached and time.monotonic() - cached[0] < _FUZZY_CACHE_TTL_S:
            return cached[1]

    files: list[str] = []
    # git reports the toplevel as a canonical path. Canonicalise ``root``
    # the same way so a symlinked or otherwise non-canonical root doesn't
    # turn every relpath into `../…` and discard the whole listing.
    real_root = os.path.realpath(root)
    try:
        top_result = subprocess.run(
            ["git", "-C", root, "rev-parse", "--show-toplevel"],
            capture_output=True,
            timeout=2.0,
            check=False,
        )
        if top_result.returncode == 0:
            top = os.path.realpath(
                top_result.stdout.decode("utf-8", "replace").strip()
            )
            list_result = subprocess.run(
                # -z: NUL-separated output, so the split below is unambiguous.
                ["git", "-C", top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
                capture_output=True,
                timeout=2.0,
                check=False,
            )
            if list_result.returncode == 0:
                for p in list_result.stdout.decode("utf-8", "replace").split("\0"):
                    if not p:
                        continue
                    rel = os.path.relpath(os.path.join(top, p), real_root).replace(os.sep, "/")
                    # Skip parents/siblings of cwd — keep the picker scoped
                    # to root-and-below, matching Cmd-P workspace semantics.
                    if rel.startswith("../"):
                        continue
                    files.append(rel)
                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
                        break
    except (OSError, ValueError, subprocess.TimeoutExpired):
        # Best effort: git missing or too slow, or (ValueError) relpath
        # across Windows drives. The walk below takes over if nothing was
        # collected.
        pass

    if not files:
        # Fallback walk: skip vendor/build dirs + dot-dirs so the walk stays
        # tractable. Dotfiles themselves survive — the ranker decides based
        # on whether the query starts with `.`.
        try:
            for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
                # In-place assignment prunes the directories os.walk visits next.
                dirnames[:] = [
                    d
                    for d in dirnames
                    if d not in _FUZZY_FALLBACK_EXCLUDES and not d.startswith(".")
                ]
                rel_dir = os.path.relpath(dirpath, root)
                for f in filenames:
                    rel = f if rel_dir == "." else f"{rel_dir}/{f}"
                    files.append(rel.replace(os.sep, "/"))
                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
                        break
                if len(files) >= _FUZZY_CACHE_MAX_FILES:
                    break
        except OSError:
            # Keep whatever was gathered before the failure.
            pass

    # Stamp the entry when the scan finishes, not when it started: a slow
    # scan would otherwise store a listing that is already (partly) expired
    # and every keystroke would trigger a rescan.
    with _fuzzy_cache_lock:
        _fuzzy_cache[root] = (time.monotonic(), files)

    return files
def _fuzzy_basename_rank(name: str, query: str) -> tuple[int, int] | None:
"""Rank ``name`` against ``query``; lower is better. Returns None to reject.
Tiers (kind):
0 exact basename
1 basename prefix (e.g. `app` `appChrome.tsx`)
2 word-boundary / camelCase hit (e.g. `chrome` `appChrome.tsx`)
3 substring anywhere in basename
4 subsequence match (every query char appears in order)
Secondary key is `len(name)` so shorter names win ties.
"""
if not query:
return (3, len(name))
nl = name.lower()
ql = query.lower()
if nl == ql:
return (0, len(name))
if nl.startswith(ql):
return (1, len(name))
# Word-boundary split: `foo-bar_baz.qux` → ["foo","bar","baz","qux"].
# camelCase split: `appChrome` → ["app","Chrome"]. Cheap approximation;
# falls through to substring/subsequence if it misses.
parts: list[str] = []
buf = ""
for ch in name:
if ch in "-_." or (ch.isupper() and buf and not buf[-1].isupper()):
if buf:
parts.append(buf)
buf = ch if ch not in "-_." else ""
else:
buf += ch
if buf:
parts.append(buf)
for p in parts:
if p.lower().startswith(ql):
return (2, len(name))
if ql in nl:
return (3, len(name))
i = 0
for ch in nl:
if ch == ql[i]:
i += 1
if i == len(ql):
return (4, len(name))
return None
@method("complete.path") @method("complete.path")
def _(rid, params: dict) -> dict: def _(rid, params: dict) -> dict:
@ -3291,6 +3447,42 @@ def _(rid, params: dict) -> dict:
prefix_tag = "" prefix_tag = ""
path_part = query if is_context else query path_part = query if is_context else query
# Fuzzy basename search across the repo when the user types a bare
# name with no path separator — `@appChrome` surfaces every file
# whose basename matches, regardless of directory depth. Matches what
# editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with
# `/`, `./`, `~/`, `/abs`) fall through to the directory-listing
# path so explicit navigation intent is preserved.
if (
is_context
and path_part
and "/" not in path_part
and prefix_tag != "folder"
):
root = os.getcwd()
ranked: list[tuple[tuple[int, int], str, str]] = []
for rel in _list_repo_files(root):
basename = os.path.basename(rel)
if basename.startswith(".") and not path_part.startswith("."):
continue
rank = _fuzzy_basename_rank(basename, path_part)
if rank is None:
continue
ranked.append((rank, rel, basename))
ranked.sort(key=lambda r: (r[0], len(r[1]), r[1]))
tag = prefix_tag or "file"
for _, rel, basename in ranked[:30]:
items.append(
{
"text": f"@{tag}:{rel}",
"display": basename,
"meta": os.path.dirname(rel),
}
)
return _ok(rid, {"items": items})
expanded = _normalize_completion_path(path_part) if path_part else "." expanded = _normalize_completion_path(path_part) if path_part else "."
if expanded == "." or not expanded: if expanded == "." or not expanded:
search_dir, match = ".", "" search_dir, match = ".", ""