diff --git a/tests/gateway/test_complete_path_at_filter.py b/tests/gateway/test_complete_path_at_filter.py index 9e5031c0d..4a3e292b0 100644 --- a/tests/gateway/test_complete_path_at_filter.py +++ b/tests/gateway/test_complete_path_at_filter.py @@ -1,22 +1,28 @@ """Regression tests for the TUI gateway's `complete.path` handler. -Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder` -with no colon yet) still surfaced files alongside directories in the -TUI composer, because the gateway-side completion lives in -`tui_gateway/server.py` and was never touched by the earlier fix to -`hermes_cli/commands.py`. +Reported during the TUI v2 blitz retest: + - typing `@folder:` (and `@folder` with no colon yet) surfaced files + alongside directories — the gateway-side completion lives in + `tui_gateway/server.py` and was never touched by the earlier fix to + `hermes_cli/commands.py`. + - typing `@appChrome` required the full `@ui-tui/src/components/app…` + path to find the file — users expect Cmd-P-style fuzzy basename + matching across the repo, not a strict directory prefix filter. Covers: - `@folder:` only yields directories - `@file:` only yields regular files - Bare `@folder` / `@file` (no colon) lists cwd directly - Explicit prefix is preserved in the completion text + - `@` with no slash fuzzy-matches basenames anywhere in the tree """ from __future__ import annotations from pathlib import Path +import pytest + from tui_gateway import server @@ -33,6 +39,15 @@ def _items(word: str): return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]] +@pytest.fixture(autouse=True) +def _reset_fuzzy_cache(monkeypatch): + # Each test walks a fresh tmp dir; clear the cached listing so prior + # roots can't leak through the TTL window. 
+ server._fuzzy_cache.clear() + yield + server._fuzzy_cache.clear() + + def test_at_folder_colon_only_dirs(tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) _fixture(tmp_path) @@ -89,3 +104,176 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch): for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"): assert expected in texts, f"missing static ref {expected!r} in {texts!r}" + + +# ── Fuzzy basename matching ────────────────────────────────────────────── +# Users shouldn't have to know the full path — typing `@appChrome` should +# find `ui-tui/src/components/appChrome.tsx`. + + +def _nested_fixture(tmp_path: Path): + (tmp_path / "readme.md").write_text("x") + (tmp_path / ".env").write_text("x") + (tmp_path / "ui-tui/src/components").mkdir(parents=True) + (tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x") + (tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x") + (tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x") + (tmp_path / "ui-tui/src/hooks").mkdir(parents=True) + (tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x") + (tmp_path / "tui_gateway").mkdir() + (tmp_path / "tui_gateway/server.py").write_text("x") + + +def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch): + """`@appChrome` — with no slash — should surface the nested file.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + entries = _items("@appChrome") + texts = [t for t, _, _ in entries] + + assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + # Display is the basename, meta is the containing directory, so the + # picker can show `appChrome.tsx ui-tui/src/components` on one row. 
+ row = next(r for r in entries if r[0] == "@file:ui-tui/src/components/appChrome.tsx") + assert row[1] == "appChrome.tsx" + assert row[2] == "ui-tui/src/components" + + +def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch): + """Better matches sort before weaker matches regardless of path depth.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + (tmp_path / "server.py").write_text("x") # same basename as the nested file, but at the root + + texts = [t for t, _, _ in _items("@server")] + + # `server.py` and `tui_gateway/server.py` are both basename-prefix + # matches for `server` (same tier, same basename length), so the + # shorter relative path breaks the tie and the root file sorts first. + assert texts[0] == "@file:server.py", texts + assert "@file:tui_gateway/server.py" in texts + + +def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch): + """Mid-basename camelCase pieces match without substring scanning.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@Chrome")] + + # `Chrome` starts a camelCase word inside `appChrome.tsx`. 
+ assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + +def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch): + """`@uCo` → `useCompletion.ts` via subsequence, last-resort tier.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@uCo")] + + assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts + + +def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch): + """Explicit `@file:` prefix still wins the completion tag.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@file:appChrome")] + + assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts + + +def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch): + """Any `/` in the query = user is navigating; keep directory listing.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@ui-tui/src/components/app")] + + # Directory-listing mode prefixes with `@file:` / `@folder:` per entry. + # It should only surface direct children of the named dir — not the + # nested `useCompletion.ts`. + assert any("appChrome.tsx" in t for t in texts), texts + assert not any("useCompletion.ts" in t for t in texts), texts + + +def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch): + """`@folder:` still lists directories — fuzzy scanner only walks + files (git-tracked + untracked), so defer to the dir-listing path.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + texts = [t for t, _, _ in _items("@folder:ui")] + + # Root has `ui-tui/` as a directory; the listing branch should surface it. 
+ assert any(t.startswith("@folder:ui-tui") for t in texts), texts + + +def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch): + """`.env` doesn't leak into `@env` but does show for `@.env`.""" + monkeypatch.chdir(tmp_path) + _nested_fixture(tmp_path) + + assert not any(".env" in t for t, _, _ in _items("@env")) + assert any(t.endswith(".env") for t, _, _ in _items("@.env")) + + +def test_fuzzy_caps_results(tmp_path, monkeypatch): + """The 30-item cap survives a big tree.""" + monkeypatch.chdir(tmp_path) + for i in range(60): + (tmp_path / f"mod_{i:03d}.py").write_text("x") + + items = _items("@mod") + + assert len(items) == 30 + + +def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch): + """When the gateway runs from a subdirectory of a git repo, fuzzy + completion paths must resolve under that cwd — not under the repo root. + + Without this, `@appChrome` from inside `apps/web/` would suggest + `@file:apps/web/src/foo.tsx` but the agent (resolving from cwd) would + look for `apps/web/apps/web/src/foo.tsx` and fail. We translate every + `git ls-files` result back to a `relpath(root)` and drop anything + outside `root` so the completion contract stays "paths are cwd-relative". 
+ """ + import subprocess + + subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True) + subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmp_path, check=True) + subprocess.run(["git", "config", "user.name", "test"], cwd=tmp_path, check=True) + + (tmp_path / "apps" / "web" / "src").mkdir(parents=True) + (tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x") + (tmp_path / "apps" / "api" / "src").mkdir(parents=True) + (tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x") + (tmp_path / "README.md").write_text("x") + + subprocess.run(["git", "add", "."], cwd=tmp_path, check=True) + subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=tmp_path, check=True) + + # Run from `apps/web/` — completions should be relative to here, and + # files outside this subtree (apps/api, README.md at root) shouldn't + # appear at all. + monkeypatch.chdir(tmp_path / "apps" / "web") + + texts = [t for t, _, _ in _items("@appChrome")] + + assert "@file:src/appChrome.tsx" in texts, texts + assert not any("apps/web/" in t for t in texts), texts + + server._fuzzy_cache.clear() + other_texts = [t for t, _, _ in _items("@server")] + + assert not any("server.ts" in t for t in other_texts), other_texts + + server._fuzzy_cache.clear() + readme_texts = [t for t, _, _ in _items("@README")] + + assert not any("README.md" in t for t in readme_texts), readme_texts diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 52408ed9f..f0a870b6e 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -3256,6 +3256,162 @@ def _(rid, params: dict) -> dict: # ── Methods: complete ───────────────────────────────────────────────── +_FUZZY_CACHE_TTL_S = 5.0 +_FUZZY_CACHE_MAX_FILES = 20000 +_FUZZY_FALLBACK_EXCLUDES = frozenset( + { + ".git", + ".hg", + ".svn", + ".next", + ".cache", + ".venv", + "venv", + "node_modules", + "__pycache__", + "dist", + "build", + "target", + ".mypy_cache", + ".pytest_cache", + ".ruff_cache", + } +) 
+_fuzzy_cache_lock = threading.Lock() +_fuzzy_cache: dict[str, tuple[float, list[str]]] = {} + + +def _list_repo_files(root: str) -> list[str]: + """Return file paths relative to ``root``. + + Uses ``git ls-files`` from the repo top (resolved via + ``rev-parse --show-toplevel``) so the listing covers tracked + untracked + files anywhere in the repo, then converts each path back to be relative + to ``root``. Files outside ``root`` (parent directories of cwd, sibling + subtrees) are excluded so the picker stays scoped to what's reachable + from the gateway's cwd. Falls back to a bounded ``os.walk(root)`` when + ``root`` isn't inside a git repo. Result cached per-root for + ``_FUZZY_CACHE_TTL_S`` so rapid keystrokes don't respawn git processes. + """ + now = time.monotonic() + with _fuzzy_cache_lock: + cached = _fuzzy_cache.get(root) + if cached and now - cached[0] < _FUZZY_CACHE_TTL_S: + return cached[1] + + files: list[str] = [] + try: + top_result = subprocess.run( + ["git", "-C", root, "rev-parse", "--show-toplevel"], + capture_output=True, + timeout=2.0, + check=False, + ) + if top_result.returncode == 0: + top = top_result.stdout.decode("utf-8", "replace").strip() + list_result = subprocess.run( + ["git", "-C", top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"], + capture_output=True, + timeout=2.0, + check=False, + ) + if list_result.returncode == 0: + for p in list_result.stdout.decode("utf-8", "replace").split("\0"): + if not p: + continue + rel = os.path.relpath(os.path.join(top, p), root).replace(os.sep, "/") + # Skip parents/siblings of cwd — keep the picker scoped + # to root-and-below, matching Cmd-P workspace semantics. + if rel.startswith("../"): + continue + files.append(rel) + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break + except (OSError, subprocess.TimeoutExpired): + pass + + if not files: + # Fallback walk: skip vendor/build dirs + dot-dirs so the walk stays + # tractable. 
Dotfiles themselves survive — the ranker decides based + # on whether the query starts with `.`. + try: + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + dirnames[:] = [ + d + for d in dirnames + if d not in _FUZZY_FALLBACK_EXCLUDES and not d.startswith(".") + ] + rel_dir = os.path.relpath(dirpath, root) + for f in filenames: + rel = f if rel_dir == "." else f"{rel_dir}/{f}" + files.append(rel.replace(os.sep, "/")) + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break + if len(files) >= _FUZZY_CACHE_MAX_FILES: + break + except OSError: + pass + + with _fuzzy_cache_lock: + _fuzzy_cache[root] = (now, files) + + return files + + +def _fuzzy_basename_rank(name: str, query: str) -> tuple[int, int] | None: + """Rank ``name`` against ``query``; lower is better. Returns None to reject. + + Tiers (kind): + 0 — exact basename + 1 — basename prefix (e.g. `app` → `appChrome.tsx`) + 2 — word-boundary / camelCase hit (e.g. `chrome` → `appChrome.tsx`) + 3 — substring anywhere in basename + 4 — subsequence match (every query char appears in order) + + Secondary key is `len(name)` so shorter names win ties. + """ + if not query: + return (3, len(name)) + + nl = name.lower() + ql = query.lower() + + if nl == ql: + return (0, len(name)) + + if nl.startswith(ql): + return (1, len(name)) + + # Word-boundary split: `foo-bar_baz.qux` → ["foo","bar","baz","qux"]. + # camelCase split: `appChrome` → ["app","Chrome"]. Cheap approximation; + # falls through to substring/subsequence if it misses. + parts: list[str] = [] + buf = "" + for ch in name: + if ch in "-_." or (ch.isupper() and buf and not buf[-1].isupper()): + if buf: + parts.append(buf) + buf = ch if ch not in "-_." 
else "" + else: + buf += ch + if buf: + parts.append(buf) + for p in parts: + if p.lower().startswith(ql): + return (2, len(name)) + + if ql in nl: + return (3, len(name)) + + i = 0 + for ch in nl: + if ch == ql[i]: + i += 1 + if i == len(ql): + return (4, len(name)) + + return None + @method("complete.path") def _(rid, params: dict) -> dict: @@ -3291,6 +3447,42 @@ def _(rid, params: dict) -> dict: prefix_tag = "" path_part = query if is_context else query + # Fuzzy basename search across the repo when the user types a bare + # name with no path separator — `@appChrome` surfaces every file + # whose basename matches, regardless of directory depth. Matches what + # editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with + # `/`, `./`, `~/`, `/abs`) fall through to the directory-listing + # path so explicit navigation intent is preserved. + if ( + is_context + and path_part + and "/" not in path_part + and prefix_tag != "folder" + ): + root = os.getcwd() + ranked: list[tuple[tuple[int, int], str, str]] = [] + for rel in _list_repo_files(root): + basename = os.path.basename(rel) + if basename.startswith(".") and not path_part.startswith("."): + continue + rank = _fuzzy_basename_rank(basename, path_part) + if rank is None: + continue + ranked.append((rank, rel, basename)) + + ranked.sort(key=lambda r: (r[0], len(r[1]), r[1])) + tag = prefix_tag or "file" + for _, rel, basename in ranked[:30]: + items.append( + { + "text": f"@{tag}:{rel}", + "display": basename, + "meta": os.path.dirname(rel), + } + ) + + return _ok(rid, {"items": items}) + expanded = _normalize_completion_path(path_part) if path_part else "." if expanded == "." or not expanded: search_dir, match = ".", ""