mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: tolerate non-utf8 filenames in file discovery
This commit is contained in:
parent
d990fa52ed
commit
a7df9f0245
4 changed files with 75 additions and 5 deletions
|
|
@ -474,20 +474,25 @@ def _iter_visible_entries(path: Path, cwd: Path, limit: int) -> list[Path]:
|
|||
return output
|
||||
|
||||
|
||||
def _decode_fs_lines(data: bytes) -> list[str]:
|
||||
"""Decode subprocess file listings using filesystem semantics."""
|
||||
return [os.fsdecode(line) for line in data.splitlines() if line]
|
||||
|
||||
|
||||
def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
    """List files under *path* using ``rg --files``, executed from *cwd*.

    ripgrep's stdout is captured as raw bytes and decoded line by line with
    ``_decode_fs_lines`` so that filenames which are not valid UTF-8 do not
    abort the listing with a decode error.

    Args:
        path: Directory (or file) to list; it is passed to ripgrep relative
            to *cwd*.
        cwd: Working directory for the ripgrep process.
        limit: Maximum number of paths to return.

    Returns:
        Up to *limit* paths exactly as printed by ripgrep, or ``None`` when
        ripgrep cannot be started, times out (10 s), or exits non-zero, so
        the caller can fall back to another listing strategy.

    Raises:
        ValueError: Propagated from ``Path.relative_to`` if *path* is not
            located under *cwd*.
    """
    try:
        result = subprocess.run(
            ["rg", "--files", str(path.relative_to(cwd))],
            cwd=cwd,
            capture_output=True,
            # Capture bytes, not text: text mode decodes eagerly and raises
            # UnicodeDecodeError on filenames that are not valid in the
            # locale encoding.
            text=False,
            timeout=10,
        )
    except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
        return None
    if result.returncode != 0:
        return None
    # Slice before building Path objects so only `limit` paths are created.
    return [Path(line) for line in _decode_fs_lines(result.stdout)[:limit]]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -991,11 +991,14 @@ class SlashCommandCompleter(Completer):
|
|||
continue
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=2,
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=False,
|
||||
timeout=2,
|
||||
cwd=cwd,
|
||||
)
|
||||
if proc.returncode == 0 and proc.stdout.strip():
|
||||
raw = proc.stdout.strip().split("\n")
|
||||
raw = _decode_fs_lines(proc.stdout)
|
||||
if proc.returncode == 0 and raw:
|
||||
# Store relative paths
|
||||
for p in raw[:5000]:
|
||||
rel = os.path.relpath(p, cwd) if os.path.isabs(p) else p
|
||||
|
|
@ -1324,6 +1327,14 @@ class SlashCommandAutoSuggest(AutoSuggest):
|
|||
return None
|
||||
|
||||
|
||||
def _decode_fs_lines(data: bytes) -> list[str]:
|
||||
"""Decode subprocess file listings using filesystem semantics.
|
||||
|
||||
This preserves non-UTF-8 filenames via surrogateescape instead of raising.
|
||||
"""
|
||||
return [os.fsdecode(line) for line in data.splitlines() if line]
|
||||
|
||||
|
||||
def _file_size_label(path: str) -> str:
|
||||
"""Return a compact human-readable file size, or '' on error."""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
|
@ -207,6 +209,33 @@ def test_binary_and_missing_files_become_warnings(sample_repo: Path):
|
|||
assert "not found" in result.message.lower()
|
||||
|
||||
|
||||
@pytest.mark.skipif(shutil.which("rg") is None, reason="rg is required for folder listing tests")
def test_folder_listing_handles_non_utf8_filenames(tmp_path: Path):
    """``@folder:`` expansion must survive a directory holding a non-UTF-8 filename.

    A regular file and a file whose name contains the byte 0xFF are created
    side by side; the folder reference must expand, mention both entries and
    not be blocked.
    """
    from agent.context_references import preprocess_context_references

    workspace = tmp_path / "repo"
    workspace.mkdir()
    (workspace / "visible.txt").write_text("hello\n", encoding="utf-8")

    # Build the name as bytes: 0xFF never occurs in valid UTF-8, so this is a
    # filename that cannot be decoded strictly.
    bad_path = os.fsencode(workspace) + b"/bad-\xff.txt"
    try:
        fd = os.open(bad_path, os.O_WRONLY | os.O_CREAT, 0o644)
    except (OSError, UnicodeError) as exc:
        # Some platforms/filesystems refuse undecodable names outright; the
        # scenario cannot be reproduced there, which is not a product failure.
        pytest.skip(f"filesystem does not accept non-UTF-8 filenames: {exc}")
    try:
        os.write(fd, b"x")
    finally:
        os.close(fd)

    result = preprocess_context_references(
        "Check @folder:.",
        cwd=workspace,
        context_length=100_000,
    )

    assert result.expanded
    assert "visible.txt" in result.message
    # Only the decodable prefix is asserted; how the 0xFF byte is rendered in
    # the message is left to the implementation.
    assert "bad-" in result.message
    assert not result.blocked
|
||||
|
||||
|
||||
def test_soft_budget_warns_and_hard_budget_refuses(sample_repo: Path):
|
||||
from agent.context_references import preprocess_context_references
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
"""Tests for file path autocomplete in the CLI completer."""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
|
@ -163,6 +164,30 @@ class TestIntegration:
|
|||
# /etc/hosts should exist on Linux
|
||||
assert any("host" in n.lower() for n in names)
|
||||
|
||||
@pytest.mark.skipif(shutil.which("rg") is None, reason="rg is required for project file cache tests")
def test_bare_at_completion_handles_non_utf8_filenames(self, completer, tmp_path):
    """A bare ``@`` must offer completions even if a non-UTF-8 filename exists.

    Both the ordinary file and the file whose name contains the byte 0xFF
    are expected to appear as ``@file:`` completions.
    """
    good = tmp_path / "good.txt"
    good.write_text("ok", encoding="utf-8")

    # Build the name as bytes: 0xFF never occurs in valid UTF-8, so this is a
    # filename that cannot be decoded strictly.
    bad_path = os.fsencode(tmp_path) + b"/bad-\xff.txt"
    try:
        fd = os.open(bad_path, os.O_WRONLY | os.O_CREAT, 0o644)
    except (OSError, UnicodeError) as exc:
        # Some platforms/filesystems refuse undecodable names outright; the
        # scenario cannot be reproduced there, which is not a product failure.
        pytest.skip(f"filesystem does not accept non-UTF-8 filenames: {exc}")
    try:
        os.write(fd, b"x")
    finally:
        os.close(fd)

    # The completer is exercised from inside tmp_path; always restore the
    # previous working directory so later tests are unaffected.
    old_cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        doc = Document("@", cursor_position=1)
        event = MagicMock()
        completions = list(completer.get_completions(doc, event))
        texts = [completion.text for completion in completions]
        assert "@file:good.txt" in texts
        assert any(text.startswith("@file:bad-") for text in texts)
    finally:
        os.chdir(old_cwd)
|
||||
|
||||
|
||||
class TestFileSizeLabel:
|
||||
def test_bytes(self, tmp_path):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue