mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix: warn on line-oriented newline search patterns
This commit is contained in:
parent
eb9a002284
commit
97563ab821
3 changed files with 107 additions and 5 deletions
|
|
@ -28,6 +28,7 @@ import pytest
|
|||
|
||||
from tools.file_operations import (
|
||||
ShellFileOperations,
|
||||
_pattern_has_regex_newline,
|
||||
_split_tool_diagnostics,
|
||||
)
|
||||
from tools.environments.local import LocalEnvironment
|
||||
|
|
@ -124,6 +125,63 @@ class TestSearchErrorGuard:
|
|||
assert res.total_count >= 4
|
||||
|
||||
|
||||
class TestSearchContentNewlineWarning:
    """Coverage for the newline-in-regex warning attached to content searches."""

    def test_odd_backslash_n_is_detected_as_regex_newline(self):
        # An odd run of backslashes before ``n`` is a regex newline escape.
        for candidate in (r"needle\n", r"needle\\\n"):
            assert _pattern_has_regex_newline(candidate)

    def test_even_backslash_n_is_literal_and_not_detected(self):
        # An even run of backslashes pairs up, leaving a plain ``n`` behind.
        for candidate in (r"needle\\n", r"needle\\\\n"):
            assert not _pattern_has_regex_newline(candidate)

    def test_zero_matches_with_regex_newline_adds_warning_not_error(self, match_tree):
        ops = _ops(match_tree)
        outcome = ops.search(
            r"absent\npattern",
            path=str(match_tree),
            target="content",
            context=2,
        )

        assert outcome.error is None
        assert outcome.total_count == 0
        # The warning must both report the empty result and explain why.
        message = outcome.warning
        assert message is not None
        assert "0 results found" in message
        assert "-U/--multiline" in message

    def test_actual_newline_pattern_adds_warning_not_error(self, match_tree):
        # Here the newline is already decoded into the pattern string itself.
        ops = _ops(match_tree)
        outcome = ops.search(
            "absent\npattern",
            path=str(match_tree),
            target="content",
        )

        assert outcome.error is None
        assert outcome.total_count == 0
        assert outcome.warning is not None

    def test_search_with_matching_alternative_and_regex_newline_warns(self, match_tree):
        # One alternative could match on its own, but the other carries a
        # regex newline; the search is expected to come back empty + warned.
        ops = _ops(match_tree)
        outcome = ops.search(
            r"needle|absent\npattern",
            path=str(match_tree),
            target="content",
        )

        assert outcome.error is None
        assert outcome.total_count == 0
        assert outcome.warning is not None

    def test_literal_backslash_n_pattern_does_not_warn(self, match_tree):
        # An escaped backslash followed by ``n`` is a literal search: no warning.
        ops = _ops(match_tree)
        outcome = ops.search(
            r"absent\\npattern",
            path=str(match_tree),
            target="content",
        )

        assert outcome.error is None
        assert outcome.total_count == 0
        assert outcome.warning is None
|
||||
|
||||
|
||||
class TestSplitToolDiagnostics:
|
||||
"""Unit coverage for the shape-based diagnostic/payload splitter."""
|
||||
|
||||
|
|
|
|||
|
|
@ -242,6 +242,7 @@ class SearchResult:
|
|||
total_count: int = 0
|
||||
truncated: bool = False
|
||||
limit_reason: Optional[str] = None
|
||||
warning: Optional[str] = None
|
||||
error: Optional[str] = None
|
||||
|
||||
# Densify content-mode matches into a path-grouped text block above this
|
||||
|
|
@ -302,6 +303,8 @@ class SearchResult:
|
|||
result["truncated"] = True
|
||||
if self.limit_reason:
|
||||
result["limit_reason"] = self.limit_reason
|
||||
if self.warning:
|
||||
result["warning"] = self.warning
|
||||
if self.error:
|
||||
result["error"] = self.error
|
||||
return result
|
||||
|
|
@ -719,6 +722,45 @@ def normalize_search_pagination(offset: Any = DEFAULT_SEARCH_OFFSET,
|
|||
return normalized_offset, normalized_limit
|
||||
|
||||
|
||||
_REGEX_NEWLINE_ESCAPE_RE = re.compile(r"(?<!\\)(?:\\\\)*\\n")
|
||||
|
||||
|
||||
def _pattern_has_regex_newline(pattern: str) -> bool:
|
||||
"""Return True when a content-search regex tries to match a newline.
|
||||
|
||||
``search_files`` runs rg/grep in line-oriented mode, not rg
|
||||
``-U``/``--multiline`` mode, so newline regexes cannot match across
|
||||
lines. Detect both a literal newline already decoded into the tool
|
||||
argument and a regex ``\n`` escape (odd number of backslashes before
|
||||
``n``). Even backslashes, e.g. ``\\n``, mean a literal backslash+n
|
||||
search and should not warn.
|
||||
"""
|
||||
return "\n" in pattern or bool(_REGEX_NEWLINE_ESCAPE_RE.search(pattern))
|
||||
|
||||
|
||||
def _is_line_oriented_newline_error(error: Optional[str]) -> bool:
|
||||
"""Return True for rg's hard error when multiline mode is required."""
|
||||
if not error:
|
||||
return False
|
||||
return "literal \"\\n\" is not allowed" in error and "--multiline" in error
|
||||
|
||||
|
||||
def _maybe_warn_line_oriented_newline_pattern(result: SearchResult, pattern: str) -> SearchResult:
    """Swap in a newline-regex warning when a search produced nothing usable.

    The result is modified in place and also returned. It is left untouched
    when it has matches, when the pattern contains no newline construct, or
    when it carries an error other than rg's multiline-required failure.

    Args:
        result: The search outcome produced by the rg/grep backend.
        pattern: The regex the caller searched for.

    Returns:
        The same ``result`` object, possibly with ``error`` cleared and
        ``warning`` populated.
    """
    came_back_empty = result.total_count == 0
    if not (came_back_empty and _pattern_has_regex_newline(pattern)):
        return result

    # An unrelated failure must stay visible; only rg's own complaint about
    # the newline (or no error at all) is replaced by the friendlier note.
    unrelated_failure = bool(result.error) and not _is_line_oriented_newline_error(result.error)
    if unrelated_failure:
        return result

    note = (
        "0 results found. Note: search_files content search is line-oriented "
        "and does not run ripgrep with -U/--multiline, so `\\n` in the regex "
        "does not match line breaks. Use context=N to inspect neighboring "
        "lines, or escape as `\\\\n` when searching for a literal backslash+n."
    )
    result.error = None
    result.warning = note
    return result
|
||||
|
||||
|
||||
class ShellFileOperations(FileOperations):
|
||||
"""
|
||||
File operations implemented via shell commands.
|
||||
|
|
@ -2117,17 +2159,19 @@ class ShellFileOperations(FileOperations):
|
|||
"""Search for content inside files (grep-like)."""
|
||||
# Try ripgrep first (fast), fallback to grep (slower but works)
|
||||
if self._has_command('rg'):
|
||||
return self._search_with_rg(pattern, path, file_glob, limit, offset,
|
||||
output_mode, context)
|
||||
elif self._has_command('grep'):
|
||||
return self._search_with_grep(pattern, path, file_glob, limit, offset,
|
||||
result = self._search_with_rg(pattern, path, file_glob, limit, offset,
|
||||
output_mode, context)
|
||||
elif self._has_command('grep'):
|
||||
result = self._search_with_grep(pattern, path, file_glob, limit, offset,
|
||||
output_mode, context)
|
||||
else:
|
||||
# Neither rg nor grep available (Windows without Git Bash, etc.)
|
||||
return SearchResult(
|
||||
error="Content search requires ripgrep (rg) or grep. "
|
||||
"Install ripgrep: https://github.com/BurntSushi/ripgrep#installation"
|
||||
)
|
||||
|
||||
return _maybe_warn_line_oriented_newline_pattern(result, pattern)
|
||||
|
||||
def _search_with_rg(self, pattern: str, path: str, file_glob: Optional[str],
|
||||
limit: int, offset: int, output_mode: str, context: int) -> SearchResult:
|
||||
|
|
|
|||
|
|
@ -1595,7 +1595,7 @@ PATCH_SCHEMA = {
|
|||
|
||||
SEARCH_FILES_SCHEMA = {
|
||||
"name": "search_files",
|
||||
"description": "Search file contents or find files by name. Use this instead of grep/rg/find/ls in terminal. Ripgrep-backed, faster than shell equivalents.\n\nContent search (target='content'): Regex search inside files. Output modes: full matches with line numbers, file paths only, or match counts. Content search is line-oriented: do not put \\n in regex patterns (even inside alternation); use context to inspect neighboring lines.\n\nFile search (target='files'): Find files by glob pattern (e.g., '*.py', '*config*'). Also use this instead of ls — results sorted by modification time.",
|
||||
"description": "Search file contents or find files by name. Use this instead of grep/rg/find/ls in terminal. Ripgrep-backed, faster than shell equivalents.\n\nContent search (target='content'): Regex search inside files. Output modes: full matches with line numbers, file paths only, or match counts.\n\nFile search (target='files'): Find files by glob pattern (e.g., '*.py', '*config*'). Also use this instead of ls — results sorted by modification time.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue