mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
fix(search): sanitize ":" in FTS5 queries so colon searches don't silently return empty

":" is FTS5's column-filter operator. With a single-column "content" FTS table, an unquoted query like "TODO: fix" parses as "column:term" and raises sqlite3.OperationalError ("no such column: TODO"). search_messages() catches that OperationalError at the execute site and returns [], so colon queries silently yield zero hits even when the content is present. This hits both the session_search tool and the dashboard search.

Add ":" to the Step 2 metacharacter strip in _sanitize_fts5_query(), mirroring how the other FTS5 syntax characters are already stripped. Colons inside quoted phrases are preserved (Step 1 protects them).

Adds a regression test asserting a colon query still finds matching content, plus unit assertions on the sanitizer.
This commit is contained in:
parent
e8c837c921
commit
d1771114ed
2 changed files with 35 additions and 5 deletions
|
|
@ -2709,9 +2709,10 @@ class SessionDB:
|
|||
"""Sanitize user input for safe use in FTS5 MATCH queries.
|
||||
|
||||
FTS5 has its own query syntax where characters like ``"``, ``(``, ``)``,
|
||||
``+``, ``*``, ``{``, ``}`` and bare boolean operators (``AND``, ``OR``,
|
||||
``NOT``) have special meaning. Passing raw user input directly to
|
||||
MATCH can cause ``sqlite3.OperationalError``.
|
||||
``+``, ``*``, ``{``, ``}``, the column-filter operator ``:`` and bare
|
||||
boolean operators (``AND``, ``OR``, ``NOT``) have special meaning.
|
||||
Passing raw user input directly to MATCH can cause
|
||||
``sqlite3.OperationalError``.
|
||||
|
||||
Strategy:
|
||||
- Preserve properly paired quoted phrases (``"exact phrase"``)
|
||||
|
|
@ -2730,8 +2731,12 @@ class SessionDB:
|
|||
|
||||
sanitized = re.sub(r'"[^"]*"', _preserve_quoted, query)
|
||||
|
||||
# Step 2: Strip remaining (unmatched) FTS5-special characters
|
||||
sanitized = re.sub(r'[+{}()\"^]', " ", sanitized)
|
||||
# Step 2: Strip remaining (unmatched) FTS5-special characters. ``:`` is
|
||||
# FTS5's column-filter operator (``col:term``); since the FTS table has a
|
||||
# single ``content`` column, an unquoted colon query like ``TODO: fix``
|
||||
# parses as ``column:term`` and raises "no such column" — swallowed at
|
||||
# the execute site into zero results. Strip it like the others.
|
||||
sanitized = re.sub(r'[+{}():\"^]', " ", sanitized)
|
||||
|
||||
# Step 3: Collapse repeated * (e.g. "***") into a single one,
|
||||
# and remove leading * (prefix-only needs at least one char before *)
|
||||
|
|
|
|||
|
|
@ -934,6 +934,27 @@ class TestFTS5Search:
|
|||
assert isinstance(results2, list)
|
||||
assert len(results2) >= 1
|
||||
|
||||
def test_search_colon_query_still_finds_content(self, db):
|
||||
"""Queries containing ':' must not silently return empty.
|
||||
|
||||
':' is FTS5's column-filter operator. With a single-column FTS table an
|
||||
unquoted query like 'TODO: fix' parses as 'column:term', raises
|
||||
"no such column: TODO", and the swallowed error turns into zero results
|
||||
even though the content is present. Regression for that silent-empty bug.
|
||||
"""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message("s1", role="user", content="TODO fix the deployment script")
|
||||
|
||||
# Control: the same content is found without the colon.
|
||||
assert len(db.search_messages("deployment")) >= 1
|
||||
|
||||
# The colon query must find the message, not silently return [].
|
||||
results = db.search_messages("TODO: fix")
|
||||
assert isinstance(results, list)
|
||||
assert len(results) >= 1
|
||||
assert any("deployment" in (r.get("snippet") or r.get("content", "")).lower()
|
||||
for r in results)
|
||||
|
||||
def test_search_quoted_phrase_preserved(self, db):
|
||||
"""User-provided quoted phrases should be preserved for exact matching."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
|
|
@ -963,6 +984,10 @@ class TestFTS5Search:
|
|||
assert s('***') == ''
|
||||
# Valid prefix kept
|
||||
assert s('deploy*') == 'deploy*'
|
||||
# Colon (FTS5 column-filter operator) stripped, both terms preserved
|
||||
assert ':' not in s('TODO: fix')
|
||||
assert s('TODO: fix').split() == ['TODO', 'fix']
|
||||
assert ':' not in s('error:timeout')
|
||||
|
||||
def test_sanitize_fts5_preserves_quoted_phrases(self):
|
||||
"""Properly paired double-quoted phrases should be preserved."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue