fix(search): sanitize ":" in FTS5 queries so colon searches don't silently return empty

":" is FTS5's column-filter operator. With a single-column "content" FTS table,
an unquoted query like "TODO: fix" parses as "column:term" and raises
"no such column: TODO". search_messages() catches that OperationalError at the
execute site and returns [], so colon queries silently yield zero hits even when
the content is present. This hits both the session_search tool and the dashboard
search.

Add ":" to the Step 2 metacharacter strip in _sanitize_fts5_query(), mirroring
how the other FTS5 syntax characters are already stripped. Colons inside quoted
phrases are preserved (Step 1 protects them). Also add a regression test asserting
that a colon query still finds matching content, plus unit assertions on the sanitizer.
This commit is contained in:
Dusk1e 2026-06-06 17:37:31 +03:00 committed by Teknium
parent e8c837c921
commit d1771114ed
2 changed files with 35 additions and 5 deletions

View file

@ -2709,9 +2709,10 @@ class SessionDB:
"""Sanitize user input for safe use in FTS5 MATCH queries.
FTS5 has its own query syntax where characters like ``"``, ``(``, ``)``,
``+``, ``*``, ``{``, ``}`` and bare boolean operators (``AND``, ``OR``,
``NOT``) have special meaning. Passing raw user input directly to
MATCH can cause ``sqlite3.OperationalError``.
``+``, ``*``, ``{``, ``}``, the column-filter operator ``:`` and bare
boolean operators (``AND``, ``OR``, ``NOT``) have special meaning.
Passing raw user input directly to MATCH can cause
``sqlite3.OperationalError``.
Strategy:
- Preserve properly paired quoted phrases (``"exact phrase"``)
@ -2730,8 +2731,12 @@ class SessionDB:
sanitized = re.sub(r'"[^"]*"', _preserve_quoted, query)
# Step 2: Strip remaining (unmatched) FTS5-special characters
sanitized = re.sub(r'[+{}()\"^]', " ", sanitized)
# Step 2: Strip remaining (unmatched) FTS5-special characters. ``:`` is
# FTS5's column-filter operator (``col:term``); since the FTS table has a
# single ``content`` column, an unquoted colon query like ``TODO: fix``
# parses as ``column:term`` and raises "no such column". That error is
# swallowed at the execute site and surfaces as zero results, so strip
# ``:`` like the other special characters.
sanitized = re.sub(r'[+{}():\"^]', " ", sanitized)
# Step 3: Collapse repeated * (e.g. "***") into a single one,
# and remove leading * (prefix-only needs at least one char before *)

View file

@ -934,6 +934,27 @@ class TestFTS5Search:
assert isinstance(results2, list)
assert len(results2) >= 1
def test_search_colon_query_still_finds_content(self, db):
    """A search containing ':' must still match stored content.

    FTS5 treats ':' as its column-filter operator. Against a single-column
    FTS table, an unquoted query such as 'TODO: fix' is parsed as
    'column:term' and raises "no such column: TODO"; that error is swallowed
    and shows up as an empty result list even though matching content
    exists. This is the regression test for that silent-empty behavior.
    """
    db.create_session(session_id="s1", source="cli")
    db.append_message("s1", role="user", content="TODO fix the deployment script")

    # Control: without a colon, the stored message is found.
    control_hits = db.search_messages("deployment")
    assert len(control_hits) >= 1

    # With the colon, the same message must still be returned rather than [].
    colon_hits = db.search_messages("TODO: fix")
    assert isinstance(colon_hits, list)
    assert len(colon_hits) >= 1

    def _hit_text(hit):
        # Prefer the snippet; fall back to the content field when it is empty.
        return (hit.get("snippet") or hit.get("content", "")).lower()

    assert any("deployment" in _hit_text(hit) for hit in colon_hits)
def test_search_quoted_phrase_preserved(self, db):
"""User-provided quoted phrases should be preserved for exact matching."""
db.create_session(session_id="s1", source="cli")
@ -963,6 +984,10 @@ class TestFTS5Search:
assert s('***') == ''
# Valid prefix kept
assert s('deploy*') == 'deploy*'
# Colon (FTS5 column-filter operator) stripped, both terms preserved
assert ':' not in s('TODO: fix')
assert s('TODO: fix').split() == ['TODO', 'fix']
assert ':' not in s('error:timeout')
def test_sanitize_fts5_preserves_quoted_phrases(self):
"""Properly paired double-quoted phrases should be preserved."""