From d1771114eda9f4982f2dd204e617b752c2544f21 Mon Sep 17 00:00:00 2001 From: Dusk1e Date: Sat, 6 Jun 2026 17:37:31 +0300 Subject: [PATCH] fix(search): sanitize ":" in FTS5 queries so colon searches don't silently return empty ":" is FTS5's column-filter operator. With a single-column "content" FTS table, an unquoted query like "TODO: fix" parses as "column:term" and raises "no such column: TODO". search_messages() catches that OperationalError at the execute site and returns [], so colon queries silently yield zero hits even when the content is present. This hits both the session_search tool and the dashboard search. Add ":" to the Step 2 metacharacter strip in _sanitize_fts5_query(), mirroring how the other FTS5 syntax characters are already stripped. Colons inside quoted phrases are preserved (Step 1 protects them). Adds a regression test asserting a colon query still finds matching content, plus unit assertions on the sanitizer. --- hermes_state.py | 15 ++++++++++----- tests/test_hermes_state.py | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/hermes_state.py b/hermes_state.py index 256bcc8c448..5a6aa8e8a62 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -2709,9 +2709,10 @@ class SessionDB: """Sanitize user input for safe use in FTS5 MATCH queries. FTS5 has its own query syntax where characters like ``"``, ``(``, ``)``, - ``+``, ``*``, ``{``, ``}`` and bare boolean operators (``AND``, ``OR``, - ``NOT``) have special meaning. Passing raw user input directly to - MATCH can cause ``sqlite3.OperationalError``. + ``+``, ``*``, ``{``, ``}``, the column-filter operator ``:`` and bare + boolean operators (``AND``, ``OR``, ``NOT``) have special meaning. + Passing raw user input directly to MATCH can cause + ``sqlite3.OperationalError``. Strategy: - Preserve properly paired quoted phrases (``"exact phrase"``) @@ -2730,8 +2731,12 @@ class SessionDB: sanitized = re.sub(r'"[^"]*"', _preserve_quoted, query) - # Step 2: Strip remaining (unmatched) FTS5-special characters - sanitized = re.sub(r'[+{}()\"^]', " ", sanitized) + # Step 2: Strip remaining (unmatched) FTS5-special characters. ``:`` is + # FTS5's column-filter operator (``col:term``); since the FTS table has a + # single ``content`` column, an unquoted colon query like ``TODO: fix`` + # parses as ``column:term`` and raises "no such column" — swallowed at + # the execute site into zero results. Strip it like the others. + sanitized = re.sub(r'[+{}():\"^]', " ", sanitized) # Step 3: Collapse repeated * (e.g. "***") into a single one, # and remove leading * (prefix-only needs at least one char before *) diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index 8d0b55775a5..52eab1bd99a 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -934,6 +934,27 @@ class TestFTS5Search: assert isinstance(results2, list) assert len(results2) >= 1 + def test_search_colon_query_still_finds_content(self, db): + """Queries containing ':' must not silently return empty. + + ':' is FTS5's column-filter operator. With a single-column FTS table an + unquoted query like 'TODO: fix' parses as 'column:term', raises + "no such column: TODO", and the swallowed error turns into zero results + even though the content is present. Regression for that silent-empty bug. + """ + db.create_session(session_id="s1", source="cli") + db.append_message("s1", role="user", content="TODO fix the deployment script") + + # Control: the same content is found without the colon. + assert len(db.search_messages("deployment")) >= 1 + + # The colon query must find the message, not silently return []. + results = db.search_messages("TODO: fix") + assert isinstance(results, list) + assert len(results) >= 1 + assert any("deployment" in (r.get("snippet") or r.get("content", "")).lower() + for r in results) + def test_search_quoted_phrase_preserved(self, db): """User-provided quoted phrases should be preserved for exact matching.""" db.create_session(session_id="s1", source="cli") @@ -963,6 +984,10 @@ class TestFTS5Search: assert s('***') == '' # Valid prefix kept assert s('deploy*') == 'deploy*' + # Colon (FTS5 column-filter operator) stripped, both terms preserved + assert ':' not in s('TODO: fix') + assert s('TODO: fix').split() == ['TODO', 'fix'] + assert ':' not in s('error:timeout') def test_sanitize_fts5_preserves_quoted_phrases(self): """Properly paired double-quoted phrases should be preserved."""