diff --git a/hermes_state.py b/hermes_state.py
index 19c6a269b99..f54fbbd6af5 100644
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -772,7 +772,21 @@ class SessionDB:
     @staticmethod
     def _is_fts5_unavailable_error(exc: sqlite3.OperationalError) -> bool:
+        """Return True if *exc* reports a missing FTS5 module or tokenizer.
+
+        Callers that must tell the two cases apart should additionally
+        consult ``_is_tokenizer_unavailable_error``.
+        """
         err = str(exc).lower()
-        return "no such module" in err and "fts5" in err
+        module_missing = "no such module" in err and "fts5" in err
+        # SQLite builds that have FTS5 but lack the optional trigram tokenizer
+        # raise "no such tokenizer: trigram" instead of "no such module".
+        tokenizer_missing = "no such tokenizer" in err
+        return module_missing or tokenizer_missing
+
+    @staticmethod
+    def _is_tokenizer_unavailable_error(exc: sqlite3.OperationalError) -> bool:
+        """Check if the error is about a specific tokenizer (not the whole FTS5 module)."""
+        return "no such tokenizer" in str(exc).lower()
 
     def _warn_fts5_unavailable(self, exc: sqlite3.OperationalError) -> None:
         self._fts_enabled = False
@@ -844,7 +858,9 @@ class SessionDB:
             return True
         except sqlite3.OperationalError as exc:
             if self._is_fts5_unavailable_error(exc):
-                self._warn_fts5_unavailable(exc)
+                # Only disable FTS entirely when the whole module is missing.
+                if not self._is_tokenizer_unavailable_error(exc):
+                    self._warn_fts5_unavailable(exc)
                 return None
             if "no such table" in str(exc).lower():
                 return False
@@ -868,7 +884,11 @@ class SessionDB:
         except sqlite3.OperationalError as exc:
             if not self._is_fts5_unavailable_error(exc):
                 raise
-            self._warn_fts5_unavailable(exc)
+            # Only disable FTS entirely when the whole FTS5 module is missing.
+            # A missing specific tokenizer (e.g. trigram) means only that
+            # particular table cannot be created — the base FTS5 table is fine.
+            if not self._is_tokenizer_unavailable_error(exc):
+                self._warn_fts5_unavailable(exc)
             return False
 
     def _execute_write(self, fn: Callable[[sqlite3.Connection], T]) -> T:
@@ -1166,7 +1186,8 @@ class SessionDB:
                 except sqlite3.OperationalError as exc:
                     if not self._is_fts5_unavailable_error(exc):
                         raise
-                    self._warn_fts5_unavailable(exc)
+                    if not self._is_tokenizer_unavailable_error(exc):
+                        self._warn_fts5_unavailable(exc)
                     fts5_available = False
                     fts_migrations_complete = False
                     break
diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py
index 3644308401f..4bdc12d4642 100644
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -50,6 +50,20 @@ class _NoFtsExistingTableConnection(sqlite3.Connection):
         return super().cursor(factory or _NoFtsExistingTableCursor)
 
 
+class _NoTrigramCursor(sqlite3.Cursor):
+    """Simulate a SQLite build with FTS5 but without the trigram tokenizer."""
+
+    def executescript(self, sql_script):
+        if "tokenize='trigram'" in sql_script:
+            raise sqlite3.OperationalError("no such tokenizer: trigram")
+        return super().executescript(sql_script)
+
+
+class _NoTrigramConnection(sqlite3.Connection):
+    def cursor(self, factory=None):
+        return super().cursor(factory or _NoTrigramCursor)
+
+
 @pytest.fixture()
 def db(tmp_path):
     """Create a SessionDB with a temp database file."""
@@ -330,6 +344,51 @@ class TestSessionLifecycle:
         finally:
             restored.close()
 
+    def test_is_fts5_unavailable_error_catches_trigram_tokenizer(self):
+        """Unit test: _is_fts5_unavailable_error matches 'no such tokenizer'."""
+        fts5_err = sqlite3.OperationalError("no such module: fts5")
+        trigram_err = sqlite3.OperationalError("no such tokenizer: trigram")
+        unrelated_err = sqlite3.OperationalError("no such table: foo")
+
+        assert SessionDB._is_fts5_unavailable_error(fts5_err) is True
+        assert SessionDB._is_fts5_unavailable_error(trigram_err) is True
+        assert SessionDB._is_fts5_unavailable_error(unrelated_err) is False
+
+        # The tokenizer-specific predicate must single out only the tokenizer case.
+        assert SessionDB._is_tokenizer_unavailable_error(trigram_err) is True
+        assert SessionDB._is_tokenizer_unavailable_error(fts5_err) is False
+        assert SessionDB._is_tokenizer_unavailable_error(unrelated_err) is False
+
+    def test_db_initializes_without_trigram_tokenizer(self, tmp_path, monkeypatch):
+        """SessionDB must not crash when FTS5 exists but trigram tokenizer is missing."""
+        real_connect = sqlite3.connect
+
+        def connect_without_trigram(*args, **kwargs):
+            kwargs["factory"] = _NoTrigramConnection
+            return real_connect(*args, **kwargs)
+
+        monkeypatch.setattr("hermes_state.sqlite3.connect", connect_without_trigram)
+
+        db = SessionDB(db_path=tmp_path / "state.db")
+        try:
+            # Base FTS5 should still work (trigram is optional).
+            assert db._fts_enabled is True
+            assert db._fts_table_exists("messages_fts") is True
+            # Trigram table should NOT have been created.
+            assert db._fts_table_exists("messages_fts_trigram") is False
+
+            db.create_session(session_id="s1", source="cli")
+            db.append_message("s1", role="user", content="hello without trigram")
+
+            messages = db.get_messages("s1")
+            assert len(messages) == 1
+            assert messages[0]["content"] == "hello without trigram"
+
+            # FTS5 keyword search should still work.
+            assert len(db.search_messages("hello")) == 1
+        finally:
+            db.close()
+
 
 # =========================================================================
 # Message storage