fix(agent): handle missing trigram tokenizer without disabling FTS5

_is_fts5_unavailable_error only matched 'no such module: fts5', but
SQLite builds that ship FTS5 without the optional trigram tokenizer
raise 'no such tokenizer: trigram' instead. This caused SessionDB init
to crash on those builds.

Additionally, once the matcher recognizes that error, the trigram failure
path would call _warn_fts5_unavailable, which sets _fts_enabled = False and
globally disables full-text search even though the base FTS5 table was
created successfully.

Fix:
- Extend _is_fts5_unavailable_error to also match 'no such tokenizer'
- Add _is_tokenizer_unavailable_error to distinguish tokenizer-specific
  failures from whole-module absence
- Only call _warn_fts5_unavailable for module-level failures; skip it
  for tokenizer-specific failures so base FTS5 remains usable

Fixes #47002
This commit is contained in:
liuhao1024 2026-06-16 12:13:39 +08:00 committed by Teknium
parent 2c6e266e88
commit 0403f41f9c
2 changed files with 76 additions and 4 deletions

View file

@ -772,7 +772,18 @@ class SessionDB:
@staticmethod
def _is_fts5_unavailable_error(exc: sqlite3.OperationalError) -> bool:
err = str(exc).lower()
return "no such module" in err and "fts5" in err
if "no such module" in err and "fts5" in err:
return True
# SQLite builds that have FTS5 but lack the optional trigram tokenizer
# raise "no such tokenizer: trigram" instead of "no such module".
if "no such tokenizer" in err:
return True
return False
@staticmethod
def _is_tokenizer_unavailable_error(exc: sqlite3.OperationalError) -> bool:
"""Check if the error is about a specific tokenizer (not the whole FTS5 module)."""
return "no such tokenizer" in str(exc).lower()
def _warn_fts5_unavailable(self, exc: sqlite3.OperationalError) -> None:
self._fts_enabled = False
@ -844,7 +855,9 @@ class SessionDB:
return True
except sqlite3.OperationalError as exc:
if self._is_fts5_unavailable_error(exc):
self._warn_fts5_unavailable(exc)
# Only disable FTS entirely when the whole module is missing.
if not self._is_tokenizer_unavailable_error(exc):
self._warn_fts5_unavailable(exc)
return None
if "no such table" in str(exc).lower():
return False
@ -868,7 +881,11 @@ class SessionDB:
except sqlite3.OperationalError as exc:
if not self._is_fts5_unavailable_error(exc):
raise
self._warn_fts5_unavailable(exc)
# Only disable FTS entirely when the whole FTS5 module is missing.
# A missing specific tokenizer (e.g. trigram) means only that
# particular table cannot be created — the base FTS5 table is fine.
if not self._is_tokenizer_unavailable_error(exc):
self._warn_fts5_unavailable(exc)
return False
def _execute_write(self, fn: Callable[[sqlite3.Connection], T]) -> T:
@ -1166,7 +1183,8 @@ class SessionDB:
except sqlite3.OperationalError as exc:
if not self._is_fts5_unavailable_error(exc):
raise
self._warn_fts5_unavailable(exc)
if not self._is_tokenizer_unavailable_error(exc):
self._warn_fts5_unavailable(exc)
fts5_available = False
fts_migrations_complete = False
break

View file

@ -50,6 +50,20 @@ class _NoFtsExistingTableConnection(sqlite3.Connection):
return super().cursor(factory or _NoFtsExistingTableCursor)
class _NoTrigramCursor(sqlite3.Cursor):
"""Simulate a SQLite build with FTS5 but without the trigram tokenizer."""
def executescript(self, sql_script):
if "tokenize='trigram'" in sql_script:
raise sqlite3.OperationalError("no such tokenizer: trigram")
return super().executescript(sql_script)
class _NoTrigramConnection(sqlite3.Connection):
    """Connection whose cursors default to ``_NoTrigramCursor``."""

    def cursor(self, factory=None):
        # An explicitly supplied (truthy) factory wins; otherwise fall back
        # to the trigram-less cursor.
        cursor_cls = factory if factory else _NoTrigramCursor
        return super().cursor(cursor_cls)
@pytest.fixture()
def db(tmp_path):
"""Create a SessionDB with a temp database file."""
@ -330,6 +344,46 @@ class TestSessionLifecycle:
finally:
restored.close()
def test_is_fts5_unavailable_error_catches_trigram_tokenizer(self):
    """Unit test: the matcher accepts module- and tokenizer-missing errors only."""
    # (error message, expected classification) in the original check order.
    expectations = (
        ("no such module: fts5", True),
        ("no such tokenizer: trigram", True),
        ("no such table: foo", False),
    )
    for message, expected in expectations:
        exc = sqlite3.OperationalError(message)
        assert SessionDB._is_fts5_unavailable_error(exc) is expected
def test_db_initializes_without_trigram_tokenizer(self, tmp_path, monkeypatch):
    """SessionDB must initialize when FTS5 exists but the trigram tokenizer does not."""
    original_connect = sqlite3.connect

    def _connect_no_trigram(*args, **kwargs):
        # Route every connection through the trigram-less connection class.
        return original_connect(*args, **{**kwargs, "factory": _NoTrigramConnection})

    monkeypatch.setattr("hermes_state.sqlite3.connect", _connect_no_trigram)

    session_db = SessionDB(db_path=tmp_path / "state.db")
    try:
        # Base FTS5 should still work (trigram is optional).
        assert session_db._fts_enabled is True
        assert session_db._fts_table_exists("messages_fts") is True
        # Trigram table should NOT have been created.
        assert session_db._fts_table_exists("messages_fts_trigram") is False

        session_db.create_session(session_id="s1", source="cli")
        session_db.append_message("s1", role="user", content="hello without trigram")

        stored = session_db.get_messages("s1")
        assert len(stored) == 1
        assert stored[0]["content"] == "hello without trigram"

        # FTS5 keyword search should still work.
        hits = session_db.search_messages("hello")
        assert len(hits) == 1
    finally:
        session_db.close()
# =========================================================================
# Message storage