From 8d76d69d482c71fdf19762fa6b8239a891c54b8e Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 28 Apr 2026 01:30:06 -0700 Subject: [PATCH] fix(state): repair FTS5 delete trigger and add v11 migration for tool-call indexing Follow-up on top of the cherry-picked contributor commit for #16751: 1. Delete triggers: the original PR switched FTS5 from external to inline content mode and concatenated content || tool_name || tool_calls in the insert/update triggers, but left the delete triggers passing old.content to the FTS5 delete-command. FTS5 inline delete requires the content to match what was stored, so every DELETE on messages raised 'SQL logic error'. Replaced with plain DELETE FROM ... WHERE rowid = old.id on all four delete paths (normal + trigram, delete + update-delete). 2. v11 migration: existing DBs have the old external-content FTS tables and triggers. Because CREATE VIRTUAL TABLE IF NOT EXISTS / CREATE TRIGGER IF NOT EXISTS skip when the objects already exist, upgraders would have kept the broken behavior forever. Bumped SCHEMA_VERSION to 11 and added a migration that drops both FTS tables + all 6 old triggers, recreates them via FTS_SQL / FTS_TRIGRAM_SQL, and backfills from messages using the same concatenation expression. 3. Regression tests: 6 new tests cover INSERT / UPDATE / DELETE paths for tool_name + tool_calls indexing plus the full v10 -> v11 upgrade path on a hand-built legacy DB. --- hermes_state.py | 55 +++++++++- tests/test_hermes_state.py | 199 ++++++++++++++++++++++++++++++++++++- 2 files changed, 247 insertions(+), 7 deletions(-) diff --git a/hermes_state.py b/hermes_state.py index f1f44fd093..55895d4845 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -33,7 +33,7 @@ T = TypeVar("T") DEFAULT_DB_PATH = get_hermes_home() / "state.db" -SCHEMA_VERSION = 10 +SCHEMA_VERSION = 11 SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -113,11 +113,11 @@ CREATE TRIGGER IF NOT EXISTS messages_fts_insert AFTER INSERT ON messages BEGIN END; CREATE TRIGGER IF NOT EXISTS messages_fts_delete AFTER DELETE ON messages BEGIN - INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content); + DELETE FROM messages_fts WHERE rowid = old.id; END; CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN - INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content); + DELETE FROM messages_fts WHERE rowid = old.id; INSERT INTO messages_fts(rowid, content) VALUES ( new.id, COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '') @@ -143,11 +143,11 @@ CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON message END; CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN - INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content); + DELETE FROM messages_fts_trigram WHERE rowid = old.id; END; CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN - INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content); + DELETE FROM messages_fts_trigram WHERE rowid = old.id; INSERT INTO messages_fts_trigram(rowid, content) VALUES ( new.id, COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '') @@ -436,6 +436,51 @@ class SessionDB: "INSERT INTO messages_fts_trigram(rowid, content) " "SELECT id, content FROM messages WHERE content IS NOT NULL" ) + if current_version < 11: + # v11: re-index FTS5 tables to cover tool_name + tool_calls and + # switch from external-content to inline mode. Existing DBs have + # old-schema FTS tables and triggers that IF NOT EXISTS won't + # overwrite, so we drop them explicitly and let the post-migration + # existence checks (below) recreate them from FTS_SQL / + # FTS_TRIGRAM_SQL, then backfill every message row. Fixes #16751. + for _trig in ( + "messages_fts_insert", + "messages_fts_delete", + "messages_fts_update", + "messages_fts_trigram_insert", + "messages_fts_trigram_delete", + "messages_fts_trigram_update", + ): + try: + cursor.execute(f"DROP TRIGGER IF EXISTS {_trig}") + except sqlite3.OperationalError: + pass + for _tbl in ("messages_fts", "messages_fts_trigram"): + try: + cursor.execute(f"DROP TABLE IF EXISTS {_tbl}") + except sqlite3.OperationalError: + pass + # Recreate virtual tables + triggers with the new inline-mode + # schema that indexes content || tool_name || tool_calls. + cursor.executescript(FTS_SQL) + cursor.executescript(FTS_TRIGRAM_SQL) + # Backfill both indexes from every existing messages row. + cursor.execute( + "INSERT INTO messages_fts(rowid, content) " + "SELECT id, " + "COALESCE(content, '') || ' ' || " + "COALESCE(tool_name, '') || ' ' || " + "COALESCE(tool_calls, '') " + "FROM messages" + ) + cursor.execute( + "INSERT INTO messages_fts_trigram(rowid, content) " + "SELECT id, " + "COALESCE(content, '') || ' ' || " + "COALESCE(tool_name, '') || ' ' || " + "COALESCE(tool_calls, '') " + "FROM messages" + ) if current_version < SCHEMA_VERSION: cursor.execute( "UPDATE schema_version SET version = ?", diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index 244b87c12f..15a57a83ce 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -1316,7 +1316,7 @@ class TestSchemaInit: def test_schema_version(self, db): cursor = db._conn.execute("SELECT version FROM schema_version") version = cursor.fetchone()[0] - assert version == 10 + assert version == 11 def test_title_column_exists(self, db): """Verify the title column was created in the sessions table.""" @@ -1377,7 +1377,7 @@ class TestSchemaInit: # Verify migration cursor = migrated_db._conn.execute("SELECT version FROM schema_version") - assert cursor.fetchone()[0] == 10 + assert cursor.fetchone()[0] == 11 # Verify title column exists and is NULL for existing sessions session = migrated_db.get_session("existing") @@ -2290,3 +2290,198 @@ class TestAutoMaintenance: assert not (sessions_dir / "old.jsonl").exists() assert (sessions_dir / "active.jsonl").exists() + +# ========================================================================= +# FTS5 indexing of tool_calls / tool_name (#16751) +# ========================================================================= + +class TestFTS5ToolCallIndexing: + """Regression tests: search_messages must see tool_name and tool_calls. + + Before #16751's fix, `messages_fts` only indexed `messages.content`, so + tokens that only appeared in `tool_name` or the serialized `tool_calls` + JSON were invisible to session_search even though the row was in the DB. + """ + + def test_tool_name_is_searchable(self, db): + db.create_session(session_id="s1", source="cli") + db.append_message( + "s1", role="assistant", content="", + tool_name="UNIQUETOOLNAME", + ) + results = db.search_messages("UNIQUETOOLNAME") + assert len(results) == 1 + + def test_tool_calls_args_are_searchable(self, db): + db.create_session(session_id="s1", source="cli") + db.append_message( + "s1", role="assistant", content="", + tool_calls=[{ + "id": "c1", + "type": "function", + "function": { + "name": "web_search", + "arguments": '{"query": "UNIQUESEARCHTOKEN"}', + }, + }], + ) + results = db.search_messages("UNIQUESEARCHTOKEN") + assert len(results) == 1 + + def test_tool_function_name_in_tool_calls_is_searchable(self, db): + db.create_session(session_id="s1", source="cli") + db.append_message( + "s1", role="assistant", content="", + tool_calls=[{ + "id": "c1", + "type": "function", + "function": {"name": "UNIQUEFUNCNAME", "arguments": "{}"}, + }], + ) + results = db.search_messages("UNIQUEFUNCNAME") + assert len(results) == 1 + + def test_delete_message_row_does_not_crash(self, db): + """DELETE on messages must not raise when FTS rows reference tool fields. + + Previously the messages_fts_delete trigger passed old.content to the + FTS5 delete-command but the inserted row was the concatenation of + content || tool_name || tool_calls, so FTS5 rejected the delete with + 'SQL logic error' and every session delete path broke. + """ + db.create_session(session_id="s1", source="cli") + db.append_message( + "s1", role="assistant", content="hello", + tool_name="web_search", + tool_calls=[{ + "id": "c1", + "type": "function", + "function": {"name": "web_search", "arguments": '{"q": "x"}'}, + }], + ) + # end_session + end-time prune path would exercise DELETE; hit the + # row directly through the write helper to keep the regression focused. + def _delete(conn): + conn.execute("DELETE FROM messages WHERE session_id = ?", ("s1",)) + db._execute_write(_delete) # must not raise + + assert db.search_messages("hello") == [] + assert db.search_messages("web_search") == [] + + def test_update_message_reindexes_tool_fields(self, db): + """UPDATE must refresh the FTS row so old tokens drop out and new tokens appear.""" + db.create_session(session_id="s1", source="cli") + db.append_message( + "s1", role="assistant", content="", + tool_name="ORIGINALTOOL", + ) + assert len(db.search_messages("ORIGINALTOOL")) == 1 + + def _update(conn): + conn.execute( + "UPDATE messages SET tool_name = ? WHERE session_id = ?", + ("RENAMEDTOOL", "s1"), + ) + db._execute_write(_update) + + assert db.search_messages("ORIGINALTOOL") == [] + assert len(db.search_messages("RENAMEDTOOL")) == 1 + + +class TestFTS5ToolCallMigration: + """v11 migration: pre-existing state.db with old external-content FTS tables + must be re-indexed so tool_name / tool_calls become searchable after upgrade.""" + + def test_v10_to_v11_upgrade_backfills_tool_fields(self, tmp_path): + """Simulate an existing user: build a v10-shaped DB by hand, insert a + row with tool_calls, then open via SessionDB (which runs migrations). + After upgrade, the tool_calls token must be searchable.""" + import sqlite3 + + db_path = tmp_path / "legacy.db" + + # Build the pre-v11 schema by hand: external-content FTS tables + + # old triggers that only reference new.content. + conn = sqlite3.connect(str(db_path)) + conn.executescript(""" + CREATE TABLE schema_version (version INTEGER NOT NULL); + INSERT INTO schema_version (version) VALUES (10); + + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT, + started_at REAL, + ended_at REAL, + title TEXT, + parent_session_id TEXT, + message_count INTEGER DEFAULT 0, + tool_call_count INTEGER DEFAULT 0, + api_call_count INTEGER DEFAULT 0 + ); + CREATE TABLE messages ( + id INTEGER PRIMARY KEY, + session_id TEXT NOT NULL, + timestamp REAL NOT NULL, + role TEXT NOT NULL, + content TEXT, + tool_name TEXT, + tool_calls TEXT, + tool_call_id TEXT, + token_count INTEGER, + finish_reason TEXT, + reasoning TEXT, + reasoning_content TEXT, + reasoning_details TEXT, + codex_reasoning_items TEXT, + codex_message_items TEXT + ); + + CREATE VIRTUAL TABLE messages_fts USING fts5( + content, content=messages, content_rowid=id + ); + CREATE TRIGGER messages_fts_insert AFTER INSERT ON messages BEGIN + INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content); + END; + + CREATE VIRTUAL TABLE messages_fts_trigram USING fts5( + content, content=messages, content_rowid=id, tokenize='trigram' + ); + CREATE TRIGGER messages_fts_trigram_insert AFTER INSERT ON messages BEGIN + INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content); + END; + """) + conn.execute( + "INSERT INTO sessions (id, source, started_at) VALUES (?, ?, ?)", + ("s1", "cli", time.time()), + ) + conn.execute( + "INSERT INTO messages (session_id, timestamp, role, content, tool_name, tool_calls) " + "VALUES (?, ?, ?, ?, ?, ?)", + ("s1", time.time(), "assistant", "", "LEGACYTOOL", + '{"function":{"name":"web_search","arguments":"{\\"q\\":\\"LEGACYARG\\"}"}}'), + ) + conn.commit() + + # Verify the legacy FTS rows don't contain the tool tokens yet. + legacy_hits = conn.execute( + "SELECT rowid FROM messages_fts WHERE messages_fts MATCH 'LEGACYTOOL'" + ).fetchall() + assert legacy_hits == [], "sanity: legacy FTS must NOT contain tool_name" + conn.close() + + # Now open via SessionDB — migration runs. + session_db = SessionDB(db_path=db_path) + try: + assert len(session_db.search_messages("LEGACYTOOL")) == 1, \ + "v11 migration must backfill tool_name into FTS" + assert len(session_db.search_messages("LEGACYARG")) == 1, \ + "v11 migration must backfill tool_calls JSON into FTS" + # schema_version bumped + row = session_db._conn.execute( + "SELECT version FROM schema_version LIMIT 1" + ).fetchone() + version = row["version"] if hasattr(row, "keys") else row[0] + assert version == 11 + finally: + session_db.close() +