fix(state): repair FTS5 delete trigger and add v11 migration for tool-call indexing

Follow-up on top of the cherry-picked contributor commit for #16751:

1. Delete triggers: the original PR switched FTS5 from external to inline
   content mode and concatenated content || tool_name || tool_calls in
   the insert/update triggers, but left the delete triggers passing
   old.content to the FTS5 delete-command. FTS5 inline delete requires
   the content to match what was stored, so every DELETE on messages
   raised 'SQL logic error'. Replaced with plain DELETE FROM ... WHERE
   rowid = old.id on all four delete paths (normal + trigram, delete +
   update-delete).

2. v11 migration: existing DBs have the old external-content FTS tables
   and triggers. Because CREATE VIRTUAL TABLE IF NOT EXISTS / CREATE
   TRIGGER IF NOT EXISTS are no-ops when the objects already exist,
   upgraders would have kept the broken behavior forever. Bumped SCHEMA_VERSION
   to 11 and added a migration that drops both FTS tables + all 6 old
   triggers, recreates them via FTS_SQL / FTS_TRIGRAM_SQL, and backfills
   from messages using the same concatenation expression.

3. Regression tests: 6 new tests cover INSERT / UPDATE / DELETE paths
   for tool_name + tool_calls indexing plus the full v10 -> v11 upgrade
   path on a hand-built legacy DB.
This commit is contained in:
Teknium 2026-04-28 01:30:06 -07:00 committed by Teknium
parent cfcad80ee1
commit 8d76d69d48
2 changed files with 247 additions and 7 deletions

View file

@ -1316,7 +1316,7 @@ class TestSchemaInit:
def test_schema_version(self, db):
    """The freshly-initialized schema must report the current version.

    The v11 migration (FTS5 tool-call indexing, #16751) bumped
    SCHEMA_VERSION from 10 to 11; the stale `assert version == 10`
    left behind by the diff would always fail and is removed.
    """
    cursor = db._conn.execute("SELECT version FROM schema_version")
    version = cursor.fetchone()[0]
    assert version == 11
def test_title_column_exists(self, db):
"""Verify the title column was created in the sessions table."""
@ -1377,7 +1377,7 @@ class TestSchemaInit:
# Verify migration
cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
assert cursor.fetchone()[0] == 10
assert cursor.fetchone()[0] == 11
# Verify title column exists and is NULL for existing sessions
session = migrated_db.get_session("existing")
@ -2290,3 +2290,198 @@ class TestAutoMaintenance:
assert not (sessions_dir / "old.jsonl").exists()
assert (sessions_dir / "active.jsonl").exists()
# =========================================================================
# FTS5 indexing of tool_calls / tool_name (#16751)
# =========================================================================
class TestFTS5ToolCallIndexing:
    """Regression suite: search_messages must match tool_name and tool_calls.

    Before the #16751 fix, `messages_fts` indexed only `messages.content`,
    so a token present solely in `tool_name` or in the serialized
    `tool_calls` JSON never matched in session_search even though the
    message row was stored in the DB.
    """

    def test_tool_name_is_searchable(self, db):
        db.create_session(session_id="s1", source="cli")
        db.append_message(
            "s1",
            role="assistant",
            content="",
            tool_name="UNIQUETOOLNAME",
        )
        assert len(db.search_messages("UNIQUETOOLNAME")) == 1

    def test_tool_calls_args_are_searchable(self, db):
        db.create_session(session_id="s1", source="cli")
        call = {
            "id": "c1",
            "type": "function",
            "function": {
                "name": "web_search",
                "arguments": '{"query": "UNIQUESEARCHTOKEN"}',
            },
        }
        db.append_message("s1", role="assistant", content="", tool_calls=[call])
        assert len(db.search_messages("UNIQUESEARCHTOKEN")) == 1

    def test_tool_function_name_in_tool_calls_is_searchable(self, db):
        db.create_session(session_id="s1", source="cli")
        call = {
            "id": "c1",
            "type": "function",
            "function": {"name": "UNIQUEFUNCNAME", "arguments": "{}"},
        }
        db.append_message("s1", role="assistant", content="", tool_calls=[call])
        assert len(db.search_messages("UNIQUEFUNCNAME")) == 1

    def test_delete_message_row_does_not_crash(self, db):
        """DELETE on messages must not raise when FTS rows carry tool fields.

        The old messages_fts_delete trigger handed old.content to the FTS5
        delete-command, but the indexed row had been built from
        content || tool_name || tool_calls, so FTS5 rejected the delete with
        'SQL logic error' and broke every session delete path.
        """
        db.create_session(session_id="s1", source="cli")
        db.append_message(
            "s1",
            role="assistant",
            content="hello",
            tool_name="web_search",
            tool_calls=[{
                "id": "c1",
                "type": "function",
                "function": {"name": "web_search", "arguments": '{"q": "x"}'},
            }],
        )

        # end_session + end-time prune would also exercise DELETE; hitting
        # the row directly via the write helper keeps the regression focused.
        def _drop_rows(conn):
            conn.execute("DELETE FROM messages WHERE session_id = ?", ("s1",))

        db._execute_write(_drop_rows)  # must not raise
        assert db.search_messages("hello") == []
        assert db.search_messages("web_search") == []

    def test_update_message_reindexes_tool_fields(self, db):
        """UPDATE must refresh the FTS row: old tokens drop out, new tokens appear."""
        db.create_session(session_id="s1", source="cli")
        db.append_message(
            "s1",
            role="assistant",
            content="",
            tool_name="ORIGINALTOOL",
        )
        assert len(db.search_messages("ORIGINALTOOL")) == 1

        def _rename(conn):
            conn.execute(
                "UPDATE messages SET tool_name = ? WHERE session_id = ?",
                ("RENAMEDTOOL", "s1"),
            )

        db._execute_write(_rename)
        assert db.search_messages("ORIGINALTOOL") == []
        assert len(db.search_messages("RENAMEDTOOL")) == 1
class TestFTS5ToolCallMigration:
    """v11 migration: a pre-existing state.db still carrying the old
    external-content FTS tables must be re-indexed so tool_name /
    tool_calls become searchable after the upgrade."""

    def test_v10_to_v11_upgrade_backfills_tool_fields(self, tmp_path):
        """Simulate an existing user's DB: hand-build a v10-shaped schema,
        insert a row carrying tool_calls, then open it through SessionDB
        (which runs migrations). The tool-call tokens must be searchable
        afterwards."""
        import sqlite3

        legacy_path = tmp_path / "legacy.db"

        # Hand-built pre-v11 schema: external-content FTS tables plus the
        # old triggers that reference only new.content.
        raw = sqlite3.connect(str(legacy_path))
        raw.executescript("""
            CREATE TABLE schema_version (version INTEGER NOT NULL);
            INSERT INTO schema_version (version) VALUES (10);
            CREATE TABLE sessions (
                id TEXT PRIMARY KEY,
                source TEXT,
                started_at REAL,
                ended_at REAL,
                title TEXT,
                parent_session_id TEXT,
                message_count INTEGER DEFAULT 0,
                tool_call_count INTEGER DEFAULT 0,
                api_call_count INTEGER DEFAULT 0
            );
            CREATE TABLE messages (
                id INTEGER PRIMARY KEY,
                session_id TEXT NOT NULL,
                timestamp REAL NOT NULL,
                role TEXT NOT NULL,
                content TEXT,
                tool_name TEXT,
                tool_calls TEXT,
                tool_call_id TEXT,
                token_count INTEGER,
                finish_reason TEXT,
                reasoning TEXT,
                reasoning_content TEXT,
                reasoning_details TEXT,
                codex_reasoning_items TEXT,
                codex_message_items TEXT
            );
            CREATE VIRTUAL TABLE messages_fts USING fts5(
                content, content=messages, content_rowid=id
            );
            CREATE TRIGGER messages_fts_insert AFTER INSERT ON messages BEGIN
                INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
            END;
            CREATE VIRTUAL TABLE messages_fts_trigram USING fts5(
                content, content=messages, content_rowid=id, tokenize='trigram'
            );
            CREATE TRIGGER messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
                INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
            END;
        """)
        raw.execute(
            "INSERT INTO sessions (id, source, started_at) VALUES (?, ?, ?)",
            ("s1", "cli", time.time()),
        )
        raw.execute(
            "INSERT INTO messages (session_id, timestamp, role, content, tool_name, tool_calls) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            ("s1", time.time(), "assistant", "", "LEGACYTOOL",
             '{"function":{"name":"web_search","arguments":"{\\"q\\":\\"LEGACYARG\\"}"}}'),
        )
        raw.commit()

        # Sanity check: the legacy FTS index must NOT yet contain the tool
        # tokens, otherwise this test would pass without the migration.
        stale = raw.execute(
            "SELECT rowid FROM messages_fts WHERE messages_fts MATCH 'LEGACYTOOL'"
        ).fetchall()
        assert stale == [], "sanity: legacy FTS must NOT contain tool_name"
        raw.close()

        # Opening through SessionDB runs the migration chain.
        session_db = SessionDB(db_path=legacy_path)
        try:
            assert len(session_db.search_messages("LEGACYTOOL")) == 1, \
                "v11 migration must backfill tool_name into FTS"
            assert len(session_db.search_messages("LEGACYARG")) == 1, \
                "v11 migration must backfill tool_calls JSON into FTS"
            # The migration must also bump schema_version.
            row = session_db._conn.execute(
                "SELECT version FROM schema_version LIMIT 1"
            ).fetchone()
            version = row["version"] if hasattr(row, "keys") else row[0]
            assert version == 11
        finally:
            session_db.close()