From 7552e0f3c0723f56c7ee8d5e2fe6c53c7a1f6ac3 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Tue, 19 May 2026 20:01:59 +0530
Subject: [PATCH] fix(kanban): also hoist idx_events_run + drop redundant inner
 create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extends the previous commit to cover the remaining additive-column index
that sits on the same migration trap:

- ``task_events.run_id`` -> ``idx_events_run`` was still in SCHEMA_SQL.
  A legacy ``task_events`` table predating #17805 (no ``run_id``) would
  still abort ``executescript`` before ``_migrate_add_optional_columns``
  could add the column. Hoisted out of SCHEMA_SQL and made unconditional
  in the migration alongside the other three indexes.

- Removed the now-redundant ``CREATE INDEX idx_tasks_idempotency`` that
  was nested inside the ``if "idempotency_key" not in cols`` branch.
  The unconditional create lower in the function makes it idempotent
  on both fresh and legacy DBs.

- Strengthened the regression test to cover all four indexes
  (``idx_tasks_session_id``, ``idx_tasks_tenant``, ``idx_tasks_idempotency``,
  ``idx_events_run``) and to seed a pre-#17805 ``task_events`` shape that
  exercises the ``run_id`` migration path.

The result: every ``CREATE INDEX`` that depends on an additive column now
runs after the migration ensures the column exists. Verified against a
realistic pre-#16081 board fixture (tasks + task_events both legacy
shape) — origin/main reproduces ``no such column: session_id``; this
branch migrates cleanly and creates all four indexes.
---
 hermes_cli/kanban_db.py            | 30 +++++++++++++++----------
 tests/hermes_cli/test_kanban_db.py | 36 ++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py
index 83feab95a08..9cbc010ac64 100644
--- a/hermes_cli/kanban_db.py
+++ b/hermes_cli/kanban_db.py
@@ -939,7 +939,6 @@ CREATE INDEX IF NOT EXISTS idx_links_child           ON task_links(child_id);
 CREATE INDEX IF NOT EXISTS idx_links_parent          ON task_links(parent_id);
 CREATE INDEX IF NOT EXISTS idx_comments_task         ON task_comments(task_id, created_at);
 CREATE INDEX IF NOT EXISTS idx_events_task           ON task_events(task_id, created_at);
-CREATE INDEX IF NOT EXISTS idx_events_run            ON task_events(run_id, id);
 CREATE INDEX IF NOT EXISTS idx_runs_task             ON task_runs(task_id, started_at);
 CREATE INDEX IF NOT EXISTS idx_runs_status           ON task_runs(status);
 CREATE INDEX IF NOT EXISTS idx_notify_task           ON kanban_notify_subs(task_id);
@@ -1079,10 +1078,9 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None:
         _add_column_if_missing(
             conn, "tasks", "idempotency_key", "idempotency_key TEXT"
         )
-        conn.execute(
-            "CREATE INDEX IF NOT EXISTS idx_tasks_idempotency "
-            "ON tasks(idempotency_key)"
-        )
+    # ``idx_tasks_idempotency`` is created unconditionally below alongside
+    # the other additive-column indexes — see the block after the
+    # legacy-column migration. Creating it here too would be redundant.
 
     # Refresh after early additive migrations above. Some existing DBs were
     # partially migrated in older releases and can already contain the later
@@ -1170,9 +1168,13 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None:
             conn, "tasks", "session_id", "session_id TEXT"
         )
 
-    # Indexes over additive task columns must be created after the columns
-    # exist. Keeping them in SCHEMA_SQL breaks legacy boards because
-    # CREATE TABLE IF NOT EXISTS does not add new columns to existing tables.
+    # Indexes over additive ``tasks`` columns must be created after the
+    # columns exist. Keeping them in SCHEMA_SQL breaks legacy boards: SQLite
+    # parses each statement in ``executescript`` against the live schema, so a
+    # ``CREATE INDEX`` over a missing column aborts initialization before the
+    # additive ``ALTER TABLE`` migrations below can run. Re-running them here
+    # is cheap thanks to ``IF NOT EXISTS`` and stays correct on fresh DBs
+    # (where the columns already exist from SCHEMA_SQL).
     conn.execute("CREATE INDEX IF NOT EXISTS idx_tasks_tenant ON tasks(tenant)")
     conn.execute(
         "CREATE INDEX IF NOT EXISTS idx_tasks_idempotency ON tasks(idempotency_key)"
@@ -1186,10 +1188,14 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None:
     ev_cols = {row["name"] for row in conn.execute("PRAGMA table_info(task_events)")}
     if "run_id" not in ev_cols:
         _add_column_if_missing(conn, "task_events", "run_id", "run_id INTEGER")
-        conn.execute(
-            "CREATE INDEX IF NOT EXISTS idx_events_run "
-            "ON task_events(run_id, id)"
-        )
+
+    # Same ordering rule as the additive ``tasks`` indexes above: create the
+    # index after the additive column migration so legacy ``task_events``
+    # tables don't fail during SCHEMA_SQL execution before ``run_id`` exists.
+    conn.execute(
+        "CREATE INDEX IF NOT EXISTS idx_events_run "
+        "ON task_events(run_id, id)"
+    )
 
     notify_table_exists = conn.execute(
         "SELECT name FROM sqlite_master WHERE type='table' AND name='kanban_notify_subs'"
diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py
index 9d2b551d0b8..64ed630db1c 100644
--- a/tests/hermes_cli/test_kanban_db.py
+++ b/tests/hermes_cli/test_kanban_db.py
@@ -49,8 +49,22 @@ def test_init_creates_expected_tables(kanban_home):
 
 
 def test_connect_migrates_legacy_db_before_optional_column_indexes(tmp_path):
+    """Legacy DBs missing additive indexed columns must migrate cleanly.
+
+    SCHEMA_SQL runs in ``connect()`` before ``_migrate_add_optional_columns``.
+    Indexes over additive columns therefore must be created after the
+    migration adds those columns, or boards predating the column fail to
+    open before migration can run.
+
+    Covers all four indexes that sit on additive columns:
+    - ``tasks.session_id``       -> ``idx_tasks_session_id``    (#28447)
+    - ``tasks.tenant``           -> ``idx_tasks_tenant``        (#16081)
+    - ``tasks.idempotency_key``  -> ``idx_tasks_idempotency``   (#17805)
+    - ``task_events.run_id``     -> ``idx_events_run``          (#17805)
+    """
     db_path = tmp_path / "legacy-kanban.db"
     conn = sqlite3.connect(str(db_path))
+    # Pre-#16081 ``tasks`` shape: missing tenant, idempotency_key, session_id.
     conn.execute("""
         CREATE TABLE tasks (
             id TEXT PRIMARY KEY,
@@ -69,6 +83,18 @@ def test_connect_migrates_legacy_db_before_optional_column_indexes(tmp_path):
             claim_expires INTEGER
         )
     """)
+    # Pre-#17805 ``task_events`` shape: missing run_id. Required because
+    # ``_migrate_add_optional_columns`` unconditionally runs PRAGMA on
+    # ``task_events`` for run_id back-fill.
+    conn.execute("""
+        CREATE TABLE task_events (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            task_id TEXT NOT NULL,
+            kind TEXT NOT NULL,
+            payload TEXT,
+            created_at INTEGER NOT NULL
+        )
+    """)
     conn.execute(
         "INSERT INTO tasks (id, title, status, created_at) "
         "VALUES ('legacy', 'old board task', 'ready', 1)"
@@ -80,6 +106,10 @@ def test_connect_migrates_legacy_db_before_optional_column_indexes(tmp_path):
         task_columns = {
             row["name"] for row in migrated.execute("PRAGMA table_info(tasks)")
         }
+        event_columns = {
+            row["name"]
+            for row in migrated.execute("PRAGMA table_info(task_events)")
+        }
         indexes = {
             row["name"]
             for row in migrated.execute(
@@ -87,10 +117,16 @@ def test_connect_migrates_legacy_db_before_optional_column_indexes(tmp_path):
             )
         }
 
+    # Additive columns added by migration:
     assert "session_id" in task_columns
+    assert "tenant" in task_columns
+    assert "idempotency_key" in task_columns
+    assert "run_id" in event_columns
+    # And their indexes — the regression scope of this test:
     assert "idx_tasks_session_id" in indexes
     assert "idx_tasks_tenant" in indexes
     assert "idx_tasks_idempotency" in indexes
+    assert "idx_events_run" in indexes
 
 
 # ---------------------------------------------------------------------------