From a2ff193050b8054b52f3bffd4139333a60058be7 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Wed, 6 May 2026 23:35:12 +0530 Subject: [PATCH] chore: follow-up cleanup for Kanban migration fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Expand migration comment to name the primary failure mode (missing column OperationalError from #20842) ahead of the secondary SQLite schema-reparse concern; also document the stale-cols-snapshot invariant - Add clarifying comments on from_row() legacy fallback branches noting they are belt-and-suspenders dead code post-migration - Add task_events comment in existing test explaining why the table is required by the migrator - Add test_legacy_migration_no_legacy_columns_at_all: Scenario A — explicitly asserts the exact #20842 crash no longer occurs and that consecutive_failures defaults to 0 on a DB that never had spawn_failures - Add test_legacy_migration_both_columns_already_present: Scenario D — asserts the migration is a no-op when both columns already exist, preserving the existing counter value --- hermes_cli/kanban_db.py | 26 +++- .../test_kanban_core_functionality.py | 125 ++++++++++++++++++ 2 files changed, 146 insertions(+), 5 deletions(-) diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 8c1d6243d6..2d2f1b2ecf 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -628,11 +628,16 @@ class Task: idempotency_key=row["idempotency_key"] if "idempotency_key" in keys else None, consecutive_failures=( row["consecutive_failures"] if "consecutive_failures" in keys + # Pre-migration fallback: ``_migrate_add_optional_columns`` always + # adds ``consecutive_failures`` now, so this branch is only reachable + # on a DB that was never opened since pre-#20410 code ran. Keep for + # belt-and-suspenders safety; in practice it is dead code post-migration. else (row["spawn_failures"] if "spawn_failures" in keys else 0) ), worker_pid=row["worker_pid"] if "worker_pid" in keys else None, last_failure_error=( row["last_failure_error"] if "last_failure_error" in keys + # Same belt-and-suspenders fallback as consecutive_failures above. else (row["last_spawn_error"] if "last_spawn_error" in keys else None) ), max_runtime_seconds=( @@ -954,11 +959,22 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None: "ON tasks(idempotency_key)" ) # Legacy column migration: ``spawn_failures`` → ``consecutive_failures`` - # and ``last_spawn_error`` → ``last_failure_error``. Avoid - # ``ALTER TABLE ... RENAME COLUMN`` here: existing board DBs may have - # related schema objects from older Kanban builds, and SQLite reparses - # the whole schema during a rename. Adding/copying is more tolerant and - # still preserves the historical counter/error values. + # and ``last_spawn_error`` → ``last_failure_error``. + # + # Avoid ``ALTER TABLE ... RENAME COLUMN`` for two reasons: + # 1. Primary: very old DBs may never have had ``spawn_failures`` at + # all, so RENAME raises OperationalError: no such column (the crash + # reported in issue #20842 after the #20410 update). + # 2. Secondary: SQLite reparses the whole schema on any RENAME, which + # fails if related objects (views, triggers) reference the old name. + # + # ADD-first-then-copy is tolerant of both shapes and preserves + # historical counter values when the legacy columns do exist. + # + # NOTE: ``cols`` reflects the schema at entry to this function and is + # not refreshed between ALTER TABLE calls. Every guard below checks + # the *original* snapshot; this is intentional and safe as long as + # no step depends on a column added by a previous step in the same call. if "consecutive_failures" not in cols: conn.execute( "ALTER TABLE tasks ADD COLUMN consecutive_failures " diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py index 6a04ca2a92..1e286d7ce6 100644 --- a/tests/hermes_cli/test_kanban_core_functionality.py +++ b/tests/hermes_cli/test_kanban_core_functionality.py @@ -2687,6 +2687,9 @@ def test_legacy_spawn_failure_columns_are_copied_not_renamed(tmp_path): created_at INTEGER NOT NULL ) """) + # task_events is required: _migrate_add_optional_columns also runs a + # PRAGMA on it to back-fill the run_id column and raises + # OperationalError if the table is absent. conn.execute( "INSERT INTO tasks " "(id, title, body, assignee, status, priority, created_by, created_at, " @@ -2720,6 +2723,128 @@ def test_legacy_spawn_failure_columns_are_copied_not_renamed(tmp_path): conn.close() +def test_legacy_migration_no_legacy_columns_at_all(tmp_path): + """Scenario A: DB has neither spawn_failures nor consecutive_failures. + + This is the exact crash scenario from issue #20842 — a very old DB that + predates the spawn_failures column entirely. The old RENAME COLUMN path + raised ``sqlite3.OperationalError: no such column: spawn_failures``. + The ADD-first approach adds consecutive_failures with default 0. + """ + import sqlite3 + + db_path = tmp_path / "ancient.db" + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + conn.execute(""" + CREATE TABLE tasks ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + status TEXT NOT NULL, + created_at INTEGER NOT NULL + ) + """) + # task_events is required: _migrate_add_optional_columns also runs a + # PRAGMA on it to back-fill the run_id column and raises + # OperationalError if the table is absent. + conn.execute(""" + CREATE TABLE task_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + kind TEXT NOT NULL, + payload TEXT, + created_at INTEGER NOT NULL + ) + """) + conn.execute( + "INSERT INTO tasks (id, title, status, created_at) " + "VALUES ('t1', 'ancient task', 'ready', 1)" + ) + conn.commit() + + # Must not raise (this was the crash before this fix). + kb._migrate_add_optional_columns(conn) + + cols = {r[1] for r in conn.execute("PRAGMA table_info(tasks)")} + assert "consecutive_failures" in cols, "migration must add consecutive_failures" + assert "last_failure_error" in cols, "migration must add last_failure_error" + assert "spawn_failures" not in cols, "no legacy column should be synthesised" + + row = conn.execute("SELECT * FROM tasks WHERE id = 't1'").fetchone() + assert row["consecutive_failures"] == 0 + assert row["last_failure_error"] is None + + # Idempotent second run must not raise either. + kb._migrate_add_optional_columns(conn) + row_again = conn.execute("SELECT * FROM tasks WHERE id = 't1'").fetchone() + assert row_again["consecutive_failures"] == 0 + assert row_again["last_failure_error"] is None + conn.close() + + +def test_legacy_migration_both_columns_already_present(tmp_path): + """Scenario D: DB already has both spawn_failures AND consecutive_failures. + + Represents a partially-migrated DB (e.g. user recovered manually after the + #20842 crash). The migration must be a complete no-op and must not + zero-out the existing counter. + """ + import sqlite3 + + db_path = tmp_path / "partial.db" + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + conn.execute(""" + CREATE TABLE tasks ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + status TEXT NOT NULL, + created_at INTEGER NOT NULL, + spawn_failures INTEGER NOT NULL DEFAULT 0, + consecutive_failures INTEGER NOT NULL DEFAULT 0, + last_spawn_error TEXT, + last_failure_error TEXT + ) + """) + # task_events required for the run_id back-fill PRAGMA inside the migrator. + conn.execute(""" + CREATE TABLE task_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + kind TEXT NOT NULL, + payload TEXT, + created_at INTEGER NOT NULL + ) + """) + conn.execute( + "INSERT INTO tasks (id, title, status, created_at, spawn_failures, " + "consecutive_failures, last_spawn_error, last_failure_error) " + "VALUES ('t2', 'partial task', 'ready', 1, 2, 3, 'old error', 'new error')" + ) + conn.commit() + + kb._migrate_add_optional_columns(conn) + + row = conn.execute("SELECT * FROM tasks WHERE id = 't2'").fetchone() + # consecutive_failures must not be reset by the migration. + assert row["consecutive_failures"] == 3, "migration must not overwrite existing counter" + assert row["last_failure_error"] == "new error", "migration must not overwrite existing error" + # Legacy column is preserved harmlessly. + assert row["spawn_failures"] == 2 + + # Schema must be unchanged — no spurious ADD or DROP. + cols_after = {r[1] for r in conn.execute("PRAGMA table_info(tasks)")} + assert "consecutive_failures" in cols_after + assert "last_failure_error" in cols_after + assert "spawn_failures" in cols_after # legacy preserved + + # Idempotent second run must not modify values or raise. + kb._migrate_add_optional_columns(conn) + row_again = conn.execute("SELECT * FROM tasks WHERE id = 't2'").fetchone() + assert row_again["consecutive_failures"] == 3 + assert row_again["last_failure_error"] == "new error" + conn.close() + # --------------------------------------------------------------------------- # Gateway-embedded dispatcher: config, CLI warnings, daemon deprecation stub