From b1d420e75f42560738ed69d230a62feb1f7c7594 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Wed, 6 May 2026 11:29:10 -0600 Subject: [PATCH] fix(kanban): avoid fragile failure-column renames --- hermes_cli/kanban_db.py | 28 ++++---- .../test_kanban_core_functionality.py | 72 +++++++++++++++++++ 2 files changed, 85 insertions(+), 15 deletions(-) diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 8440113c25..8c1d6243d6 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -953,31 +953,29 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None: "CREATE INDEX IF NOT EXISTS idx_tasks_idempotency " "ON tasks(idempotency_key)" ) - # Legacy column rename: ``spawn_failures`` → ``consecutive_failures`` - # and ``last_spawn_error`` → ``last_failure_error``. The counter was - # originally spawn-only; it's now unified across spawn/timeout/ - # crash outcomes. Rename when only the legacy columns exist to - # preserve historical counter values across upgrades. Add fresh - # otherwise. + # Legacy column migration: ``spawn_failures`` → ``consecutive_failures`` + # and ``last_spawn_error`` → ``last_failure_error``. Avoid + # ``ALTER TABLE ... RENAME COLUMN`` here: existing board DBs may have + # related schema objects from older Kanban builds, and SQLite reparses + # the whole schema during a rename. Adding/copying is more tolerant and + # still preserves the historical counter/error values. if "consecutive_failures" not in cols: + conn.execute( + "ALTER TABLE tasks ADD COLUMN consecutive_failures " + "INTEGER NOT NULL DEFAULT 0" + ) if "spawn_failures" in cols: conn.execute( - "ALTER TABLE tasks RENAME COLUMN spawn_failures TO consecutive_failures" - ) - else: - conn.execute( - "ALTER TABLE tasks ADD COLUMN consecutive_failures " - "INTEGER NOT NULL DEFAULT 0" + "UPDATE tasks SET consecutive_failures = COALESCE(spawn_failures, 0)" ) if "worker_pid" not in cols: conn.execute("ALTER TABLE tasks ADD COLUMN worker_pid INTEGER") if "last_failure_error" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN last_failure_error TEXT") if "last_spawn_error" in cols: conn.execute( - "ALTER TABLE tasks RENAME COLUMN last_spawn_error TO last_failure_error" + "UPDATE tasks SET last_failure_error = last_spawn_error" ) - else: - conn.execute("ALTER TABLE tasks ADD COLUMN last_failure_error TEXT") if "max_runtime_seconds" not in cols: conn.execute("ALTER TABLE tasks ADD COLUMN max_runtime_seconds INTEGER") if "last_heartbeat_at" not in cols: diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py index 95dfdae82d..6a04ca2a92 100644 --- a/tests/hermes_cli/test_kanban_core_functionality.py +++ b/tests/hermes_cli/test_kanban_core_functionality.py @@ -2648,6 +2648,78 @@ def test_legacy_db_without_skills_column_migrates(tmp_path): conn.close() +def test_legacy_spawn_failure_columns_are_copied_not_renamed(tmp_path): + """Legacy failure counters survive migration without fragile column renames.""" + import sqlite3 + db_path = tmp_path / "legacy-failures.db" + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + conn.execute(""" + CREATE TABLE tasks ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + body TEXT, + assignee TEXT, + status TEXT NOT NULL, + priority INTEGER DEFAULT 0, + created_by TEXT, + created_at INTEGER NOT NULL, + started_at INTEGER, + completed_at INTEGER, + workspace_kind TEXT NOT NULL DEFAULT 'scratch', + workspace_path TEXT, + claim_lock TEXT, + claim_expires INTEGER, + tenant TEXT, + result TEXT, + idempotency_key TEXT, + spawn_failures INTEGER NOT NULL DEFAULT 0, + worker_pid INTEGER, + last_spawn_error TEXT + ) + """) + conn.execute(""" + CREATE TABLE task_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + kind TEXT NOT NULL, + payload TEXT, + created_at INTEGER NOT NULL + ) + """) + conn.execute( + "INSERT INTO tasks " + "(id, title, body, assignee, status, priority, created_by, created_at, " + "started_at, completed_at, workspace_kind, workspace_path, claim_lock, " + "claim_expires, tenant, result, idempotency_key, spawn_failures, " + "worker_pid, last_spawn_error) " + "VALUES ('legacy', 'old task', NULL, 'default', 'ready', 0, NULL, 1, " + "NULL, NULL, 'scratch', NULL, NULL, NULL, NULL, NULL, NULL, 4, NULL, " + "'missing profile')" + ) + conn.commit() + + kb._migrate_add_optional_columns(conn) + cols = {r[1] for r in conn.execute("PRAGMA table_info(tasks)")} + assert "spawn_failures" in cols + assert "consecutive_failures" in cols + assert "last_spawn_error" in cols + assert "last_failure_error" in cols + + row = conn.execute("SELECT * FROM tasks WHERE id = 'legacy'").fetchone() + assert row["consecutive_failures"] == 4 + assert row["last_failure_error"] == "missing profile" + task = kb.Task.from_row(row) + assert task.consecutive_failures == 4 + assert task.last_failure_error == "missing profile" + + kb._migrate_add_optional_columns(conn) + row_again = conn.execute("SELECT * FROM tasks WHERE id = 'legacy'").fetchone() + assert row_again["consecutive_failures"] == 4 + assert row_again["last_failure_error"] == "missing profile" + conn.close() + + # --------------------------------------------------------------------------- # Gateway-embedded dispatcher: config, CLI warnings, daemon deprecation stub