mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-15 04:12:25 +00:00
chore: follow-up cleanup for Kanban migration fix
- Expand migration comment to name the primary failure mode (missing column OperationalError from #20842) ahead of the secondary SQLite schema-reparse concern; also document the stale-cols-snapshot invariant - Add clarifying comments on from_row() legacy fallback branches noting they are belt-and-suspenders dead code post-migration - Add task_events comment in existing test explaining why the table is required by the migrator - Add test_legacy_migration_no_legacy_columns_at_all: Scenario A — explicitly asserts the exact #20842 crash no longer occurs and that consecutive_failures defaults to 0 on a DB that never had spawn_failures - Add test_legacy_migration_both_columns_already_present: Scenario D — asserts the migration is a no-op when both columns already exist, preserving the existing counter value
This commit is contained in:
parent
b1d420e75f
commit
a2ff193050
2 changed files with 146 additions and 5 deletions
|
|
@ -628,11 +628,16 @@ class Task:
|
||||||
idempotency_key=row["idempotency_key"] if "idempotency_key" in keys else None,
|
idempotency_key=row["idempotency_key"] if "idempotency_key" in keys else None,
|
||||||
consecutive_failures=(
|
consecutive_failures=(
|
||||||
row["consecutive_failures"] if "consecutive_failures" in keys
|
row["consecutive_failures"] if "consecutive_failures" in keys
|
||||||
|
# Pre-migration fallback: ``_migrate_add_optional_columns`` always
|
||||||
|
# adds ``consecutive_failures`` now, so this branch is only reachable
|
||||||
|
# on a DB that was never opened since pre-#20410 code ran. Keep for
|
||||||
|
# belt-and-suspenders safety; in practice it is dead code post-migration.
|
||||||
else (row["spawn_failures"] if "spawn_failures" in keys else 0)
|
else (row["spawn_failures"] if "spawn_failures" in keys else 0)
|
||||||
),
|
),
|
||||||
worker_pid=row["worker_pid"] if "worker_pid" in keys else None,
|
worker_pid=row["worker_pid"] if "worker_pid" in keys else None,
|
||||||
last_failure_error=(
|
last_failure_error=(
|
||||||
row["last_failure_error"] if "last_failure_error" in keys
|
row["last_failure_error"] if "last_failure_error" in keys
|
||||||
|
# Same belt-and-suspenders fallback as consecutive_failures above.
|
||||||
else (row["last_spawn_error"] if "last_spawn_error" in keys else None)
|
else (row["last_spawn_error"] if "last_spawn_error" in keys else None)
|
||||||
),
|
),
|
||||||
max_runtime_seconds=(
|
max_runtime_seconds=(
|
||||||
|
|
@ -954,11 +959,22 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None:
|
||||||
"ON tasks(idempotency_key)"
|
"ON tasks(idempotency_key)"
|
||||||
)
|
)
|
||||||
# Legacy column migration: ``spawn_failures`` → ``consecutive_failures``
|
# Legacy column migration: ``spawn_failures`` → ``consecutive_failures``
|
||||||
# and ``last_spawn_error`` → ``last_failure_error``. Avoid
|
# and ``last_spawn_error`` → ``last_failure_error``.
|
||||||
# ``ALTER TABLE ... RENAME COLUMN`` here: existing board DBs may have
|
#
|
||||||
# related schema objects from older Kanban builds, and SQLite reparses
|
# Avoid ``ALTER TABLE ... RENAME COLUMN`` for two reasons:
|
||||||
# the whole schema during a rename. Adding/copying is more tolerant and
|
# 1. Primary: very old DBs may never have had ``spawn_failures`` at
|
||||||
# still preserves the historical counter/error values.
|
# all, so RENAME raises OperationalError: no such column (the crash
|
||||||
|
# reported in issue #20842 after the #20410 update).
|
||||||
|
# 2. Secondary: SQLite reparses the whole schema on any RENAME, which
|
||||||
|
# fails if related objects (views, triggers) reference the old name.
|
||||||
|
#
|
||||||
|
# ADD-first-then-copy is tolerant of both shapes and preserves
|
||||||
|
# historical counter values when the legacy columns do exist.
|
||||||
|
#
|
||||||
|
# NOTE: ``cols`` reflects the schema at entry to this function and is
|
||||||
|
# not refreshed between ALTER TABLE calls. Every guard below checks
|
||||||
|
# the *original* snapshot; this is intentional and safe as long as
|
||||||
|
# no step depends on a column added by a previous step in the same call.
|
||||||
if "consecutive_failures" not in cols:
|
if "consecutive_failures" not in cols:
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"ALTER TABLE tasks ADD COLUMN consecutive_failures "
|
"ALTER TABLE tasks ADD COLUMN consecutive_failures "
|
||||||
|
|
|
||||||
|
|
@ -2687,6 +2687,9 @@ def test_legacy_spawn_failure_columns_are_copied_not_renamed(tmp_path):
|
||||||
created_at INTEGER NOT NULL
|
created_at INTEGER NOT NULL
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
# task_events is required: _migrate_add_optional_columns also runs a
|
||||||
|
# PRAGMA on it to back-fill the run_id column and raises
|
||||||
|
# OperationalError if the table is absent.
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"INSERT INTO tasks "
|
"INSERT INTO tasks "
|
||||||
"(id, title, body, assignee, status, priority, created_by, created_at, "
|
"(id, title, body, assignee, status, priority, created_by, created_at, "
|
||||||
|
|
@ -2720,6 +2723,128 @@ def test_legacy_spawn_failure_columns_are_copied_not_renamed(tmp_path):
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_legacy_migration_no_legacy_columns_at_all(tmp_path):
|
||||||
|
"""Scenario A: DB has neither spawn_failures nor consecutive_failures.
|
||||||
|
|
||||||
|
This is the exact crash scenario from issue #20842 — a very old DB that
|
||||||
|
predates the spawn_failures column entirely. The old RENAME COLUMN path
|
||||||
|
raised ``sqlite3.OperationalError: no such column: spawn_failures``.
|
||||||
|
The ADD-first approach adds consecutive_failures with default 0.
|
||||||
|
"""
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
db_path = tmp_path / "ancient.db"
|
||||||
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE tasks (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
created_at INTEGER NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
# task_events is required: _migrate_add_optional_columns also runs a
|
||||||
|
# PRAGMA on it to back-fill the run_id column and raises
|
||||||
|
# OperationalError if the table is absent.
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE task_events (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
task_id TEXT NOT NULL,
|
||||||
|
kind TEXT NOT NULL,
|
||||||
|
payload TEXT,
|
||||||
|
created_at INTEGER NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO tasks (id, title, status, created_at) "
|
||||||
|
"VALUES ('t1', 'ancient task', 'ready', 1)"
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
# Must not raise (this was the crash before this fix).
|
||||||
|
kb._migrate_add_optional_columns(conn)
|
||||||
|
|
||||||
|
cols = {r[1] for r in conn.execute("PRAGMA table_info(tasks)")}
|
||||||
|
assert "consecutive_failures" in cols, "migration must add consecutive_failures"
|
||||||
|
assert "last_failure_error" in cols, "migration must add last_failure_error"
|
||||||
|
assert "spawn_failures" not in cols, "no legacy column should be synthesised"
|
||||||
|
|
||||||
|
row = conn.execute("SELECT * FROM tasks WHERE id = 't1'").fetchone()
|
||||||
|
assert row["consecutive_failures"] == 0
|
||||||
|
assert row["last_failure_error"] is None
|
||||||
|
|
||||||
|
# Idempotent second run must not raise either.
|
||||||
|
kb._migrate_add_optional_columns(conn)
|
||||||
|
row_again = conn.execute("SELECT * FROM tasks WHERE id = 't1'").fetchone()
|
||||||
|
assert row_again["consecutive_failures"] == 0
|
||||||
|
assert row_again["last_failure_error"] is None
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_legacy_migration_both_columns_already_present(tmp_path):
|
||||||
|
"""Scenario D: DB already has both spawn_failures AND consecutive_failures.
|
||||||
|
|
||||||
|
Represents a partially-migrated DB (e.g. user recovered manually after the
|
||||||
|
#20842 crash). The migration must be a complete no-op and must not
|
||||||
|
zero-out the existing counter.
|
||||||
|
"""
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
db_path = tmp_path / "partial.db"
|
||||||
|
conn = sqlite3.connect(str(db_path))
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE tasks (
|
||||||
|
id TEXT PRIMARY KEY,
|
||||||
|
title TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
created_at INTEGER NOT NULL,
|
||||||
|
spawn_failures INTEGER NOT NULL DEFAULT 0,
|
||||||
|
consecutive_failures INTEGER NOT NULL DEFAULT 0,
|
||||||
|
last_spawn_error TEXT,
|
||||||
|
last_failure_error TEXT
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
# task_events required for the run_id back-fill PRAGMA inside the migrator.
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE task_events (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
task_id TEXT NOT NULL,
|
||||||
|
kind TEXT NOT NULL,
|
||||||
|
payload TEXT,
|
||||||
|
created_at INTEGER NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.execute(
|
||||||
|
"INSERT INTO tasks (id, title, status, created_at, spawn_failures, "
|
||||||
|
"consecutive_failures, last_spawn_error, last_failure_error) "
|
||||||
|
"VALUES ('t2', 'partial task', 'ready', 1, 2, 3, 'old error', 'new error')"
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
kb._migrate_add_optional_columns(conn)
|
||||||
|
|
||||||
|
row = conn.execute("SELECT * FROM tasks WHERE id = 't2'").fetchone()
|
||||||
|
# consecutive_failures must not be reset by the migration.
|
||||||
|
assert row["consecutive_failures"] == 3, "migration must not overwrite existing counter"
|
||||||
|
assert row["last_failure_error"] == "new error", "migration must not overwrite existing error"
|
||||||
|
# Legacy column is preserved harmlessly.
|
||||||
|
assert row["spawn_failures"] == 2
|
||||||
|
|
||||||
|
# Schema must be unchanged — no spurious ADD or DROP.
|
||||||
|
cols_after = {r[1] for r in conn.execute("PRAGMA table_info(tasks)")}
|
||||||
|
assert "consecutive_failures" in cols_after
|
||||||
|
assert "last_failure_error" in cols_after
|
||||||
|
assert "spawn_failures" in cols_after # legacy preserved
|
||||||
|
|
||||||
|
# Idempotent second run must not modify values or raise.
|
||||||
|
kb._migrate_add_optional_columns(conn)
|
||||||
|
row_again = conn.execute("SELECT * FROM tasks WHERE id = 't2'").fetchone()
|
||||||
|
assert row_again["consecutive_failures"] == 3
|
||||||
|
assert row_again["last_failure_error"] == "new error"
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Gateway-embedded dispatcher: config, CLI warnings, daemon deprecation stub
|
# Gateway-embedded dispatcher: config, CLI warnings, daemon deprecation stub
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue