diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 9ad61f87817..227a943ea65 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -1481,12 +1481,22 @@ def write_txn(conn: sqlite3.Connection): Use for any multi-statement write (creating a task + link, claiming a task + recording an event, etc.). A claim CAS inside this context is atomic -- at most one concurrent writer can succeed. + + The explicit ROLLBACK on exception is wrapped in try/except so that + a SQLite auto-rollback (which leaves no active transaction) does not + shadow the original exception with a spurious rollback error. """ conn.execute("BEGIN IMMEDIATE") try: yield conn except Exception: - conn.execute("ROLLBACK") + try: + conn.execute("ROLLBACK") + except sqlite3.OperationalError: + # SQLite has already auto-rolled-back the transaction (typical + # under EIO, lock contention, or corruption). Nothing to undo; + # do not let this secondary failure shadow the real one. + pass raise else: conn.execute("COMMIT") diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py index 07e3b957e5f..a34c659fce8 100644 --- a/tests/hermes_cli/test_kanban_db.py +++ b/tests/hermes_cli/test_kanban_db.py @@ -3417,3 +3417,60 @@ def test_pragmas_not_accidentally_disabled_by_migrate_path(tmp_path): assert conn.execute("PRAGMA cell_size_check").fetchone()[0] == 1 assert conn.execute("PRAGMA synchronous").fetchone()[0] == 2 +# write_txn — rollback handler must not mask the original exception +# --------------------------------------------------------------------------- + + +def test_write_txn_preserves_original_exception_when_rollback_fails(kanban_home): + """When a write inside write_txn raises an OperationalError that SQLite + has already auto-rolled-back (e.g. ``disk I/O error``, + ``database is locked``, ``database disk image is malformed``), the + explicit ROLLBACK in ``write_txn.__exit__`` itself raises + ``cannot rollback - no transaction is active``. The original cause + must NOT be masked by the secondary rollback failure — operators rely + on the original cause to diagnose the underlying issue. + """ + + class FailingConnWrapper: + """Delegate to a real connection, simulating an EIO during an INSERT + that SQLite has already auto-rolled-back.""" + + def __init__(self, real): + self._real = real + self._fail_armed = True + + def execute(self, sql, *args, **kwargs): + if ( + self._fail_armed + and sql.lstrip().upper().startswith("INSERT") + and "task_events" in sql.lower() + ): + self._fail_armed = False # one-shot + # Simulate SQLite auto-rolling back the transaction by + # issuing a real ROLLBACK now. After this, BEGIN IMMEDIATE + # is no longer active and an explicit ROLLBACK would error. + try: + self._real.execute("ROLLBACK") + except sqlite3.OperationalError: + pass + raise sqlite3.OperationalError("disk I/O error") + return self._real.execute(sql, *args, **kwargs) + + def __getattr__(self, name): + return getattr(self._real, name) + + with kb.connect() as conn: + wrapper = FailingConnWrapper(conn) + with pytest.raises(sqlite3.OperationalError) as excinfo: + with kb.write_txn(wrapper): + kb._append_event(wrapper, "t_bogus", "promoted", None) + + msg = str(excinfo.value) + assert "disk I/O error" in msg, ( + f"write_txn masked the original exception with rollback failure; " + f"got {msg!r} (expected to contain 'disk I/O error')" + ) + assert "cannot rollback" not in msg, ( + f"write_txn surfaced the rollback failure instead of the original " + f"OperationalError; got {msg!r}" + )