mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-31 06:51:29 +00:00
fix(kanban): preserve original exception when write_txn rollback fails
When code inside a write_txn block raises an OperationalError that SQLite has already auto-rolled-back (typical for disk I/O error, database is locked, and database disk image is malformed), the explicit ROLLBACK in write_txn.__exit__ itself raises cannot rollback - no transaction is active and the secondary exception replaces the original in the traceback. Operators see a misleading error and lose the diagnostic information they need. Swallow the rollback-time OperationalError so the caller always sees the original cause. Confirmed reproducer: tests/hermes_cli/test_kanban_db.py:: test_write_txn_preserves_original_exception_when_rollback_fails
This commit is contained in:
parent
5c49cd0ed0
commit
e83252dc46
2 changed files with 68 additions and 1 deletions
|
|
@ -1481,12 +1481,22 @@ def write_txn(conn: sqlite3.Connection):
|
|||
Use for any multi-statement write (creating a task + link, claiming a
|
||||
task + recording an event, etc.). A claim CAS inside this context is
|
||||
atomic -- at most one concurrent writer can succeed.
|
||||
|
||||
The explicit ROLLBACK on exception is wrapped in try/except so that
|
||||
a SQLite auto-rollback (which leaves no active transaction) does not
|
||||
shadow the original exception with a spurious rollback error.
|
||||
"""
|
||||
conn.execute("BEGIN IMMEDIATE")
|
||||
try:
|
||||
yield conn
|
||||
except Exception:
|
||||
conn.execute("ROLLBACK")
|
||||
try:
|
||||
conn.execute("ROLLBACK")
|
||||
except sqlite3.OperationalError:
|
||||
# SQLite has already auto-rolled-back the transaction (typical
|
||||
# under EIO, lock contention, or corruption). Nothing to undo;
|
||||
# do not let this secondary failure shadow the real one.
|
||||
pass
|
||||
raise
|
||||
else:
|
||||
conn.execute("COMMIT")
|
||||
|
|
|
|||
|
|
@ -3417,3 +3417,60 @@ def test_pragmas_not_accidentally_disabled_by_migrate_path(tmp_path):
|
|||
assert conn.execute("PRAGMA cell_size_check").fetchone()[0] == 1
|
||||
assert conn.execute("PRAGMA synchronous").fetchone()[0] == 2
|
||||
|
||||
# write_txn — rollback handler must not mask the original exception
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_write_txn_preserves_original_exception_when_rollback_fails(kanban_home):
|
||||
"""When a write inside write_txn raises an OperationalError that SQLite
|
||||
has already auto-rolled-back (e.g. ``disk I/O error``,
|
||||
``database is locked``, ``database disk image is malformed``), the
|
||||
explicit ROLLBACK in ``write_txn.__exit__`` itself raises
|
||||
``cannot rollback - no transaction is active``. The original cause
|
||||
must NOT be masked by the secondary rollback failure — operators rely
|
||||
on the original cause to diagnose the underlying issue.
|
||||
"""
|
||||
|
||||
class FailingConnWrapper:
|
||||
"""Delegate to a real connection, simulating an EIO during an INSERT
|
||||
that SQLite has already auto-rolled-back."""
|
||||
|
||||
def __init__(self, real):
|
||||
self._real = real
|
||||
self._fail_armed = True
|
||||
|
||||
def execute(self, sql, *args, **kwargs):
|
||||
if (
|
||||
self._fail_armed
|
||||
and sql.lstrip().upper().startswith("INSERT")
|
||||
and "task_events" in sql.lower()
|
||||
):
|
||||
self._fail_armed = False # one-shot
|
||||
# Simulate SQLite auto-rolling back the transaction by
|
||||
# issuing a real ROLLBACK now. After this, BEGIN IMMEDIATE
|
||||
# is no longer active and an explicit ROLLBACK would error.
|
||||
try:
|
||||
self._real.execute("ROLLBACK")
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
raise sqlite3.OperationalError("disk I/O error")
|
||||
return self._real.execute(sql, *args, **kwargs)
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._real, name)
|
||||
|
||||
with kb.connect() as conn:
|
||||
wrapper = FailingConnWrapper(conn)
|
||||
with pytest.raises(sqlite3.OperationalError) as excinfo:
|
||||
with kb.write_txn(wrapper):
|
||||
kb._append_event(wrapper, "t_bogus", "promoted", None)
|
||||
|
||||
msg = str(excinfo.value)
|
||||
assert "disk I/O error" in msg, (
|
||||
f"write_txn masked the original exception with rollback failure; "
|
||||
f"got {msg!r} (expected to contain 'disk I/O error')"
|
||||
)
|
||||
assert "cannot rollback" not in msg, (
|
||||
f"write_txn surfaced the rollback failure instead of the original "
|
||||
f"OperationalError; got {msg!r}"
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue