fix(kanban): refuse corrupt db auto-init

This commit is contained in:
Nick 2026-05-22 22:22:27 -04:00 committed by Teknium
parent e97a4c8f37
commit 39fe4ecee3
2 changed files with 200 additions and 0 deletions

View file

@ -75,6 +75,7 @@ import json
import os
import re
import secrets
import shutil
import sqlite3
import subprocess
import sys
@ -82,6 +83,7 @@ import threading
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable, Optional
@ -1005,6 +1007,97 @@ def _validate_sqlite_header(path: Path) -> None:
)
class KanbanDbCorruptError(RuntimeError):
"""Raised when an existing kanban DB file fails integrity checks.
Fail-closed guard against silent recreation of a corrupt board file,
which would otherwise destroy the user's tasks. Carries both the
original path and the timestamped backup we made before refusing.
"""
def __init__(self, db_path: Path, backup_path: Optional[Path], reason: str):
self.db_path = db_path
self.backup_path = backup_path
self.reason = reason
backup_str = str(backup_path) if backup_path is not None else "<backup failed>"
super().__init__(
f"Refusing to open corrupt kanban DB at {db_path}: {reason}. "
f"Original preserved; backup at {backup_str}."
)
def _backup_corrupt_db(path: Path) -> Optional[Path]:
"""Copy a corrupt DB (and its WAL/SHM sidecars) to a timestamped backup.
Returns the backup path of the main DB file, or ``None`` if the copy
itself failed (the caller still raises loudly in that case).
"""
stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
candidate = path.with_name(f"{path.name}.corrupt.{stamp}.bak")
counter = 0
while candidate.exists():
counter += 1
candidate = path.with_name(
f"{path.name}.corrupt.{stamp}.{counter}.bak"
)
try:
shutil.copy2(path, candidate)
except OSError:
return None
for suffix in ("-wal", "-shm"):
sidecar = path.with_name(path.name + suffix)
if sidecar.exists():
try:
shutil.copy2(sidecar, candidate.with_name(candidate.name + suffix))
except OSError:
pass
return candidate
def _guard_existing_db_is_healthy(path: Path) -> None:
"""Run ``PRAGMA integrity_check`` on an existing non-empty DB file.
Opens the probe in read/write mode so SQLite can recover or
checkpoint a healthy WAL/hot-journal DB before we declare it
corrupt. If the file is malformed, copy it (and any WAL/SHM
sidecars) to a timestamped backup and raise
:class:`KanbanDbCorruptError` so callers cannot silently recreate
the schema on top of a damaged DB.
Transient lock/busy errors (``sqlite3.OperationalError``) are NOT
treated as corruption; they propagate raw so the caller sees a
normal lock failure and no spurious ``.corrupt`` backup is made.
No-op for missing files, zero-byte files (treated as fresh), and
paths already proven healthy this process (cache hit).
"""
try:
if not path.exists() or path.stat().st_size == 0:
return
except OSError:
return
if str(path.resolve()) in _INITIALIZED_PATHS:
return
reason: Optional[str] = None
try:
probe = sqlite3.connect(str(path), timeout=5, isolation_level=None)
try:
row = probe.execute("PRAGMA integrity_check").fetchone()
finally:
probe.close()
if not row or (row[0] or "").lower() != "ok":
reason = f"integrity_check returned {row[0] if row else '<no row>'!r}"
except sqlite3.OperationalError:
# Lock contention, busy, transient IO — not corruption. Let it propagate.
raise
except sqlite3.DatabaseError as exc:
reason = f"sqlite refused to open file: {exc}"
if reason is None:
return
backup = _backup_corrupt_db(path)
raise KanbanDbCorruptError(path, backup, reason)
def connect(
db_path: Optional[Path] = None,
*,
@ -1033,7 +1126,13 @@ def connect(
else:
path = kanban_db_path(board=board)
path.parent.mkdir(parents=True, exist_ok=True)
# Cheap byte-level check first — catches the #29507 TLS-overwrite shape
# and other invalid-header cases without opening a sqlite connection.
_validate_sqlite_header(path)
# Full integrity probe — catches corruption past the header (malformed
# pages, broken internal metadata). Cached per-path after first success
# via _INITIALIZED_PATHS so it only runs once per process per path.
_guard_existing_db_is_healthy(path)
resolved = str(path.resolve())
conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
try: