fix(state): F_FULLFSYNC barrier at WAL checkpoints on macOS (#30636)

On Darwin, synchronous=FULL (the WAL default) only issues a plain
fsync(), which Apple documents does NOT guarantee writes reach stable
storage or stay ordered. SQLite's WAL corruption-safety guarantee
assumes the OS honors the fsync barrier; macOS does not unless the app
uses F_FULLFSYNC. During a launchd *system* shutdown the page cache is
dropped (effectively power-loss for in-flight pages), so a WAL
checkpoint whose fsync 'reported' durable may never hit the platter —
corrupting state.db with a malformed image. That is the trigger in
#30636 ('SIGTERM during launchd shutdown under high load').

Apply PRAGMA checkpoint_fullfsync=1 (macOS-guarded) in
apply_wal_with_fallback. It forces the F_FULLFSYNC barrier only at
checkpoint boundaries (where WAL frames land in the main DB), so cost
amortizes to ~+0.1ms/commit vs ~+4ms for the broader fullfsync=1.
No-op off Darwin (F_FULLFSYNC is macOS-only).

Root-cause analysis by @catapreta on #30636. Supersedes #30654, whose
synchronous=FULL is a no-op (already FULL in WAL mode) and whose
TRUNCATE-on-close is already on main.

Co-authored-by: catapreta <catapreta@users.noreply.github.com>
This commit is contained in:
teknium1 2026-06-28 00:36:55 -07:00 committed by Teknium
parent 9229d0db17
commit 52d774f0f9
2 changed files with 128 additions and 0 deletions

View file

@ -19,6 +19,7 @@ import logging
import random
import re
import sqlite3
import sys
import threading
import time
from pathlib import Path
@ -241,6 +242,41 @@ def _on_disk_journal_mode(conn: sqlite3.Connection) -> Optional[str]:
return str(mode).strip().lower() if mode is not None else None
def _apply_macos_checkpoint_barrier(conn: sqlite3.Connection) -> None:
"""Enable ``PRAGMA checkpoint_fullfsync`` on macOS (no-op elsewhere).
On Darwin, ``synchronous=FULL`` (the WAL default) issues a plain
``fsync()``, which Apple documents does *not* guarantee that data
has reached stable storage or that writes are not reordered see
the ``fsync(2)`` man page. SQLite's WAL corruption-safety guarantee
assumes the OS honors the fsync write barrier; macOS does not unless
the app uses ``F_FULLFSYNC``.
During a launchd *system* shutdown/reboot the OS page cache is
dropped (effectively a power-loss event for in-flight pages), so a
WAL checkpoint whose ``fsync()`` "reported" durable may never have
hit the platter corrupting ``state.db`` with a malformed image.
This is the trigger in issue #30636 ("SIGTERM during launchd
shutdown under high load"), distinct from a plain in-session kill
(which the page cache survives and SQLite recovers from).
``checkpoint_fullfsync=1`` forces an ``F_FULLFSYNC`` barrier only at
checkpoint boundaries where WAL frames land in the main DB so the
cost amortizes to roughly +0.1 ms/commit (vs ~+4 ms for the broader
``fullfsync=1`` that flushes on every commit's WAL sync). Guarded by
``sys.platform == "darwin"`` because ``F_FULLFSYNC`` is macOS-only;
on other platforms the PRAGMA is a no-op, so we skip it entirely.
Best-effort: never raises.
"""
if sys.platform != "darwin":
return
try:
conn.execute("PRAGMA checkpoint_fullfsync=1")
except sqlite3.OperationalError:
pass
def apply_wal_with_fallback(
conn: sqlite3.Connection,
*,
@ -271,12 +307,14 @@ def apply_wal_with_fallback(
try:
current_mode = conn.execute("PRAGMA journal_mode").fetchone()
if current_mode and current_mode[0] == "wal":
_apply_macos_checkpoint_barrier(conn)
return "wal"
except sqlite3.OperationalError:
pass
try:
conn.execute("PRAGMA journal_mode=WAL")
_apply_macos_checkpoint_barrier(conn)
return "wal"
except sqlite3.OperationalError as exc:
msg = str(exc).lower()

View file

@ -4191,6 +4191,96 @@ class TestApplyWalProbe:
"set-pragma must fire on a fresh (non-WAL) connection"
)
def test_macos_checkpoint_fullsync_barrier_applied(self, tmp_path, monkeypatch):
"""On Darwin, apply_wal_with_fallback sets checkpoint_fullfsync=1 (issue #30636)."""
import sqlite3
import hermes_state
from hermes_state import apply_wal_with_fallback
class _TracingConn(sqlite3.Connection):
def __init__(self, *a, **kw):
super().__init__(*a, **kw)
self.executed = []
def execute(self, sql, params=()):
self.executed.append(sql)
return super().execute(sql, params)
monkeypatch.setattr(hermes_state.sys, "platform", "darwin")
db_path = tmp_path / "macos_fresh.db"
conn = _TracingConn(str(db_path))
try:
result = apply_wal_with_fallback(conn)
finally:
conn.close()
assert result == "wal"
assert any("checkpoint_fullfsync=1" in sql for sql in conn.executed), (
"checkpoint_fullfsync barrier must be applied on macOS"
)
def test_macos_barrier_applied_when_already_wal(self, tmp_path, monkeypatch):
"""The Darwin barrier fires on the already-WAL early-return path too."""
import sqlite3
import hermes_state
from hermes_state import apply_wal_with_fallback
class _TracingConn(sqlite3.Connection):
def __init__(self, *a, **kw):
super().__init__(*a, **kw)
self.executed = []
def execute(self, sql, params=()):
self.executed.append(sql)
return super().execute(sql, params)
db_path = tmp_path / "macos_wal.db"
with sqlite3.connect(str(db_path)) as seed:
seed.execute("PRAGMA journal_mode=WAL")
monkeypatch.setattr(hermes_state.sys, "platform", "darwin")
conn = _TracingConn(str(db_path))
try:
result = apply_wal_with_fallback(conn)
finally:
conn.close()
assert result == "wal"
assert any("checkpoint_fullfsync=1" in sql for sql in conn.executed), (
"checkpoint_fullfsync barrier must fire on the already-WAL path"
)
def test_checkpoint_fullsync_barrier_skipped_off_darwin(self, tmp_path, monkeypatch):
"""Non-macOS platforms must NOT issue the macOS-only PRAGMA."""
import sqlite3
import hermes_state
from hermes_state import apply_wal_with_fallback
class _TracingConn(sqlite3.Connection):
def __init__(self, *a, **kw):
super().__init__(*a, **kw)
self.executed = []
def execute(self, sql, params=()):
self.executed.append(sql)
return super().execute(sql, params)
monkeypatch.setattr(hermes_state.sys, "platform", "linux")
db_path = tmp_path / "linux_fresh.db"
conn = _TracingConn(str(db_path))
try:
result = apply_wal_with_fallback(conn)
finally:
conn.close()
assert result == "wal"
assert not any("checkpoint_fullfsync" in sql for sql in conn.executed), (
"checkpoint_fullfsync must not be issued off macOS"
)
def test_apply_wal_concurrent_connects_no_eio(self, tmp_path):
"""20 threads calling connect() on the same DB must not see disk I/O error."""
import sys