fix(memory/holographic): close DB connection on shutdown instead of leaking to GC (#54133)

HolographicMemoryProvider.shutdown() dropped its MemoryStore reference
without calling the existing MemoryStore.close(). Since the connection is
opened check_same_thread=False (one per session), its fd was released by
refcount/GC at a non-deterministic time on a non-deterministic thread,
churning a DB fd through the kernel free pool on every session teardown.
Call close() so the fd is released deterministically.

Reported by @alfranli123 (#44037), who pinpointed the exact code location.
Note: the report's TLS-fd-recycle corruption attribution could not be
reproduced from the code — dropping a sqlite connection flushes valid
SQLite pages via the VFS, never TLS framing, and the provider is at most a
releaser of DB fds, not a TLS-flushing socket owner. This change is correct
resource hygiene that removes per-session fd churn regardless.
This commit is contained in:
Teknium 2026-06-28 02:41:52 -07:00 committed by GitHub
parent 00d8c2c915
commit 7c0a5def58
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 68 additions and 0 deletions

View file

@ -251,6 +251,17 @@ class HolographicMemoryProvider(MemoryProvider):
logger.debug("Holographic memory_write mirror failed: %s", e)
def shutdown(self) -> None:
# Close the SQLite connection deterministically instead of leaking it
# to GC. MemoryStore opens its connection with check_same_thread=False
# (store.py), so without an explicit close() the sqlite3.Connection's
# fd is released by refcount/GC at a non-deterministic time on a
# non-deterministic thread, churning a DB fd through the kernel's free
# pool on every session teardown. close() already exists and is cheap.
if self._store is not None:
try:
self._store.close()
except Exception as e:
logger.debug("Holographic shutdown close() failed: %s", e)
self._store = None
self._retriever = None

View file

@ -0,0 +1,57 @@
"""Regression test for #44037 — holographic provider leaked its SQLite
connection to GC on shutdown instead of closing it.
The corruption-mechanism framing in #44037 (TLS bytes written into the DB via
an fd-recycle race) was not reproducible from the code: dropping a sqlite
connection flushes valid pages through SQLite's own VFS, never TLS framing, and
the provider is at most a *releaser* of DB fds, not the TLS-flushing owner.
But the underlying resource-hygiene bug is real and is what this test pins:
``HolographicMemoryProvider.shutdown()`` must call ``MemoryStore.close()`` so
the ``check_same_thread=False`` connection's fd is released deterministically
on shutdown, rather than at a non-deterministic GC time on an arbitrary thread.
"""
import sqlite3
import pytest
from plugins.memory.holographic import HolographicMemoryProvider
def _make_provider(tmp_path):
db_path = str(tmp_path / "memory_store.db")
provider = HolographicMemoryProvider(config={"db_path": db_path, "hrr_dim": 64})
provider.initialize(session_id="test-session")
return provider
def test_shutdown_closes_store_connection(tmp_path):
provider = _make_provider(tmp_path)
store = provider._store
assert store is not None
conn = store._conn
# Connection is live before shutdown.
conn.execute("SELECT 1").fetchone()
provider.shutdown()
# References are dropped...
assert provider._store is None
assert provider._retriever is None
# ...AND the underlying connection was actually closed (not left to GC).
with pytest.raises(sqlite3.ProgrammingError):
conn.execute("SELECT 1")
def test_shutdown_is_idempotent_and_safe_without_store(tmp_path):
provider = _make_provider(tmp_path)
provider.shutdown()
# Second shutdown (store already None) must not raise.
provider.shutdown()
# A provider that was never initialized must also shut down cleanly.
bare = HolographicMemoryProvider(config={"db_path": str(tmp_path / "x.db")})
bare.shutdown()