From a0be88992c94eb22a72693fc24855c418ca75404 Mon Sep 17 00:00:00 2001 From: Clifford Garwood Date: Fri, 24 Apr 2026 02:57:44 -0400 Subject: [PATCH 1/2] fix(matrix): bind PgCryptoStore device_id so fresh E2EE installs work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PgCryptoStore.__init__ defaults _device_id to "" and put_account writes that blank value into crypto_account. The UPSERT's ON CONFLICT DO UPDATE clause deliberately does not touch device_id, so once the row is written blank it stays blank forever — breaking every downstream device-scoped olm operation. Peers' to-device olm ciphertext can't match our identity key, no megolm sessions ever land, and the user sees "hermes is in the room but never responds to encrypted messages". Fix: call put_device_id(client.device_id) immediately after crypto_store.open() and before olm.load(). This sets the store's in-memory _device_id so the first put_account INSERT writes the correct value from the start. Observable symptoms without the fix, on a fresh crypto.db: - crypto_account.device_id = "" - crypto_tracked_user: 0 rows - crypto_device: 0 rows - crypto_olm_session: 0 rows - crypto_megolm_inbound_session: 0 rows - "No one-time keys nor device keys got when trying to share keys" warning on every startup - "olm event doesn't contain ciphertext for this device" DecryptionError on any inbound to-device event - Encrypted room messages arrive but never decrypt After the fix (wiped crypto.db + restart): - device_id populated with actual runtime device (e.g. CZIKTRFLOV) - all counts populate from sync as expected - encrypted DMs flow normally Who hits this: anyone with a fresh crypto.db — includes first-time matrix E2EE setup, nio→mautrix migrations (since matrix.py removes the legacy pickle on startup, creating a fresh SQLite store), and anyone who wipes crypto.db to start over. Existing installs that somehow already have a non-blank device_id would be unaffected, but no prior code path writes it correctly, so that set is likely empty. --- gateway/platforms/matrix.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index a5f9352b5..7033e6171 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -532,6 +532,20 @@ class MatrixAdapter(BasePlatformAdapter): ) await crypto_store.open() + # Bind the store to the runtime device_id before any + # put_account() runs. PgCryptoStore defaults _device_id + # to "" and its crypto_account UPSERT never updates the + # device_id column on conflict — so once put_account + # writes blank, it stays blank forever. That breaks + # every downstream device-scoped olm operation: peer + # to-device ciphertext can't find our identity key and + # no megolm sessions ever land. Setting _device_id here + # (in-memory; the on-disk row may not exist yet) makes + # the first put_account write the correct value. + if client.device_id: + from mautrix.types import DeviceID as _DeviceID + await crypto_store.put_device_id(_DeviceID(client.device_id)) + crypto_state = _CryptoStateStore(state_store, self._joined_rooms) olm = OlmMachine(client, crypto_store, crypto_state) From 27427c47df307535a9efb9e5249b02c0c545f59e Mon Sep 17 00:00:00 2001 From: Clifford Garwood Date: Fri, 24 Apr 2026 03:16:25 -0400 Subject: [PATCH 2/2] fix(matrix): drop needless DeviceID import + mock put_device_id in tests Two adjustments to make CI pass: - In gateway/platforms/matrix.py: `DeviceID` is `NewType("DeviceID", str)`, so passing `client.device_id` directly (already a str) works identically at runtime. The explicit import was cosmetic and tripped CI environments where `mautrix.types` doesn't re-export DeviceID at the expected path ("cannot import name 'DeviceID' from 'mautrix.types' (unknown location)"). - In tests/gateway/test_matrix.py: add `put_device_id` to the hand-written `PgCryptoStore` fake so the three encryption-path tests (test_connect_with_access_token_and_encryption, test_connect_uses_configured_device_id_over_whoami, test_connect_registers_encrypted_event_handler_when_encryption_on) can exercise the new crypto-store binding without AttributeError. --- gateway/platforms/matrix.py | 4 ++-- tests/gateway/test_matrix.py | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index 7033e6171..15589d991 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -542,9 +542,9 @@ class MatrixAdapter(BasePlatformAdapter): # no megolm sessions ever land. Setting _device_id here # (in-memory; the on-disk row may not exist yet) makes # the first put_account write the correct value. + # DeviceID is a NewType(str) so plain str works at runtime. if client.device_id: - from mautrix.types import DeviceID as _DeviceID - await crypto_store.put_device_id(_DeviceID(client.device_id)) + await crypto_store.put_device_id(client.device_id) crypto_state = _CryptoStateStore(state_store, self._joined_rooms) olm = OlmMachine(client, crypto_store, crypto_state) diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py index a088ad9ba..50a8a6675 100644 --- a/tests/gateway/test_matrix.py +++ b/tests/gateway/test_matrix.py @@ -197,10 +197,14 @@ def _make_fake_mautrix(): self.account_id = account_id self.pickle_key = pickle_key self.db = db + self._device_id = "" async def open(self): pass + async def put_device_id(self, device_id): + self._device_id = device_id + mautrix_crypto_store_asyncpg.PgCryptoStore = PgCryptoStore # --- mautrix.util ---