From 54e61f93318d579b145421c447437ec345021eb1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 24 May 2026 04:36:24 -0700 Subject: [PATCH] fix(matrix,gateway): Matrix E2EE installs full dep set; plugins respect is_connected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #31116 — two distinct bugs in fresh-install Matrix gateway: 1. Matrix E2EE setup installed only mautrix[encryption], leaving asyncpg / aiosqlite / Markdown / aiohttp-socks uninstalled. The first encrypted connect failed with 'No module named asyncpg' deep inside MatrixAdapter.connect(). Root cause: the setup wizard hand-rolled a pip install of one package instead of using lazy_deps.ensure( 'platform.matrix'), and check_matrix_requirements() short-circuited the runtime installer on 'import mautrix' alone — so the other 4 packages were never pulled in. 2. Discord auto-enabled itself on every gateway start, even when the user never selected Discord and had no DISCORD_BOT_TOKEN. Root cause: gateway/config.py plugin-enablement loop gated enablement on entry.check_fn() (just 'is the SDK importable?') and ignored entry.is_connected (the 'did the user configure credentials?' probe). Same bug class as commit 7849a3d73 fixed for _platform_status in the setup wizard; this is the runtime counterpart. Affects Discord, Teams, and Google Chat. Changes: - hermes_cli/setup.py::_setup_matrix — install via lazy_deps.ensure('platform.matrix') to pull the full feature group. - gateway/platforms/matrix.py::_check_e2ee_deps — verify asyncpg + aiosqlite + PgCryptoStore in addition to OlmMachine, so E2EE failures surface at startup instead of at first encrypted-room connect. - gateway/platforms/matrix.py::check_matrix_requirements — use feature_missing('platform.matrix') as the install gate instead of a single 'import mautrix' check, so partial installs trigger the lazy installer correctly. - gateway/config.py plugin-enablement loop — consult entry.is_connected before flipping enabled=True. Explicit YAML enabled=true still wins. Tests: 3 new in tests/gateway/test_matrix.py (asyncpg-required, aiosqlite-required, partial-install lazy-runs), 5 new in tests/gateway/test_platform_registry.py (is_connected=False blocks, is_connected=True enables, is_connected=None falls back to check_fn, raising probe doesn't enable, explicit YAML wins). Validation: 310 tests across affected test modules pass. --- gateway/config.py | 34 +++++ gateway/platforms/matrix.py | 50 ++++++- hermes_cli/setup.py | 70 ++++++--- tests/gateway/test_matrix.py | 73 +++++++++ tests/gateway/test_platform_registry.py | 187 ++++++++++++++++++++++++ 5 files changed, 386 insertions(+), 28 deletions(-) diff --git a/gateway/config.py b/gateway/config.py index 0a578f66f34..9d3c966b77a 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -1813,6 +1813,17 @@ def _apply_env_overrides(config: GatewayConfig) -> None: # need to seed ``PlatformConfig.extra`` from env vars (e.g. Google Chat's # project_id / subscription_name) can supply ``env_enablement_fn`` on # their PlatformEntry — called here BEFORE adapter construction. + # + # Enablement gate (#31116): when a plugin registers ``is_connected`` + # (the "has the user actually configured credentials for this?" check), + # we MUST consult it before flipping ``enabled = True``. Otherwise + # ``check_fn`` alone — which for adapter plugins typically just + # verifies the SDK is importable / lazy-installs it — silently enables + # platforms the user never opted into, and the gateway then tries to + # connect to Discord / Teams / Google Chat with no token and emits + # noisy retry-forever errors. ``_platform_status`` was already fixed + # for the same bug class in commit 7849a3d73; this is the runtime + # counterpart. try: from hermes_cli.plugins import discover_plugins discover_plugins() # idempotent @@ -1825,6 +1836,29 @@ def _apply_env_overrides(config: GatewayConfig) -> None: logger.debug("check_fn for %s raised: %s", entry.name, e) continue platform = Platform(entry.name) + existing_cfg = config.platforms.get(platform) + # Only consult is_connected for platforms that are NOT already + # explicitly configured in YAML / env (existing_cfg with + # enabled=True means the user wrote it themselves or another + # env-var bridge enabled it — keep that decision). + if existing_cfg is None or not existing_cfg.enabled: + if entry.is_connected is not None: + try: + probe_cfg = existing_cfg or PlatformConfig() + configured = bool(entry.is_connected(probe_cfg)) + except Exception as exc: + logger.debug( + "is_connected for %s raised: %s — skipping enablement", + entry.name, exc, + ) + configured = False + if not configured: + logger.debug( + "Plugin platform '%s' available but not configured " + "(is_connected returned False) — skipping enable", + entry.name, + ) + continue if platform not in config.platforms: config.platforms[platform] = PlatformConfig() config.platforms[platform].enabled = True diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index 28b086291ae..f7837a1f7d6 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -138,7 +138,8 @@ _OUTBOUND_MENTION_RE = re.compile( ) _E2EE_INSTALL_HINT = ( - "Install with: pip install 'mautrix[encryption]' (requires libolm C library)" + "Install with: pip install 'mautrix[encryption]' asyncpg aiosqlite " + "(requires libolm C library)" ) _MATRIX_IMAGE_FILENAME_EXTS = frozenset({ @@ -214,9 +215,22 @@ def _create_matrix_session(proxy_url: str | None): def _check_e2ee_deps() -> bool: - """Return True if mautrix E2EE dependencies (python-olm) are available.""" + """Return True if mautrix E2EE dependencies are available. + + Verifies python-olm (via mautrix.crypto.OlmMachine), the SQLite crypto + store backend (mautrix.crypto.store.asyncpg.PgCryptoStore — yes, the + PgCryptoStore class also drives the sqlite backend in mautrix 0.21), + and the database drivers actually used at connect time (``asyncpg`` for + the underlying upgrade_table machinery, ``aiosqlite`` for the + ``sqlite:///`` URL we pass to ``Database.create``). Without all four, + encrypted rooms fail at connect time with a confusing + ``No module named 'asyncpg'`` (#31116). + """ try: from mautrix.crypto import OlmMachine # noqa: F401 + from mautrix.crypto.store.asyncpg import PgCryptoStore # noqa: F401 + import asyncpg # noqa: F401 + import aiosqlite # noqa: F401 return True except (ImportError, AttributeError): @@ -226,8 +240,13 @@ def _check_e2ee_deps() -> bool: def check_matrix_requirements() -> bool: """Return True if the Matrix adapter can be used. - Lazy-installs mautrix via ``tools.lazy_deps.ensure("platform.matrix")`` - on first call if not present. Rebinds all module-level type globals on success. + Lazy-installs the full ``platform.matrix`` feature group via + ``tools.lazy_deps.ensure_and_bind`` whenever any of the declared + packages (mautrix, Markdown, aiosqlite, asyncpg, aiohttp-socks) is + missing — not just mautrix itself. Previously this short-circuited on + ``import mautrix``, which left the other four packages uninstalled + forever and broke E2EE connect with ``No module named 'asyncpg'`` + (#31116). Rebinds module-level type globals on success. """ token = os.getenv("MATRIX_ACCESS_TOKEN", "") password = os.getenv("MATRIX_PASSWORD", "") @@ -239,9 +258,20 @@ def check_matrix_requirements() -> bool: if not homeserver: logger.warning("Matrix: MATRIX_HOMESERVER not set") return False + + # Check whether any package in the platform.matrix feature group is + # missing. ``feature_missing`` is cheap (per-spec importlib.metadata + # lookups) and correctly handles ``mautrix[encryption]`` by stripping + # the extras marker before checking the bare package. try: - import mautrix # noqa: F401 - except ImportError: + from tools.lazy_deps import feature_missing, ensure_and_bind + missing = feature_missing("platform.matrix") + except Exception as exc: # pragma: no cover — defensive + logger.debug("Matrix: lazy_deps lookup failed: %s", exc) + missing = () + ensure_and_bind = None # type: ignore[assignment] + + if missing or ensure_and_bind is None: def _import(): from mautrix.types import ( ContentURI, EventID, EventType, PaginationDirection, @@ -261,10 +291,14 @@ def check_matrix_requirements() -> bool: "UserID": UserID, } - from tools.lazy_deps import ensure_and_bind + if ensure_and_bind is None: + return False if not ensure_and_bind("platform.matrix", _import, globals(), prompt=False): logger.warning( - "Matrix: mautrix not installed. Run: pip install 'mautrix[encryption]'" + "Matrix: required packages not installed (%s). " + "Run: pip install 'mautrix[encryption]' asyncpg aiosqlite " + "Markdown aiohttp-socks", + ", ".join(missing) if missing else "platform.matrix", ) return False diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 20525205db0..16eeba4e825 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -2188,28 +2188,58 @@ def _setup_matrix(): print_success("E2EE enabled") matrix_pkg = "mautrix[encryption]" if want_e2ee else "mautrix" + # Use the central lazy-deps feature group so we install ALL of + # platform.matrix's dependencies (mautrix, Markdown, aiosqlite, + # asyncpg, aiohttp-socks) — not just mautrix itself. The previous + # hand-rolled ``pip install mautrix[encryption]`` left asyncpg / + # aiosqlite uninstalled and broke E2EE connect with + # ``No module named 'asyncpg'`` on every fresh install (#31116). try: - __import__("mautrix") + from tools.lazy_deps import ensure as _lazy_ensure, feature_missing + _missing_before = feature_missing("platform.matrix") + if _missing_before: + print_info( + f"Installing {matrix_pkg} (+ {len(_missing_before)} runtime deps)..." + ) + try: + _lazy_ensure("platform.matrix", prompt=False) + print_success(f"{matrix_pkg} installed") + except Exception as exc: + print_warning( + f"Install failed — run manually: pip install " + f"'mautrix[encryption]' asyncpg aiosqlite Markdown " + f"aiohttp-socks" + ) + print_info(f" Error: {exc}") except ImportError: - print_info(f"Installing {matrix_pkg}...") - import subprocess - uv_bin = shutil.which("uv") - if uv_bin: - result = subprocess.run( - [uv_bin, "pip", "install", "--python", sys.executable, matrix_pkg], - capture_output=True, text=True, - ) - else: - result = subprocess.run( - [sys.executable, "-m", "pip", "install", matrix_pkg], - capture_output=True, text=True, - ) - if result.returncode == 0: - print_success(f"{matrix_pkg} installed") - else: - print_warning(f"Install failed — run manually: pip install '{matrix_pkg}'") - if result.stderr: - print_info(f" Error: {result.stderr.strip().splitlines()[-1]}") + # tools.lazy_deps unavailable (extreme edge case — partial + # install). Fall back to the legacy single-package install + # path so the wizard still does *something*. + try: + __import__("mautrix") + except ImportError: + print_info(f"Installing {matrix_pkg}...") + import subprocess + uv_bin = shutil.which("uv") + if uv_bin: + result = subprocess.run( + [uv_bin, "pip", "install", "--python", sys.executable, matrix_pkg], + capture_output=True, text=True, + ) + else: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", matrix_pkg], + capture_output=True, text=True, + ) + if result.returncode == 0: + print_success(f"{matrix_pkg} installed") + else: + print_warning( + f"Install failed — run manually: pip install " + f"'{matrix_pkg}' asyncpg aiosqlite Markdown aiohttp-socks" + ) + if result.stderr: + print_info(f" Error: {result.stderr.strip().splitlines()[-1]}") print() print_info("🔒 Security: Restrict who can use your bot") diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py index a0fb8f086d8..c7c03b1a8b1 100644 --- a/tests/gateway/test_matrix.py +++ b/tests/gateway/test_matrix.py @@ -797,6 +797,79 @@ class TestMatrixRequirements: with patch("tools.lazy_deps.ensure", side_effect=ImportError("mautrix unavailable")): assert matrix_mod.check_matrix_requirements() is False + def test_check_e2ee_deps_requires_asyncpg(self, monkeypatch): + """E2EE deps check must reject when asyncpg is missing — even if olm is present. + + Regression for #31116: ``mautrix[encryption]`` extra installs python-olm + but NOT asyncpg/aiosqlite, which are required by mautrix's crypto store + at connect time. ``_check_e2ee_deps`` previously only tested + ``OlmMachine`` import and returned True, so the failure manifested as + a confusing ``No module named 'asyncpg'`` deep in + ``MatrixAdapter.connect()``. + """ + from gateway.platforms.matrix import _check_e2ee_deps + import builtins + real_import = builtins.__import__ + + def _blocking_import(name, *args, **kwargs): + if name == "asyncpg" or name.startswith("asyncpg."): + raise ImportError("blocked for test") + return real_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", _blocking_import): + assert _check_e2ee_deps() is False + + def test_check_e2ee_deps_requires_aiosqlite(self): + """E2EE deps check must reject when aiosqlite is missing. + + Mautrix's ``Database.create("sqlite:///...")`` driver lookup imports + aiosqlite lazily — without it, connect fails at ``crypto_db.start()``. + """ + from gateway.platforms.matrix import _check_e2ee_deps + import builtins + real_import = builtins.__import__ + + def _blocking_import(name, *args, **kwargs): + if name == "aiosqlite" or name.startswith("aiosqlite."): + raise ImportError("blocked for test") + return real_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", _blocking_import): + assert _check_e2ee_deps() is False + + def test_check_requirements_runs_lazy_install_when_partial(self, monkeypatch): + """When mautrix is installed but asyncpg/aiosqlite are missing, + check_matrix_requirements must still run the lazy installer. + + Regression for #31116: the previous ``try: import mautrix`` gate + short-circuited the install of the OTHER 4 platform.matrix packages, + so a partial install (mautrix only) was treated as fully installed. + """ + monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_test") + monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org") + monkeypatch.delenv("MATRIX_ENCRYPTION", raising=False) + + from gateway.platforms import matrix as matrix_mod + + # Simulate "mautrix installed, asyncpg missing" → feature_missing + # returns a non-empty tuple → ensure_and_bind MUST be called. + called = {"ensure_and_bind": False} + + def _fake_ensure_and_bind(feature, importer, target_globals, **kwargs): + called["ensure_and_bind"] = True + assert feature == "platform.matrix" + return True # Pretend install succeeded. + + with patch("tools.lazy_deps.feature_missing", return_value=("asyncpg==0.31.0",)), \ + patch("tools.lazy_deps.ensure_and_bind", side_effect=_fake_ensure_and_bind): + matrix_mod.check_matrix_requirements() + + assert called["ensure_and_bind"], ( + "check_matrix_requirements must call ensure_and_bind whenever ANY " + "platform.matrix dep is missing, not just when mautrix itself is " + "missing (#31116)" + ) + # --------------------------------------------------------------------------- # Access-token auth / E2EE bootstrap diff --git a/tests/gateway/test_platform_registry.py b/tests/gateway/test_platform_registry.py index 4ddc645b7b2..88d607b1c39 100644 --- a/tests/gateway/test_platform_registry.py +++ b/tests/gateway/test_platform_registry.py @@ -708,3 +708,190 @@ class TestPluginPlatformSharedKeyBridge: assert extra.get("allow_from") == ["alice", "bob"] finally: _reg.unregister("mysharedplat") + + +class TestPluginEnablementGate: + """Plugin platforms must NOT auto-enable on check_fn alone (#31116). + + When a plugin registers ``is_connected`` (the "did the user actually + configure credentials" probe), ``load_gateway_config`` must consult it + before flipping ``enabled = True``. Without this gate, ``check_fn`` + semantics ("the SDK is importable") get conflated with "the user wants + this platform on", and the gateway tries to connect to e.g. Discord + with no token — emitting noisy retry-forever errors on every fresh + install that has the plugin loaded. + """ + + def _write_config(self, tmp_path, content: str = ""): + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + (hermes_home / "config.yaml").write_text(content, encoding="utf-8") + return hermes_home + + def test_plugin_with_is_connected_false_is_NOT_enabled( + self, tmp_path, monkeypatch + ): + """check_fn=True + is_connected=False must NOT enable the platform. + + Reproduces #31116: Discord plugin loads, its check_fn lazy-installs + discord.py and returns True, but the user has no DISCORD_BOT_TOKEN. + Previously this auto-enabled Discord and the gateway spammed + ``ERROR ... [Discord] No bot token configured`` on every reconnect. + """ + from gateway.platform_registry import platform_registry as _reg + + _reg.register(PlatformEntry( + name="myunconfiguredplat", + label="MyUnconfigured", + adapter_factory=lambda cfg: None, + check_fn=lambda: True, # SDK available + is_connected=lambda cfg: False, # but user hasn't set credentials + source="plugin", + )) + try: + home = self._write_config(tmp_path) + monkeypatch.setenv("HERMES_HOME", str(home)) + + from gateway.config import load_gateway_config, Platform + cfg = load_gateway_config() + + plat = Platform("myunconfiguredplat") + # Either absent entirely, or present but explicitly disabled. + if plat in cfg.platforms: + assert cfg.platforms[plat].enabled is False, ( + "Plugin with is_connected=False must NOT be auto-enabled" + ) + finally: + _reg.unregister("myunconfiguredplat") + + def test_plugin_with_is_connected_true_is_enabled( + self, tmp_path, monkeypatch + ): + """check_fn=True + is_connected=True still enables the platform.""" + from gateway.platform_registry import platform_registry as _reg + + _reg.register(PlatformEntry( + name="myconfiguredplat", + label="MyConfigured", + adapter_factory=lambda cfg: None, + check_fn=lambda: True, + is_connected=lambda cfg: True, + source="plugin", + )) + try: + home = self._write_config(tmp_path) + monkeypatch.setenv("HERMES_HOME", str(home)) + + from gateway.config import load_gateway_config, Platform + cfg = load_gateway_config() + + plat = Platform("myconfiguredplat") + assert plat in cfg.platforms + assert cfg.platforms[plat].enabled is True + finally: + _reg.unregister("myconfiguredplat") + + def test_plugin_without_is_connected_falls_back_to_check_fn( + self, tmp_path, monkeypatch + ): + """Legacy plugins that don't register is_connected keep working. + + For plugins where ``is_connected is None``, gating on ``check_fn`` + alone remains the contract — that's what callers without a + credential probe have always done. + """ + from gateway.platform_registry import platform_registry as _reg + + _reg.register(PlatformEntry( + name="mylegacyplat", + label="MyLegacy", + adapter_factory=lambda cfg: None, + check_fn=lambda: True, + # is_connected intentionally omitted (None) + source="plugin", + )) + try: + home = self._write_config(tmp_path) + monkeypatch.setenv("HERMES_HOME", str(home)) + + from gateway.config import load_gateway_config, Platform + cfg = load_gateway_config() + + plat = Platform("mylegacyplat") + assert plat in cfg.platforms + assert cfg.platforms[plat].enabled is True + finally: + _reg.unregister("mylegacyplat") + + def test_is_connected_raises_does_not_enable(self, tmp_path, monkeypatch): + """A buggy is_connected must not silently enable the platform. + + Treat a raising is_connected as "configuration unknown" — refuse to + enable, log, and move on. Anything else would re-introduce the + #31116 bug for plugins whose probe has a transient failure. + """ + from gateway.platform_registry import platform_registry as _reg + + def _bad_probe(cfg): + raise RuntimeError("plugin bug") + + _reg.register(PlatformEntry( + name="mybadprobeplat", + label="MyBadProbe", + adapter_factory=lambda cfg: None, + check_fn=lambda: True, + is_connected=_bad_probe, + source="plugin", + )) + try: + home = self._write_config(tmp_path) + monkeypatch.setenv("HERMES_HOME", str(home)) + + from gateway.config import load_gateway_config, Platform + cfg = load_gateway_config() + + plat = Platform("mybadprobeplat") + if plat in cfg.platforms: + assert cfg.platforms[plat].enabled is False + finally: + _reg.unregister("mybadprobeplat") + + def test_yaml_enabled_true_overrides_is_connected_false( + self, tmp_path, monkeypatch + ): + """Explicit YAML ``enabled: true`` wins over is_connected=False. + + If the user wrote ``platforms.X.enabled: true`` themselves, respect + that — they may be using a credential mechanism the plugin's + is_connected probe doesn't know about. Don't fight them. + """ + from gateway.platform_registry import platform_registry as _reg + + _reg.register(PlatformEntry( + name="myexplicitplat", + label="MyExplicit", + adapter_factory=lambda cfg: None, + check_fn=lambda: True, + is_connected=lambda cfg: False, + source="plugin", + )) + try: + home = self._write_config( + tmp_path, + "platforms:\n" + " myexplicitplat:\n" + " enabled: true\n", + ) + monkeypatch.setenv("HERMES_HOME", str(home)) + + from gateway.config import load_gateway_config, Platform + cfg = load_gateway_config() + + plat = Platform("myexplicitplat") + assert plat in cfg.platforms + assert cfg.platforms[plat].enabled is True, ( + "Explicit YAML enabled: true must win over plugin's " + "is_connected=False — user has the final say" + ) + finally: + _reg.unregister("myexplicitplat")