From beabbd87efcb84928b7e6387ebcfba15fbcec96a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 18 Apr 2026 18:53:31 -0700 Subject: [PATCH] fix(gateway): close adapter resources when connect() fails or raises (#12339) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gateway startup leaks aiohttp.ClientSession (and other partial-init resources) when an adapter's connect() returns False or raises. The adapter is never added to self.adapters, so the shutdown path at gateway/run.py:2426 never calls disconnect() on it — Python GC later logs 'Unclosed client session' at process exit. Seen on 2026-04-18 18:08:16 during a double --replace takeover cycle: one of the partial-init sessions survived past shutdown and emitted the warning right before status=75/TEMPFAIL. Fix: - New GatewayRunner._safe_adapter_disconnect() helper — calls adapter.disconnect() and swallows any exception. Used on error paths. - Connect loop calls it in both failure branches: success=False and except Exception. - Adapter disconnect() implementations are already expected to be idempotent and tolerate partial-init state (they all guard on self._http_session / self._bridge_process before touching them). Tests: tests/gateway/test_safe_adapter_disconnect.py — 3 cases verify the helper forwards to disconnect, swallows exceptions, and tolerates platform=None. --- gateway/run.py | 33 +++++++++++ tests/gateway/test_safe_adapter_disconnect.py | 59 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 tests/gateway/test_safe_adapter_disconnect.py diff --git a/gateway/run.py b/gateway/run.py index f9782b2990..b72e95eb83 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -752,6 +752,26 @@ class GatewayRunner: chat_id for chat_id, mode in self._voice_mode.items() if mode == "off" ) + async def _safe_adapter_disconnect(self, adapter, platform) -> None: + """Call adapter.disconnect() defensively, swallowing any error. + + Used when adapter.connect() failed or raised — the adapter may + have allocated partial resources (aiohttp.ClientSession, poll + tasks, child subprocesses) that would otherwise leak and surface + as "Unclosed client session" warnings at process exit. + + Must tolerate partial-init state and never raise, since callers + use it inside error-handling blocks. + """ + try: + await adapter.disconnect() + except Exception as e: + logger.debug( + "Defensive %s disconnect after failed connect raised: %s", + platform.value if platform is not None else "adapter", + e, + ) + # ----------------------------------------------------------------- def _flush_memories_for_session( @@ -1913,6 +1933,15 @@ class GatewayRunner: logger.info("✓ %s connected", platform.value) else: logger.warning("✗ %s failed to connect", platform.value) + # Defensive cleanup: a failed connect() may have + # allocated resources (aiohttp.ClientSession, poll + # tasks, bridge subprocesses) before giving up. + # Without this call, those resources are orphaned + # and Python logs "Unclosed client session" at + # process exit. Adapter disconnect() implementations + # are expected to be idempotent and tolerate + # partial-init state. + await self._safe_adapter_disconnect(adapter, platform) if adapter.has_fatal_error: self._update_platform_runtime_status( platform.value, @@ -1953,6 +1982,10 @@ class GatewayRunner: } except Exception as e: logger.error("✗ %s error: %s", platform.value, e) + # Same defensive cleanup path for exceptions — an adapter + # that raised mid-connect may still have a live + # aiohttp.ClientSession or child subprocess. + await self._safe_adapter_disconnect(adapter, platform) self._update_platform_runtime_status( platform.value, platform_state="retrying", diff --git a/tests/gateway/test_safe_adapter_disconnect.py b/tests/gateway/test_safe_adapter_disconnect.py new file mode 100644 index 0000000000..ec11f2663a --- /dev/null +++ b/tests/gateway/test_safe_adapter_disconnect.py @@ -0,0 +1,59 @@ +"""Regression tests: failed-connect path must call adapter.disconnect(). + +When adapter.connect() returns False or raises, the adapter may have +allocated resources (aiohttp.ClientSession, poll tasks, child +subprocesses) before giving up. Without a defensive disconnect() call +these leak and surface as "Unclosed client session" warnings at +process exit (seen on the 2026-04-18 18:08:16 gateway restart). + +The fix: gateway/run.py wraps each adapter connect() with a safety-net +call to _safe_adapter_disconnect() in the failure branches. +""" + +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from gateway.config import Platform +from gateway.run import GatewayRunner + + +@pytest.fixture +def bare_runner(): + """A GatewayRunner shell that only needs to support _safe_adapter_disconnect.""" + return object.__new__(GatewayRunner) + + +@pytest.mark.asyncio +async def test_safe_disconnect_calls_adapter_disconnect(bare_runner): + """The helper forwards to adapter.disconnect().""" + adapter = MagicMock() + adapter.disconnect = AsyncMock(return_value=None) + + await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM) + + adapter.disconnect.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_safe_disconnect_swallows_exceptions(bare_runner): + """An exception in adapter.disconnect() must not propagate — the + caller is already on an error path.""" + adapter = MagicMock() + adapter.disconnect = AsyncMock(side_effect=RuntimeError("partial init")) + + # Must NOT raise + await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM) + + adapter.disconnect.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_safe_disconnect_handles_none_platform(bare_runner): + """Logging path must tolerate platform=None.""" + adapter = MagicMock() + adapter.disconnect = AsyncMock(side_effect=ValueError("nope")) + + await bare_runner._safe_adapter_disconnect(adapter, None) + + adapter.disconnect.assert_awaited_once()