fix(gateway): close adapter resources when connect() fails or raises (#12339)

Gateway startup leaks aiohttp.ClientSession (and other partial-init
resources) when an adapter's connect() returns False or raises. The
adapter is never added to self.adapters, so the shutdown path at
gateway/run.py:2426 never calls disconnect() on it — Python GC later
logs 'Unclosed client session' at process exit.

Seen on 2026-04-18 18:08:16 during a double --replace takeover cycle:
one of the partial-init sessions survived past shutdown and emitted
the warning right before status=75/TEMPFAIL.

Fix:
- New GatewayRunner._safe_adapter_disconnect() helper — calls
  adapter.disconnect() and swallows any exception. Used on error paths.
- Connect loop calls it in both failure branches: success=False and
  except Exception.
- Adapter disconnect() implementations are already expected to be
  idempotent and tolerate partial-init state (they all guard on
  self._http_session / self._bridge_process before touching them).

Tests: tests/gateway/test_safe_adapter_disconnect.py — 3 cases verify
the helper forwards to disconnect, swallows exceptions, and tolerates
platform=None.
This commit is contained in:
Teknium 2026-04-18 18:53:31 -07:00 committed by GitHub
parent 632a807a3e
commit beabbd87ef
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 92 additions and 0 deletions

View file

@ -752,6 +752,26 @@ class GatewayRunner:
chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
)
async def _safe_adapter_disconnect(self, adapter, platform) -> None:
"""Call adapter.disconnect() defensively, swallowing any error.
Used when adapter.connect() failed or raised the adapter may
have allocated partial resources (aiohttp.ClientSession, poll
tasks, child subprocesses) that would otherwise leak and surface
as "Unclosed client session" warnings at process exit.
Must tolerate partial-init state and never raise, since callers
use it inside error-handling blocks.
"""
try:
await adapter.disconnect()
except Exception as e:
logger.debug(
"Defensive %s disconnect after failed connect raised: %s",
platform.value if platform is not None else "adapter",
e,
)
# -----------------------------------------------------------------
def _flush_memories_for_session(
@ -1913,6 +1933,15 @@ class GatewayRunner:
logger.info("%s connected", platform.value)
else:
logger.warning("%s failed to connect", platform.value)
# Defensive cleanup: a failed connect() may have
# allocated resources (aiohttp.ClientSession, poll
# tasks, bridge subprocesses) before giving up.
# Without this call, those resources are orphaned
# and Python logs "Unclosed client session" at
# process exit. Adapter disconnect() implementations
# are expected to be idempotent and tolerate
# partial-init state.
await self._safe_adapter_disconnect(adapter, platform)
if adapter.has_fatal_error:
self._update_platform_runtime_status(
platform.value,
@ -1953,6 +1982,10 @@ class GatewayRunner:
}
except Exception as e:
logger.error("%s error: %s", platform.value, e)
# Same defensive cleanup path for exceptions — an adapter
# that raised mid-connect may still have a live
# aiohttp.ClientSession or child subprocess.
await self._safe_adapter_disconnect(adapter, platform)
self._update_platform_runtime_status(
platform.value,
platform_state="retrying",

View file

@ -0,0 +1,59 @@
"""Regression tests: failed-connect path must call adapter.disconnect().
When adapter.connect() returns False or raises, the adapter may have
allocated resources (aiohttp.ClientSession, poll tasks, child
subprocesses) before giving up. Without a defensive disconnect() call
these leak and surface as "Unclosed client session" warnings at
process exit (seen on the 2026-04-18 18:08:16 gateway restart).
The fix: gateway/run.py wraps each adapter connect() with a safety-net
call to _safe_adapter_disconnect() in the failure branches.
"""
from unittest.mock import AsyncMock, MagicMock
import pytest
from gateway.config import Platform
from gateway.run import GatewayRunner
@pytest.fixture
def bare_runner():
"""A GatewayRunner shell that only needs to support _safe_adapter_disconnect."""
return object.__new__(GatewayRunner)
@pytest.mark.asyncio
async def test_safe_disconnect_calls_adapter_disconnect(bare_runner):
"""The helper forwards to adapter.disconnect()."""
adapter = MagicMock()
adapter.disconnect = AsyncMock(return_value=None)
await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
adapter.disconnect.assert_awaited_once()
@pytest.mark.asyncio
async def test_safe_disconnect_swallows_exceptions(bare_runner):
"""An exception in adapter.disconnect() must not propagate — the
caller is already on an error path."""
adapter = MagicMock()
adapter.disconnect = AsyncMock(side_effect=RuntimeError("partial init"))
# Must NOT raise
await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
adapter.disconnect.assert_awaited_once()
@pytest.mark.asyncio
async def test_safe_disconnect_handles_none_platform(bare_runner):
"""Logging path must tolerate platform=None."""
adapter = MagicMock()
adapter.disconnect = AsyncMock(side_effect=ValueError("nope"))
await bare_runner._safe_adapter_disconnect(adapter, None)
adapter.disconnect.assert_awaited_once()