mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): close adapter resources when connect() fails or raises (#12339)
Gateway startup leaks aiohttp.ClientSession (and other partial-init resources) when an adapter's connect() returns False or raises. The adapter is never added to self.adapters, so the shutdown path at gateway/run.py:2426 never calls disconnect() on it — Python GC later logs 'Unclosed client session' at process exit. Seen on 2026-04-18 18:08:16 during a double --replace takeover cycle: one of the partial-init sessions survived past shutdown and emitted the warning right before status=75/TEMPFAIL. Fix: - New GatewayRunner._safe_adapter_disconnect() helper — calls adapter.disconnect() and swallows any exception. Used on error paths. - Connect loop calls it in both failure branches: success=False and except Exception. - Adapter disconnect() implementations are already expected to be idempotent and tolerate partial-init state (they all guard on self._http_session / self._bridge_process before touching them). Tests: tests/gateway/test_safe_adapter_disconnect.py — 3 cases verify the helper forwards to disconnect, swallows exceptions, and tolerates platform=None.
This commit is contained in:
parent
632a807a3e
commit
beabbd87ef
2 changed files with 92 additions and 0 deletions
|
|
@ -752,6 +752,26 @@ class GatewayRunner:
|
||||||
chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
|
chat_id for chat_id, mode in self._voice_mode.items() if mode == "off"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def _safe_adapter_disconnect(self, adapter, platform) -> None:
|
||||||
|
"""Call adapter.disconnect() defensively, swallowing any error.
|
||||||
|
|
||||||
|
Used when adapter.connect() failed or raised — the adapter may
|
||||||
|
have allocated partial resources (aiohttp.ClientSession, poll
|
||||||
|
tasks, child subprocesses) that would otherwise leak and surface
|
||||||
|
as "Unclosed client session" warnings at process exit.
|
||||||
|
|
||||||
|
Must tolerate partial-init state and never raise, since callers
|
||||||
|
use it inside error-handling blocks.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
await adapter.disconnect()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(
|
||||||
|
"Defensive %s disconnect after failed connect raised: %s",
|
||||||
|
platform.value if platform is not None else "adapter",
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
|
||||||
# -----------------------------------------------------------------
|
# -----------------------------------------------------------------
|
||||||
|
|
||||||
def _flush_memories_for_session(
|
def _flush_memories_for_session(
|
||||||
|
|
@ -1913,6 +1933,15 @@ class GatewayRunner:
|
||||||
logger.info("✓ %s connected", platform.value)
|
logger.info("✓ %s connected", platform.value)
|
||||||
else:
|
else:
|
||||||
logger.warning("✗ %s failed to connect", platform.value)
|
logger.warning("✗ %s failed to connect", platform.value)
|
||||||
|
# Defensive cleanup: a failed connect() may have
|
||||||
|
# allocated resources (aiohttp.ClientSession, poll
|
||||||
|
# tasks, bridge subprocesses) before giving up.
|
||||||
|
# Without this call, those resources are orphaned
|
||||||
|
# and Python logs "Unclosed client session" at
|
||||||
|
# process exit. Adapter disconnect() implementations
|
||||||
|
# are expected to be idempotent and tolerate
|
||||||
|
# partial-init state.
|
||||||
|
await self._safe_adapter_disconnect(adapter, platform)
|
||||||
if adapter.has_fatal_error:
|
if adapter.has_fatal_error:
|
||||||
self._update_platform_runtime_status(
|
self._update_platform_runtime_status(
|
||||||
platform.value,
|
platform.value,
|
||||||
|
|
@ -1953,6 +1982,10 @@ class GatewayRunner:
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("✗ %s error: %s", platform.value, e)
|
logger.error("✗ %s error: %s", platform.value, e)
|
||||||
|
# Same defensive cleanup path for exceptions — an adapter
|
||||||
|
# that raised mid-connect may still have a live
|
||||||
|
# aiohttp.ClientSession or child subprocess.
|
||||||
|
await self._safe_adapter_disconnect(adapter, platform)
|
||||||
self._update_platform_runtime_status(
|
self._update_platform_runtime_status(
|
||||||
platform.value,
|
platform.value,
|
||||||
platform_state="retrying",
|
platform_state="retrying",
|
||||||
|
|
|
||||||
59
tests/gateway/test_safe_adapter_disconnect.py
Normal file
59
tests/gateway/test_safe_adapter_disconnect.py
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
"""Regression tests: failed-connect path must call adapter.disconnect().
|
||||||
|
|
||||||
|
When adapter.connect() returns False or raises, the adapter may have
|
||||||
|
allocated resources (aiohttp.ClientSession, poll tasks, child
|
||||||
|
subprocesses) before giving up. Without a defensive disconnect() call
|
||||||
|
these leak and surface as "Unclosed client session" warnings at
|
||||||
|
process exit (seen on the 2026-04-18 18:08:16 gateway restart).
|
||||||
|
|
||||||
|
The fix: gateway/run.py wraps each adapter connect() with a safety-net
|
||||||
|
call to _safe_adapter_disconnect() in the failure branches.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from gateway.config import Platform
|
||||||
|
from gateway.run import GatewayRunner
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def bare_runner():
|
||||||
|
"""A GatewayRunner shell that only needs to support _safe_adapter_disconnect."""
|
||||||
|
return object.__new__(GatewayRunner)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_safe_disconnect_calls_adapter_disconnect(bare_runner):
|
||||||
|
"""The helper forwards to adapter.disconnect()."""
|
||||||
|
adapter = MagicMock()
|
||||||
|
adapter.disconnect = AsyncMock(return_value=None)
|
||||||
|
|
||||||
|
await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
|
||||||
|
|
||||||
|
adapter.disconnect.assert_awaited_once()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_safe_disconnect_swallows_exceptions(bare_runner):
|
||||||
|
"""An exception in adapter.disconnect() must not propagate — the
|
||||||
|
caller is already on an error path."""
|
||||||
|
adapter = MagicMock()
|
||||||
|
adapter.disconnect = AsyncMock(side_effect=RuntimeError("partial init"))
|
||||||
|
|
||||||
|
# Must NOT raise
|
||||||
|
await bare_runner._safe_adapter_disconnect(adapter, Platform.TELEGRAM)
|
||||||
|
|
||||||
|
adapter.disconnect.assert_awaited_once()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_safe_disconnect_handles_none_platform(bare_runner):
|
||||||
|
"""Logging path must tolerate platform=None."""
|
||||||
|
adapter = MagicMock()
|
||||||
|
adapter.disconnect = AsyncMock(side_effect=ValueError("nope"))
|
||||||
|
|
||||||
|
await bare_runner._safe_adapter_disconnect(adapter, None)
|
||||||
|
|
||||||
|
adapter.disconnect.assert_awaited_once()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue