From 9d143fb66d8b78fd241774b37223bf77b471b373 Mon Sep 17 00:00:00 2001 From: happy5318 Date: Thu, 23 Apr 2026 03:36:04 +0800 Subject: [PATCH] fix(gateway): add timeout to adapter.disconnect() during shutdown Prevent gateway shutdown hangs when a platform adapter's disconnect() method blocks indefinitely (e.g., Feishu WebSocket thread waiting for network I/O). Without this timeout, systemd sends SIGKILL after TimeoutStopSec, but SIGKILL doesn't trigger Python's atexit handlers, leaving a stale PID file that causes 'PID file race lost' errors on restart. Changes: - Wrap adapter.disconnect() in asyncio.wait_for() with 15s timeout - Log warning on timeout and continue with shutdown instead of hanging - Lets shutdown proceed to PID file cleanup even if an adapter's disconnect() hangs Fixes #14128 --- gateway/run.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/gateway/run.py b/gateway/run.py index 51c9c529fe..a46c57a5e0 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2629,14 +2629,25 @@ class GatewayRunner: self._finalize_shutdown_agents(active_agents) + # Disconnect adapters with per-adapter timeout to prevent hangs. + # A stuck adapter (e.g., Feishu WebSocket thread waiting for network + # I/O) should not block the entire shutdown sequence and prevent + # PID file cleanup, which would cause "PID file race lost" errors + # on restart after systemd SIGKILLs the process. 
+ _adapter_disconnect_timeout = 15.0 # seconds per adapter for platform, adapter in list(self.adapters.items()): try: await adapter.cancel_background_tasks() except Exception as e: logger.debug("✗ %s background-task cancel error: %s", platform.value, e) try: - await adapter.disconnect() + await asyncio.wait_for(adapter.disconnect(), timeout=_adapter_disconnect_timeout) logger.info("✓ %s disconnected", platform.value) + except asyncio.TimeoutError: + logger.warning( + "✗ %s disconnect timed out after %.1fs - forcing continue", + platform.value, _adapter_disconnect_timeout + ) except Exception as e: logger.error("✗ %s disconnect error: %s", platform.value, e)