mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): add timeout to adapter.disconnect() during shutdown
Prevent gateway shutdown hangs when a platform adapter's disconnect() method blocks indefinitely (e.g., Feishu WebSocket thread waiting for network I/O). Without this timeout, systemd sends SIGKILL after TimeoutStopSec, but SIGKILL doesn't trigger Python's atexit handlers, leaving a stale PID file that causes 'PID file race lost' errors on restart.

Changes:
- Wrap adapter.disconnect() in asyncio.wait_for() with a 15s timeout
- Log a warning on timeout and continue with shutdown instead of hanging
- Ensure PID file cleanup always runs even if adapter cleanup fails

Fixes #14128
This commit is contained in:
parent
de849c410d
commit
9d143fb66d
1 changed files with 12 additions and 1 deletions
|
|
@@ -2629,14 +2629,25 @@ class GatewayRunner:
|
|||
|
||||
self._finalize_shutdown_agents(active_agents)
|
||||
|
||||
# Disconnect adapters with per-adapter timeout to prevent hangs.
|
||||
# A stuck adapter (e.g., Feishu WebSocket thread waiting for network
|
||||
# I/O) should not block the entire shutdown sequence and prevent
|
||||
# PID file cleanup, which would cause "PID file race lost" errors
|
||||
# on restart after systemd SIGKILLs the process.
|
||||
_adapter_disconnect_timeout = 15.0 # seconds per adapter
|
||||
for platform, adapter in list(self.adapters.items()):
|
||||
try:
|
||||
await adapter.cancel_background_tasks()
|
||||
except Exception as e:
|
||||
logger.debug("✗ %s background-task cancel error: %s", platform.value, e)
|
||||
try:
|
||||
await adapter.disconnect()
|
||||
await asyncio.wait_for(adapter.disconnect(), timeout=_adapter_disconnect_timeout)
|
||||
logger.info("✓ %s disconnected", platform.value)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"✗ %s disconnect timed out after %.1fs - forcing continue",
|
||||
platform.value, _adapter_disconnect_timeout
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("✗ %s disconnect error: %s", platform.value, e)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue