fix: store asyncio task references to prevent GC mid-execution (#3267)

Python's asyncio event loop holds only weak references to tasks.
Without a strong reference, the garbage collector can destroy a task
while it's awaiting I/O — silently dropping messages. Python 3.12+
made this more aggressive.

Audit of all gateway platform adapters found 7 untracked create_task
calls across 6 files:

Short-lived fire-and-forget tasks (tracked via the _background_tasks set from the base class):
- gateway/platforms/webhook.py: handle_message task
- gateway/platforms/sms.py: handle_message task
- gateway/platforms/signal.py: SSE response aclose task

Long-running infrastructure tasks (stored in named instance vars):
- gateway/platforms/slack.py: Socket Mode handler (_socket_mode_task)
- gateway/platforms/discord.py: bot client (_bot_task)
- gateway/platforms/whatsapp.py: message poll loop (_poll_task, 2 sites)

All other adapters (telegram, mattermost, matrix, email, homeassistant,
dingtalk) already tracked their tasks correctly.

Salvaged from PR #3160 by memosr — expanded from 1 file to 6.
This commit is contained in:
Teknium 2026-03-26 14:36:24 -07:00 committed by GitHub
parent 3a86328847
commit 243ee67529
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 16 additions and 7 deletions

View file

@ -446,6 +446,7 @@ class DiscordAdapter(BasePlatformAdapter):
# Persistent typing indicator loops per channel (DMs don't reliably # Persistent typing indicator loops per channel (DMs don't reliably
# show the standard typing gateway event for bots) # show the standard typing gateway event for bots)
self._typing_tasks: Dict[str, asyncio.Task] = {} self._typing_tasks: Dict[str, asyncio.Task] = {}
self._bot_task: Optional[asyncio.Task] = None
# Cap to prevent unbounded growth (Discord threads get archived). # Cap to prevent unbounded growth (Discord threads get archived).
self._MAX_TRACKED_THREADS = 500 self._MAX_TRACKED_THREADS = 500
@ -588,7 +589,7 @@ class DiscordAdapter(BasePlatformAdapter):
self._register_slash_commands() self._register_slash_commands()
# Start the bot in background # Start the bot in background
asyncio.create_task(self._client.start(self.config.token)) self._bot_task = asyncio.create_task(self._client.start(self.config.token))
# Wait for ready # Wait for ready
await asyncio.wait_for(self._ready_event.wait(), timeout=30) await asyncio.wait_for(self._ready_event.wait(), timeout=30)

View file

@ -344,7 +344,9 @@ class SignalAdapter(BasePlatformAdapter):
"""Force SSE reconnection by closing the current response.""" """Force SSE reconnection by closing the current response."""
if self._sse_response and not self._sse_response.is_stream_consumed: if self._sse_response and not self._sse_response.is_stream_consumed:
try: try:
asyncio.create_task(self._sse_response.aclose()) task = asyncio.create_task(self._sse_response.aclose())
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
except Exception: except Exception:
pass pass
self._sse_response = None self._sse_response = None

View file

@ -72,6 +72,7 @@ class SlackAdapter(BasePlatformAdapter):
self._handler: Optional[AsyncSocketModeHandler] = None self._handler: Optional[AsyncSocketModeHandler] = None
self._bot_user_id: Optional[str] = None self._bot_user_id: Optional[str] = None
self._user_name_cache: Dict[str, str] = {} # user_id → display name self._user_name_cache: Dict[str, str] = {} # user_id → display name
self._socket_mode_task: Optional[asyncio.Task] = None
async def connect(self) -> bool: async def connect(self) -> bool:
"""Connect to Slack via Socket Mode.""" """Connect to Slack via Socket Mode."""
@ -119,7 +120,7 @@ class SlackAdapter(BasePlatformAdapter):
# Start Socket Mode handler in background # Start Socket Mode handler in background
self._handler = AsyncSocketModeHandler(self._app, app_token) self._handler = AsyncSocketModeHandler(self._app, app_token)
asyncio.create_task(self._handler.start_async()) self._socket_mode_task = asyncio.create_task(self._handler.start_async())
self._running = True self._running = True
logger.info("[Slack] Connected as @%s (Socket Mode)", bot_name) logger.info("[Slack] Connected as @%s (Socket Mode)", bot_name)

View file

@ -265,7 +265,9 @@ class SmsAdapter(BasePlatformAdapter):
) )
# Non-blocking: Twilio expects a fast response # Non-blocking: Twilio expects a fast response
asyncio.create_task(self.handle_message(event)) task = asyncio.create_task(self.handle_message(event))
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
# Return empty TwiML — we send replies via the REST API, not inline TwiML # Return empty TwiML — we send replies via the REST API, not inline TwiML
return web.Response( return web.Response(

View file

@ -363,7 +363,9 @@ class WebhookAdapter(BasePlatformAdapter):
) )
# Non-blocking — return 202 Accepted immediately # Non-blocking — return 202 Accepted immediately
asyncio.create_task(self.handle_message(event)) task = asyncio.create_task(self.handle_message(event))
self._background_tasks.add(task)
task.add_done_callback(self._background_tasks.discard)
return web.json_response( return web.json_response(
{ {

View file

@ -140,6 +140,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
self._message_queue: asyncio.Queue = asyncio.Queue() self._message_queue: asyncio.Queue = asyncio.Queue()
self._bridge_log_fh = None self._bridge_log_fh = None
self._bridge_log: Optional[Path] = None self._bridge_log: Optional[Path] = None
self._poll_task: Optional[asyncio.Task] = None
async def connect(self) -> bool: async def connect(self) -> bool:
""" """
@ -198,7 +199,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
print(f"[{self.name}] Using existing bridge (status: {bridge_status})") print(f"[{self.name}] Using existing bridge (status: {bridge_status})")
self._mark_connected() self._mark_connected()
self._bridge_process = None # Not managed by us self._bridge_process = None # Not managed by us
asyncio.create_task(self._poll_messages()) self._poll_task = asyncio.create_task(self._poll_messages())
return True return True
else: else:
print(f"[{self.name}] Bridge found but not connected (status: {bridge_status}), restarting") print(f"[{self.name}] Bridge found but not connected (status: {bridge_status}), restarting")
@ -304,7 +305,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
print(f"[{self.name}] If session expired, re-pair: hermes whatsapp") print(f"[{self.name}] If session expired, re-pair: hermes whatsapp")
# Start message polling task # Start message polling task
asyncio.create_task(self._poll_messages()) self._poll_task = asyncio.create_task(self._poll_messages())
self._mark_connected() self._mark_connected()
print(f"[{self.name}] Bridge started on port {self._bridge_port}") print(f"[{self.name}] Bridge started on port {self._bridge_port}")