diff --git a/plugins/platforms/discord/adapter.py b/plugins/platforms/discord/adapter.py index 46544cd1f44..391c4b99f9b 100644 --- a/plugins/platforms/discord/adapter.py +++ b/plugins/platforms/discord/adapter.py @@ -910,6 +910,17 @@ class DiscordAdapter(BasePlatformAdapter): except asyncio.TimeoutError: logger.error("[%s] Timeout waiting for connection to Discord", self.name, exc_info=True) + # Cancel the background bot task so it cannot fire on_message after + # this adapter is discarded. Without this, the task keeps running and + # a later successful reconnect leaves two active Discord clients that + # each process every message, producing duplicate threads/responses. + if self._bot_task and not self._bot_task.done(): + self._bot_task.cancel() + try: + await self._bot_task + except (asyncio.CancelledError, Exception): + pass + self._bot_task = None self._release_platform_lock() return False except Exception as e: # pragma: no cover - defensive logging @@ -919,6 +930,20 @@ class DiscordAdapter(BasePlatformAdapter): async def disconnect(self) -> None: """Disconnect from Discord.""" + # Cancel the bot task before closing the client. If connect() timed out + # and returned False, the background client.start() task may still be + # running; calling client.close() alone is not enough to stop it because + # discord.py's reconnect loop can ignore the closed flag while a + # WebSocket handshake is in flight. Explicitly cancelling the task here + # ensures the zombie client cannot receive or dispatch any further events. + if self._bot_task and not self._bot_task.done(): + self._bot_task.cancel() + try: + await self._bot_task + except (asyncio.CancelledError, Exception): + pass + self._bot_task = None + # Clean up all active voice connections before closing the client for guild_id in list(self._voice_clients.keys()): try: diff --git a/tests/gateway/test_discord_connect.py b/tests/gateway/test_discord_connect.py index 54dc903e971..b7bee41ca52 100644 --- a/tests/gateway/test_discord_connect.py +++ b/tests/gateway/test_discord_connect.py @@ -279,6 +279,88 @@ async def test_connect_releases_token_lock_on_timeout(monkeypatch): assert adapter._platform_lock_identity is None +@pytest.mark.asyncio +async def test_connect_timeout_cancels_bot_task(monkeypatch): + """Regression: connect() timeout must cancel _bot_task so the zombie + Discord client cannot fire on_message after the adapter is discarded. + + Without this fix, the orphaned task eventually completes its WebSocket + handshake and a subsequent successful reconnect leaves two live clients + that each process every message, producing duplicate threads. + """ + adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token")) + + monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None)) + monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None) + + intents = SimpleNamespace( + message_content=False, dm_messages=False, guild_messages=False, + members=False, voice_states=False, + ) + monkeypatch.setattr(discord_platform.Intents, "default", lambda: intents) + + class NeverReadyBot(FakeBot): + """Bot whose start() never fires on_ready — simulates a slow gateway handshake.""" + async def start(self, token): + await asyncio.Event().wait() # hang forever + + monkeypatch.setattr( + discord_platform.commands, + "Bot", + lambda **kwargs: NeverReadyBot( + intents=kwargs["intents"], + proxy=kwargs.get("proxy"), + allowed_mentions=kwargs.get("allowed_mentions"), + ), + ) + + async def fake_wait_for(awaitable, timeout): + awaitable.close() + raise asyncio.TimeoutError() + + monkeypatch.setattr(discord_platform.asyncio, "wait_for", fake_wait_for) + + ok = await adapter.connect() + + assert ok is False + assert adapter._bot_task is None, ( + "_bot_task must be cancelled and cleared on connect() timeout; " + "leaving it alive creates a zombie Discord client that produces duplicate threads" + ) + + +@pytest.mark.asyncio +async def test_disconnect_cancels_running_bot_task(monkeypatch): + """Regression: disconnect() must cancel _bot_task even when connect() timed out. + + _dispose_unused_adapter calls disconnect() on adapters whose connect() returned + False. If _bot_task was still running (zombie), disconnect() must cancel it. + """ + adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token")) + + monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None)) + monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None) + + # Simulate a zombie bot_task that never finishes (as if discord.py is mid-handshake) + async def _forever(): + await asyncio.Event().wait() # hang forever + + zombie_task = asyncio.create_task(_forever()) + adapter._bot_task = zombie_task + adapter._client = AsyncMock() + adapter._post_connect_task = None + adapter._voice_clients = {} + adapter._running = True + adapter._ready_event = asyncio.Event() + + await adapter.disconnect() + + # The task must have been cancelled (done + cancelled) and cleared from the adapter. + assert adapter._bot_task is None, "disconnect() must clear _bot_task" + assert zombie_task.done(), "disconnect() must have awaited the bot task to completion" + assert zombie_task.cancelled(), "disconnect() must cancel the zombie bot task" + + @pytest.mark.asyncio async def test_connect_does_not_wait_for_slash_sync(monkeypatch): adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))