diff --git a/gateway/run.py b/gateway/run.py
index e8c82a0ccd4..e2595f880aa 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -5520,9 +5520,17 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
             await asyncio.sleep(0.05)
             await self.stop(restart=True, detached_restart=detached, service_restart=via_service)
 
-        task = asyncio.create_task(_run_restart())
-        self._background_tasks.add(task)
-        task.add_done_callback(self._background_tasks.discard)
+        # _run_restart is a short-lived self-terminating task (calls stop()
+        # then returns). Don't add it to _background_tasks — _stop_impl
+        # cancels all entries in that set, which would cancel _run_restart
+        # while it's awaiting _stop_task, propagating CancelledError into
+        # _stop_impl and preventing _shutdown_event.set() / _exit_code = 75.
+        # See #12875.
+        #
+        # Still keep a strong reference on a dedicated attribute: the event
+        # loop only holds weak references to tasks, so an unreferenced task
+        # may be garbage collected before it finishes.
+        self._restart_task_ref = asyncio.create_task(_run_restart())
         return True
 
     # Drain-timeout reasons set by _stop_impl() when a still-running turn is
diff --git a/tests/gateway/test_restart_drain.py b/tests/gateway/test_restart_drain.py
index 15b948a4f79..07077539b47 100644
--- a/tests/gateway/test_restart_drain.py
+++ b/tests/gateway/test_restart_drain.py
@@ -181,11 +181,15 @@ async def test_request_restart_is_idempotent():
     runner, _adapter = make_restart_runner()
     runner.stop = AsyncMock()
 
     assert runner.request_restart(detached=True, via_service=False) is True
-    first_task = next(iter(runner._background_tasks))
+    # The restart task is deliberately kept out of _background_tasks so
+    # that _stop_impl cannot cancel it mid-restart (see #12875); the runner
+    # holds it on _restart_task_ref instead.
+    first_task = runner._restart_task_ref
+    assert first_task not in runner._background_tasks
     assert runner.request_restart(detached=True, via_service=False) is False
 
     await first_task
 
     runner.stop.assert_awaited_once_with(
         restart=True, detached_restart=True, service_restart=False