diff --git a/infographic/telegram-send-path-35205/infographic.png b/infographic/telegram-send-path-35205/infographic.png new file mode 100644 index 00000000000..ebcfd69ca6d Binary files /dev/null and b/infographic/telegram-send-path-35205/infographic.png differ diff --git a/plugins/platforms/telegram/adapter.py b/plugins/platforms/telegram/adapter.py index 37f99b75adb..5ca00428a25 100644 --- a/plugins/platforms/telegram/adapter.py +++ b/plugins/platforms/telegram/adapter.py @@ -1572,6 +1572,17 @@ class TelegramAdapter(BasePlatformAdapter): self.name, attempt, ) self._polling_network_error_count = 0 + # start_polling() succeeding IS the recovery signal: the long-poll + # connection is live again, so clear the degraded flag immediately + # rather than blocking all outbound sends for the full + # HEARTBEAT_PROBE_DELAY window. The deferred probe below is a + # defensive re-check — if it later detects a silent wedge (PTB + # running=True but consumer task dead) it re-enters the ladder, + # which re-sets _send_path_degraded. Without this clear here, a + # clean reconnect leaves the flag stuck True until the 60s probe + # (or forever, if the probe is never scheduled), blocking the send + # path even though the bot has fully recovered. See #35205. + self._send_path_degraded = False # start_polling() returning is necessary but not sufficient: # PTB's Updater can be left in a state where `running` is True # but the underlying long-poll task is wedged on a stale httpx diff --git a/tests/gateway/test_telegram_send_path_health.py b/tests/gateway/test_telegram_send_path_health.py index d5285f25109..37c76152c91 100644 --- a/tests/gateway/test_telegram_send_path_health.py +++ b/tests/gateway/test_telegram_send_path_health.py @@ -66,8 +66,48 @@ async def test_send_short_circuits_when_path_degraded(): @pytest.mark.asyncio async def test_reconnect_storm_sets_and_heartbeat_clears_flag(monkeypatch): - """_handle_polling_network_error sets the flag; a successful heartbeat - probe in _verify_polling_after_reconnect clears it.""" + """_handle_polling_network_error sets the flag while reconnecting; if the + reconnect attempt itself raises (polling not yet healthy), the flag stays + True until a later successful heartbeat probe in + _verify_polling_after_reconnect clears it.""" + adapter = _make_adapter() + adapter._app = MagicMock() + adapter._app.updater = MagicMock() + adapter._app.updater.running = True + adapter._app.updater.stop = AsyncMock() + # First start_polling attempt fails — the reconnect handler must leave the + # flag set (path still unhealthy) and not clear it prematurely. + adapter._app.updater.start_polling = AsyncMock(side_effect=OSError("still down")) + adapter._app.bot = MagicMock() + adapter._app.bot.get_me = AsyncMock(return_value=MagicMock()) + adapter._polling_error_callback_ref = AsyncMock() + monkeypatch.setattr( + "plugins.platforms.telegram.adapter.Update", MagicMock(ALL_TYPES=[]) + ) + # Suppress the self-rescheduled retry so the test doesn't recurse. + monkeypatch.setattr( + "plugins.platforms.telegram.adapter.asyncio.ensure_future", MagicMock() + ) + + with patch("plugins.platforms.telegram.adapter.asyncio.sleep", new_callable=AsyncMock): + await adapter._handle_polling_network_error(OSError("Bad Gateway")) + # start_polling failed → path still degraded. + assert adapter._send_path_degraded is True + + # Now the deferred probe runs against a recovered (running) updater and + # a responsive bot — it clears the flag. + adapter._app.updater.running = True + with patch("plugins.platforms.telegram.adapter.asyncio.sleep", new_callable=AsyncMock): + await adapter._verify_polling_after_reconnect() + assert adapter._send_path_degraded is False + + +@pytest.mark.asyncio +async def test_successful_reconnect_clears_flag_without_probe(monkeypatch): + """Regression for #35205: a successful start_polling() clears + _send_path_degraded immediately, so outbound sends are not blocked for + the full HEARTBEAT_PROBE_DELAY window (and never get stuck True if the + deferred probe is never scheduled / never runs).""" adapter = _make_adapter() adapter._app = MagicMock() adapter._app.updater = MagicMock() @@ -80,10 +120,17 @@ async def test_reconnect_storm_sets_and_heartbeat_clears_flag(monkeypatch): monkeypatch.setattr( "plugins.platforms.telegram.adapter.Update", MagicMock(ALL_TYPES=[]) ) - - await adapter._handle_polling_network_error(OSError("Bad Gateway")) - assert adapter._send_path_degraded is True + # Don't let the deferred probe run — prove the clear happens in the + # reconnect handler itself, not in _verify_polling_after_reconnect. + monkeypatch.setattr( + adapter, "_verify_polling_after_reconnect", AsyncMock() + ) with patch("plugins.platforms.telegram.adapter.asyncio.sleep", new_callable=AsyncMock): - await adapter._verify_polling_after_reconnect() + await adapter._handle_polling_network_error(OSError("Bad Gateway")) + assert adapter._send_path_degraded is False + assert adapter._polling_network_error_count == 0 + # And send() works again right away. + result = await adapter.send("123", "hello") + assert result.success is True