From 404b06ac4fc307d868ea51ac656387641ef16b39 Mon Sep 17 00:00:00 2001 From: liuhao1024 Date: Thu, 25 Jun 2026 02:30:10 +0530 Subject: [PATCH] fix(gateway): honor server retry_after in _send_with_retry for Telegram flood control (#46762) When Telegram's sendRichMessage returns a FloodWait/RetryAfter error, _try_send_rich() now extracts the server-provided retry_after value and propagates it through SendResult.retry_after. The base _send_with_retry() layer honors this value instead of using its default short exponential backoff (~2s, ~4s), preventing the retry budget from being exhausted against a server that demands a 25-37s wait. Salvaged from #46774 by @liuhao1024. Telegram adapter path moved from gateway/platforms/telegram.py to plugins/platforms/telegram/adapter.py since the original PR. Closes #46762 --- gateway/platforms/base.py | 16 +++++++- plugins/platforms/telegram/adapter.py | 10 +++++ tests/gateway/test_send_retry.py | 54 +++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index ac1eeef0b89..0637ca75519 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1743,6 +1743,9 @@ class SendResult: # stream consumer can send the missing tail instead of marking a clipped # response complete. retryable: bool = False # True for transient connection errors — base will retry automatically + # Server-requested retry delay in seconds (e.g. Telegram FloodWait retry_after). + # When present, _send_with_retry() honors this instead of its default backoff. + retry_after: Optional[float] = None # When the adapter had to split an oversized payload across multiple # platform messages (e.g. Telegram edit_message overflow split-and-deliver), # ``message_id`` is the LAST visible message id (so subsequent edits target @@ -3760,9 +3763,16 @@ class BasePlatformAdapter(ABC): return result if is_network: - # Retry with exponential backoff for transient errors + # Retry with exponential backoff for transient errors. + # Honor server-requested retry_after (e.g. Telegram FloodWait) + # when present — it is authoritative over our backoff schedule. + server_retry_after = result.retry_after for attempt in range(1, max_retries + 1): - delay = base_delay * (2 ** (attempt - 1)) + random.uniform(0, 1) + if server_retry_after is not None: + delay = server_retry_after + random.uniform(0, 1) + server_retry_after = None # only honor once per send + else: + delay = base_delay * (2 ** (attempt - 1)) + random.uniform(0, 1) logger.warning( "[%s] Send failed (attempt %d/%d, retrying in %.1fs): %s", self.name, attempt, max_retries, delay, error_str, @@ -3778,6 +3788,8 @@ class BasePlatformAdapter(ABC): logger.info("[%s] Send succeeded on retry %d", self.name, attempt) return result error_str = result.error or "" + if result.retry_after is not None: + server_retry_after = result.retry_after if not (result.retryable or self._is_retryable_error(error_str)): break # error switched to non-transient — fall through to plain-text fallback else: diff --git a/plugins/platforms/telegram/adapter.py b/plugins/platforms/telegram/adapter.py index de08c149a16..0bdcf31dbbf 100644 --- a/plugins/platforms/telegram/adapter.py +++ b/plugins/platforms/telegram/adapter.py @@ -1378,6 +1378,15 @@ class TelegramAdapter(BasePlatformAdapter): _TimedOut = None is_timeout = (_TimedOut and isinstance(exc, _TimedOut)) or "timed out" in err_str is_connect_timeout = self._looks_like_connect_timeout(exc) + # Extract server-requested retry_after for flood control so the + # base retry layer honors Telegram's backoff instead of its own + # short exponential schedule. + _retry_after = getattr(exc, "retry_after", None) + if _retry_after is None: + import re as _re + _m = _re.search(r"retry\s+(?:in\s+)?(\d+)", err_str, _re.IGNORECASE) + if _m: + _retry_after = float(_m.group(1)) logger.warning( "[%s] sendRichMessage transient failure (no legacy resend): %s", self.name, exc, @@ -1386,6 +1395,7 @@ class TelegramAdapter(BasePlatformAdapter): success=False, error=str(exc), retryable=(is_connect_timeout or not is_timeout), + retry_after=_retry_after, ) message_id = None diff --git a/tests/gateway/test_send_retry.py b/tests/gateway/test_send_retry.py index 62945d9f4d9..c57a7712e3d 100644 --- a/tests/gateway/test_send_retry.py +++ b/tests/gateway/test_send_retry.py @@ -282,3 +282,57 @@ class TestSendWithRetryFallback: result = await adapter._send_with_retry("chat1", "hello", max_retries=2) assert not result.success assert len(adapter._send_calls) == 2 # original + fallback only + + +# --------------------------------------------------------------------------- +# _send_with_retry — retry_after honor +# --------------------------------------------------------------------------- + +class TestSendWithRetryAfter: + @pytest.mark.asyncio + async def test_retry_after_honored_on_first_retry(self): + """When the initial result has retry_after, the first retry waits that long.""" + adapter = _StubAdapter() + adapter._send_results = [ + SendResult(success=False, error="Flood control exceeded. Retry in 37 seconds", + retryable=True, retry_after=37.0), + SendResult(success=True, message_id="ok"), + ] + with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep: + result = await adapter._send_with_retry("chat1", "hello", max_retries=2, base_delay=2.0) + assert result.success + # First sleep should use retry_after (~37s + jitter), not base_delay (~2s) + first_sleep = mock_sleep.call_args_list[0][0][0] + assert first_sleep >= 36.0 # 37 - 1 (max jitter) + + @pytest.mark.asyncio + async def test_retry_after_from_subsequent_result(self): + """If a retry itself returns retry_after, the next retry honors it.""" + adapter = _StubAdapter() + adapter._send_results = [ + SendResult(success=False, error="ConnectError", retryable=True), + SendResult(success=False, error="Flood control exceeded. Retry in 30 seconds", + retryable=True, retry_after=30.0), + SendResult(success=True, message_id="ok"), + ] + with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep: + result = await adapter._send_with_retry("chat1", "hello", max_retries=3, base_delay=2.0) + assert result.success + # Second sleep should use the retry_after from the second result + second_sleep = mock_sleep.call_args_list[1][0][0] + assert second_sleep >= 29.0 # 30 - 1 (max jitter) + + @pytest.mark.asyncio + async def test_no_retry_after_uses_default_backoff(self): + """Without retry_after, default exponential backoff is used.""" + adapter = _StubAdapter() + adapter._send_results = [ + SendResult(success=False, error="ConnectError", retryable=True), + SendResult(success=True, message_id="ok"), + ] + with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep: + result = await adapter._send_with_retry("chat1", "hello", max_retries=2, base_delay=2.0) + assert result.success + # Sleep should be ~2s (base_delay * 2^0 + jitter), NOT 37s + first_sleep = mock_sleep.call_args_list[0][0][0] + assert first_sleep < 5.0