mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix(gateway): honor server retry_after in _send_with_retry for Telegram flood control (#46762)
When Telegram's sendRichMessage returns a FloodWait/RetryAfter error, _try_send_rich() now extracts the server-provided retry_after value and propagates it through SendResult.retry_after. The base _send_with_retry() layer honors this value instead of using its default short exponential backoff (~2s, ~4s), preventing the retry budget from being exhausted against a server that demands a 25-37s wait. Salvaged from #46774 by @liuhao1024. Telegram adapter path moved from gateway/platforms/telegram.py to plugins/platforms/telegram/adapter.py since the original PR. Closes #46762
This commit is contained in:
parent
cedbb4cfa2
commit
404b06ac4f
3 changed files with 78 additions and 2 deletions
|
|
@ -1743,6 +1743,9 @@ class SendResult:
|
|||
# stream consumer can send the missing tail instead of marking a clipped
|
||||
# response complete.
|
||||
retryable: bool = False # True for transient connection errors — base will retry automatically
|
||||
# Server-requested retry delay in seconds (e.g. Telegram FloodWait retry_after).
|
||||
# When present, _send_with_retry() honors this instead of its default backoff.
|
||||
retry_after: Optional[float] = None
|
||||
# When the adapter had to split an oversized payload across multiple
|
||||
# platform messages (e.g. Telegram edit_message overflow split-and-deliver),
|
||||
# ``message_id`` is the LAST visible message id (so subsequent edits target
|
||||
|
|
@ -3760,9 +3763,16 @@ class BasePlatformAdapter(ABC):
|
|||
return result
|
||||
|
||||
if is_network:
|
||||
# Retry with exponential backoff for transient errors
|
||||
# Retry with exponential backoff for transient errors.
|
||||
# Honor server-requested retry_after (e.g. Telegram FloodWait)
|
||||
# when present — it is authoritative over our backoff schedule.
|
||||
server_retry_after = result.retry_after
|
||||
for attempt in range(1, max_retries + 1):
|
||||
delay = base_delay * (2 ** (attempt - 1)) + random.uniform(0, 1)
|
||||
if server_retry_after is not None:
|
||||
delay = server_retry_after + random.uniform(0, 1)
|
||||
server_retry_after = None # only honor once per send
|
||||
else:
|
||||
delay = base_delay * (2 ** (attempt - 1)) + random.uniform(0, 1)
|
||||
logger.warning(
|
||||
"[%s] Send failed (attempt %d/%d, retrying in %.1fs): %s",
|
||||
self.name, attempt, max_retries, delay, error_str,
|
||||
|
|
@ -3778,6 +3788,8 @@ class BasePlatformAdapter(ABC):
|
|||
logger.info("[%s] Send succeeded on retry %d", self.name, attempt)
|
||||
return result
|
||||
error_str = result.error or ""
|
||||
if result.retry_after is not None:
|
||||
server_retry_after = result.retry_after
|
||||
if not (result.retryable or self._is_retryable_error(error_str)):
|
||||
break # error switched to non-transient — fall through to plain-text fallback
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -1378,6 +1378,15 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
_TimedOut = None
|
||||
is_timeout = (_TimedOut and isinstance(exc, _TimedOut)) or "timed out" in err_str
|
||||
is_connect_timeout = self._looks_like_connect_timeout(exc)
|
||||
# Extract server-requested retry_after for flood control so the
|
||||
# base retry layer honors Telegram's backoff instead of its own
|
||||
# short exponential schedule.
|
||||
_retry_after = getattr(exc, "retry_after", None)
|
||||
if _retry_after is None:
|
||||
import re as _re
|
||||
_m = _re.search(r"retry\s+(?:in\s+)?(\d+)", err_str, _re.IGNORECASE)
|
||||
if _m:
|
||||
_retry_after = float(_m.group(1))
|
||||
logger.warning(
|
||||
"[%s] sendRichMessage transient failure (no legacy resend): %s",
|
||||
self.name, exc,
|
||||
|
|
@ -1386,6 +1395,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
success=False,
|
||||
error=str(exc),
|
||||
retryable=(is_connect_timeout or not is_timeout),
|
||||
retry_after=_retry_after,
|
||||
)
|
||||
|
||||
message_id = None
|
||||
|
|
|
|||
|
|
@ -282,3 +282,57 @@ class TestSendWithRetryFallback:
|
|||
result = await adapter._send_with_retry("chat1", "hello", max_retries=2)
|
||||
assert not result.success
|
||||
assert len(adapter._send_calls) == 2 # original + fallback only
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _send_with_retry — retry_after honor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSendWithRetryAfter:
|
||||
@pytest.mark.asyncio
|
||||
async def test_retry_after_honored_on_first_retry(self):
|
||||
"""When the initial result has retry_after, the first retry waits that long."""
|
||||
adapter = _StubAdapter()
|
||||
adapter._send_results = [
|
||||
SendResult(success=False, error="Flood control exceeded. Retry in 37 seconds",
|
||||
retryable=True, retry_after=37.0),
|
||||
SendResult(success=True, message_id="ok"),
|
||||
]
|
||||
with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
|
||||
result = await adapter._send_with_retry("chat1", "hello", max_retries=2, base_delay=2.0)
|
||||
assert result.success
|
||||
# First sleep should use retry_after (~37s + jitter), not base_delay (~2s)
|
||||
first_sleep = mock_sleep.call_args_list[0][0][0]
|
||||
assert first_sleep >= 36.0 # 37 - 1 (max jitter)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_retry_after_from_subsequent_result(self):
|
||||
"""If a retry itself returns retry_after, the next retry honors it."""
|
||||
adapter = _StubAdapter()
|
||||
adapter._send_results = [
|
||||
SendResult(success=False, error="ConnectError", retryable=True),
|
||||
SendResult(success=False, error="Flood control exceeded. Retry in 30 seconds",
|
||||
retryable=True, retry_after=30.0),
|
||||
SendResult(success=True, message_id="ok"),
|
||||
]
|
||||
with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
|
||||
result = await adapter._send_with_retry("chat1", "hello", max_retries=3, base_delay=2.0)
|
||||
assert result.success
|
||||
# Second sleep should use the retry_after from the second result
|
||||
second_sleep = mock_sleep.call_args_list[1][0][0]
|
||||
assert second_sleep >= 29.0 # 30 - 1 (max jitter)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_retry_after_uses_default_backoff(self):
|
||||
"""Without retry_after, default exponential backoff is used."""
|
||||
adapter = _StubAdapter()
|
||||
adapter._send_results = [
|
||||
SendResult(success=False, error="ConnectError", retryable=True),
|
||||
SendResult(success=True, message_id="ok"),
|
||||
]
|
||||
with patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
|
||||
result = await adapter._send_with_retry("chat1", "hello", max_retries=2, base_delay=2.0)
|
||||
assert result.success
|
||||
# Sleep should be ~2s (base_delay * 2^0 + jitter), NOT 37s
|
||||
first_sleep = mock_sleep.call_args_list[0][0][0]
|
||||
assert first_sleep < 5.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue