fix(telegram): reset sticky fallback IP on connect failure, retry primary DNS

When a sticky fallback IP (from DoH discovery) becomes unreachable,
the transport previously got stuck in an attempt_order that tried the
dead IP first and never retried the primary DNS path. This prevented
the gateway from recovering until the service was restarted.

Changes:
- Always include the primary DNS path (None) right after the sticky IP
  in attempt_order, so the primary path is retried when the sticky IP
  fails.
- Reset self._sticky_ip to None when the currently sticky IP hits a
  retryable connect failure (connect timeout / connect error), so the
  next request starts from the primary DNS path again.

Fixes silent Telegram disconnection when discovered fallback IPs
are transiently or permanently unreachable.
This commit is contained in:
falconexe 2026-05-12 19:15:04 +00:00 committed by Teknium
parent 8439ddc1b1
commit 5c4b43ced7

View file

@@ -76,6 +76,8 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
sticky_ip = self._sticky_ip
attempt_order: list[Optional[str]] = [sticky_ip] if sticky_ip else [None]
if sticky_ip:
attempt_order.append(None) # retry primary DNS after sticky failure
for ip in self._fallback_ips:
if ip != sticky_ip:
attempt_order.append(ip)
@@ -99,6 +101,14 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
last_error = exc
if not _is_retryable_connect_error(exc):
raise
if ip is not None and ip == self._sticky_ip:
async with self._sticky_lock:
if self._sticky_ip == ip:
self._sticky_ip = None
logger.warning(
"[Telegram] Sticky fallback IP %s failed; resetting to primary DNS path",
ip,
)
if ip is None:
logger.warning(
"[Telegram] Primary api.telegram.org connection failed (%s); trying fallback IPs %s",