mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-02 07:11:49 +00:00
fix(telegram): reset sticky fallback IP on connect failure, retry primary DNS
When a sticky fallback IP (from DoH discovery) becomes unreachable, the transport previously got stuck in an attempt_order that only tried the dead IP. This prevented the gateway from recovering until the service was restarted.

Changes:
- Always include the primary DNS path (None) after the sticky IP in attempt_order, so that a primary-path retry happens when the sticky IP fails.
- Reset self._sticky_ip to None when the currently sticky IP hits a connect timeout or connect error, allowing the next request to retry from scratch.

Fixes silent Telegram disconnection when discovered fallback IPs are transiently or permanently unreachable.
This commit is contained in:
parent
8439ddc1b1
commit
5c4b43ced7
1 changed file with 10 additions and 0 deletions
|
|
@@ -76,6 +76,8 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
|||
|
||||
sticky_ip = self._sticky_ip
|
||||
attempt_order: list[Optional[str]] = [sticky_ip] if sticky_ip else [None]
|
||||
if sticky_ip:
|
||||
attempt_order.append(None) # retry primary DNS after sticky failure
|
||||
for ip in self._fallback_ips:
|
||||
if ip != sticky_ip:
|
||||
attempt_order.append(ip)
|
||||
|
|
@@ -99,6 +101,14 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
|||
last_error = exc
|
||||
if not _is_retryable_connect_error(exc):
|
||||
raise
|
||||
if ip is not None and ip == self._sticky_ip:
|
||||
async with self._sticky_lock:
|
||||
if self._sticky_ip == ip:
|
||||
self._sticky_ip = None
|
||||
logger.warning(
|
||||
"[Telegram] Sticky fallback IP %s failed; resetting to primary DNS path",
|
||||
ip,
|
||||
)
|
||||
if ip is None:
|
||||
logger.warning(
|
||||
"[Telegram] Primary api.telegram.org connection failed (%s); trying fallback IPs %s",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue