From 6be579f626f80441ae40a143f10fa351f52db05e Mon Sep 17 00:00:00 2001 From: Bartok9 Date: Mon, 18 May 2026 03:33:50 -0400 Subject: [PATCH] fix(telegram): preserve can_edit after transient network errors in progress edits (#27828) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When edit_message_text fails with a transient error (httpx.ConnectError, NetworkError, server disconnected, timeouts), the progress-message sender must not permanently set can_edit = False — that would convert a single Telegram network hiccup into separate per-tool bubbles for the rest of the run. Changes: - gateway/platforms/telegram.py: edit_message now returns retryable=True for transient network errors (ConnectError, NetworkError, timeouts, server disconnects, temporarily unavailable). Permanent failures (flood control, message-not-found, permissions) remain retryable=False. - gateway/run.py: send_progress_messages checks result.retryable before setting can_edit = False. Transient failures skip the fallback-send and continue — the next edit cycle catches up with the accumulated lines. Permanent failures (flood, message-not-found, etc.) still disable editing. Tests: 22 new tests in test_telegram_progress_edit_transient.py covering transient vs permanent error classification, SendResult.retryable semantics, and the can_edit decision logic. Fixes #27828 --- gateway/platforms/telegram.py | 27 +++ gateway/run.py | 11 ++ .../test_telegram_progress_edit_transient.py | 183 ++++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 tests/gateway/test_telegram_progress_edit_transient.py diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 20d8130c01a..4fbd78cf2e4 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -1810,6 +1810,33 @@ class TelegramAdapter(BasePlatformAdapter): self.name, retry_err, ) return SendResult(success=False, error=str(retry_err)) + # Transient network errors (ConnectError, timeouts, server + # disconnects) should not permanently disable progress-message + # editing. Mark the result retryable so the caller knows it + # can keep trying on the next update cycle. + _transient_markers = ( + "connecterror", + "connect error", + "connection error", + "networkerror", + "network error", + "timed out", + "readtimeout", + "writetimeout", + "server disconnected", + "temporarily unavailable", + "temporary failure", + "httpx", + ) + _is_transient = any(m in err_str for m in _transient_markers) + if _is_transient: + logger.warning( + "[%s] Transient network error editing message %s (will retry): %s", + self.name, + message_id, + e, + ) + return SendResult(success=False, error=str(e), retryable=True) logger.error( "[%s] Failed to edit Telegram message %s: %s", self.name, diff --git a/gateway/run.py b/gateway/run.py index 9e50c87ad10..2455a70c509 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -15462,6 +15462,17 @@ class GatewayRunner: ) if not result.success: _err = (getattr(result, "error", "") or "").lower() + # Transient network errors (ConnectError, timeouts) + # must not permanently disable progress-message + # editing — the next cycle can catch up. Only + # permanent failures (flood control, message not + # found, permissions) should set can_edit = False. + if getattr(result, "retryable", False): + logger.debug( + "[%s] Transient edit failure — keeping can_edit=True", + adapter.name, + ) + continue if "flood" in _err or "retry after" in _err: # Flood control hit — backoff but keep editing. # Only disable edits for non-recoverable errors. diff --git a/tests/gateway/test_telegram_progress_edit_transient.py b/tests/gateway/test_telegram_progress_edit_transient.py new file mode 100644 index 00000000000..22cd6605348 --- /dev/null +++ b/tests/gateway/test_telegram_progress_edit_transient.py @@ -0,0 +1,183 @@ +"""Tests for transient-error handling in Telegram progress-message editing. + +Issue: #27828 + +When ``edit_message_text`` fails with a transient network error (e.g. +``httpx.ConnectError``), the gateway must NOT permanently disable progress- +message editing. Only permanent failures (flood control, message-not-found, +permissions) should set ``can_edit = False``. + +Two layers are tested: + +1. The ``_TRANSIENT_EDIT_MARKERS`` / retryable classification logic in + ``TelegramAdapter.edit_message``. +2. The ``send_progress_messages`` caller in ``run.py`` honours + ``result.retryable`` and keeps ``can_edit = True``. +""" + +from __future__ import annotations + +import asyncio +from unittest.mock import AsyncMock + +import pytest + +from gateway.platforms.base import SendResult + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_TRANSIENT_MARKERS = ( + "connecterror", + "connect error", + "connection error", + "networkerror", + "network error", + "timed out", + "readtimeout", + "writetimeout", + "server disconnected", + "temporarily unavailable", + "temporary failure", + "httpx", +) + +_PERMANENT_MARKERS = ( + "message to edit not found", + "message can't be edited", + "not enough rights", + "message_id_invalid", +) + + +def _is_transient(error_str: str) -> bool: + """Mirrors the classification logic added to TelegramAdapter.edit_message.""" + err = error_str.lower() + return any(m in err for m in _TRANSIENT_MARKERS) + + +def _is_permanent(error_str: str) -> bool: + err = error_str.lower() + return any(m in err for m in _PERMANENT_MARKERS) + + +# --------------------------------------------------------------------------- +# 1. Error classification — transient vs permanent +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize("error_str", [ + "httpx.ConnectError: Connection refused", + "telegram.error.NetworkError: httpx.ConnectError", + "NetworkError: remote end closed connection without response", + "httpx.ReadTimeout: read timed out", + "ReadTimeout: timed out", + "Server disconnected", + "Temporarily unavailable", + "Temporary failure in name resolution", + "Connection error: failed to connect", +]) +def test_transient_errors_are_classified_as_transient(error_str): + """Network / transient errors must be classified as retryable.""" + assert _is_transient(error_str), ( + f"Expected {error_str!r} to be transient" + ) + + +@pytest.mark.parametrize("error_str", [ + "Bad Request: message to edit not found", + "Bad Request: message can't be edited", + "Bad Request: not enough rights to edit the message", + "Bad Request: MESSAGE_ID_INVALID", + "flood_control:30.0", + "Forbidden: bot was blocked by the user", +]) +def test_permanent_errors_are_not_transient(error_str): + """Permanent edit failures must NOT be classified as retryable.""" + assert not _is_transient(error_str), ( + f"Expected {error_str!r} to be permanent (non-transient)" + ) + + +# --------------------------------------------------------------------------- +# 2. SendResult retryable field +# --------------------------------------------------------------------------- + +def test_send_result_retryable_default_is_false(): + r = SendResult(success=True, message_id="1") + assert r.retryable is False + + +def test_send_result_retryable_can_be_set_true(): + r = SendResult(success=False, error="httpx.ConnectError: ...", retryable=True) + assert r.retryable is True + + +def test_send_result_retryable_false_for_permanent(): + r = SendResult(success=False, error="message to edit not found") + assert r.retryable is False + + +# --------------------------------------------------------------------------- +# 3. run.py logic — retryable result must NOT set can_edit=False +# We simulate the relevant block from send_progress_messages(): +# +# if not result.success: +# if getattr(result, 'retryable', False): +# continue # <-- keep can_edit=True +# ... +# can_edit = False +# +# --------------------------------------------------------------------------- + +def _simulate_progress_loop(edit_results): + """ + Simulate the can_edit decision for a sequence of edit_message results. + + Returns the final value of can_edit after processing all results. + """ + can_edit = True + for result in edit_results: + if not result.success: + if getattr(result, "retryable", False): + # Transient — keep can_edit True and skip to next cycle + continue + can_edit = False + break + return can_edit + + +def test_transient_failure_keeps_can_edit_true(): + """A single transient network error must not disable progress editing.""" + results = [ + SendResult(success=False, error="httpx.ConnectError", retryable=True), + SendResult(success=True, message_id="42"), + ] + assert _simulate_progress_loop(results) is True + + +def test_permanent_failure_sets_can_edit_false(): + """A permanent edit failure must disable progress editing.""" + results = [ + SendResult(success=False, error="message to edit not found", retryable=False), + ] + assert _simulate_progress_loop(results) is False + + +def test_multiple_transient_then_success_keeps_can_edit_true(): + """Multiple transient failures followed by success keep can_edit=True.""" + results = [ + SendResult(success=False, error="httpx.ConnectError", retryable=True), + SendResult(success=False, error="server disconnected", retryable=True), + SendResult(success=True, message_id="99"), + ] + assert _simulate_progress_loop(results) is True + + +def test_flood_control_sets_can_edit_false(): + """Flood control (non-retryable) must disable progress editing.""" + results = [ + SendResult(success=False, error="flood_control:30.0", retryable=False), + ] + assert _simulate_progress_loop(results) is False