fix(telegram): preserve can_edit after transient network errors in progress edits (#27828)

When edit_message_text fails with a transient error (httpx.ConnectError,
NetworkError, server disconnected, timeouts), the progress-message sender
must not permanently set can_edit = False — that would convert a single
Telegram network hiccup into separate per-tool bubbles for the rest of the run.

Changes:
- gateway/platforms/telegram.py: edit_message now returns retryable=True for
  transient network errors (ConnectError, NetworkError, timeouts, server
  disconnects, temporarily unavailable). Permanent failures (flood control,
  message-not-found, permissions) remain retryable=False.
- gateway/run.py: send_progress_messages checks result.retryable before
  setting can_edit = False. Transient failures skip the fallback-send and
  continue — the next edit cycle catches up with the accumulated lines.
  Permanent failures (flood, message-not-found, etc.) still disable editing.

Tests: 22 new tests in test_telegram_progress_edit_transient.py covering
transient vs permanent error classification, SendResult.retryable semantics,
and the can_edit decision logic.

Fixes #27828
This commit is contained in:
Bartok9 2026-05-18 03:33:50 -04:00 committed by Teknium
parent 32435dfad8
commit 6be579f626
3 changed files with 221 additions and 0 deletions

View file

@ -1810,6 +1810,33 @@ class TelegramAdapter(BasePlatformAdapter):
self.name, retry_err,
)
return SendResult(success=False, error=str(retry_err))
# Transient network errors (ConnectError, timeouts, server
# disconnects) should not permanently disable progress-message
# editing. Mark the result retryable so the caller knows it
# can keep trying on the next update cycle.
_transient_markers = (
"connecterror",
"connect error",
"connection error",
"networkerror",
"network error",
"timed out",
"readtimeout",
"writetimeout",
"server disconnected",
"temporarily unavailable",
"temporary failure",
"httpx",
)
_is_transient = any(m in err_str for m in _transient_markers)
if _is_transient:
logger.warning(
"[%s] Transient network error editing message %s (will retry): %s",
self.name,
message_id,
e,
)
return SendResult(success=False, error=str(e), retryable=True)
logger.error(
"[%s] Failed to edit Telegram message %s: %s",
self.name,

View file

@ -15462,6 +15462,17 @@ class GatewayRunner:
)
if not result.success:
_err = (getattr(result, "error", "") or "").lower()
# Transient network errors (ConnectError, timeouts)
# must not permanently disable progress-message
# editing — the next cycle can catch up. Only
# permanent failures (flood control, message not
# found, permissions) should set can_edit = False.
if getattr(result, "retryable", False):
logger.debug(
"[%s] Transient edit failure — keeping can_edit=True",
adapter.name,
)
continue
if "flood" in _err or "retry after" in _err:
# Flood control hit — backoff but keep editing.
# Only disable edits for non-recoverable errors.

View file

@ -0,0 +1,183 @@
"""Tests for transient-error handling in Telegram progress-message editing.
Issue: #27828
When ``edit_message_text`` fails with a transient network error (e.g.
``httpx.ConnectError``), the gateway must NOT permanently disable progress-
message editing. Only permanent failures (flood control, message-not-found,
permissions) should set ``can_edit = False``.
Two layers are tested:
1. The ``_TRANSIENT_EDIT_MARKERS`` / retryable classification logic in
``TelegramAdapter.edit_message``.
2. The ``send_progress_messages`` caller in ``run.py`` honours
``result.retryable`` and keeps ``can_edit = True``.
"""
from __future__ import annotations
import asyncio
from unittest.mock import AsyncMock
import pytest
from gateway.platforms.base import SendResult
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
_TRANSIENT_MARKERS = (
"connecterror",
"connect error",
"connection error",
"networkerror",
"network error",
"timed out",
"readtimeout",
"writetimeout",
"server disconnected",
"temporarily unavailable",
"temporary failure",
"httpx",
)
_PERMANENT_MARKERS = (
"message to edit not found",
"message can't be edited",
"not enough rights",
"message_id_invalid",
)
def _is_transient(error_str: str) -> bool:
"""Mirrors the classification logic added to TelegramAdapter.edit_message."""
err = error_str.lower()
return any(m in err for m in _TRANSIENT_MARKERS)
def _is_permanent(error_str: str) -> bool:
err = error_str.lower()
return any(m in err for m in _PERMANENT_MARKERS)
# ---------------------------------------------------------------------------
# 1. Error classification — transient vs permanent
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("error_str", [
"httpx.ConnectError: Connection refused",
"telegram.error.NetworkError: httpx.ConnectError",
"NetworkError: remote end closed connection without response",
"httpx.ReadTimeout: read timed out",
"ReadTimeout: timed out",
"Server disconnected",
"Temporarily unavailable",
"Temporary failure in name resolution",
"Connection error: failed to connect",
])
def test_transient_errors_are_classified_as_transient(error_str):
"""Network / transient errors must be classified as retryable."""
assert _is_transient(error_str), (
f"Expected {error_str!r} to be transient"
)
@pytest.mark.parametrize("error_str", [
"Bad Request: message to edit not found",
"Bad Request: message can't be edited",
"Bad Request: not enough rights to edit the message",
"Bad Request: MESSAGE_ID_INVALID",
"flood_control:30.0",
"Forbidden: bot was blocked by the user",
])
def test_permanent_errors_are_not_transient(error_str):
"""Permanent edit failures must NOT be classified as retryable."""
assert not _is_transient(error_str), (
f"Expected {error_str!r} to be permanent (non-transient)"
)
# ---------------------------------------------------------------------------
# 2. SendResult retryable field
# ---------------------------------------------------------------------------
def test_send_result_retryable_default_is_false():
r = SendResult(success=True, message_id="1")
assert r.retryable is False
def test_send_result_retryable_can_be_set_true():
r = SendResult(success=False, error="httpx.ConnectError: ...", retryable=True)
assert r.retryable is True
def test_send_result_retryable_false_for_permanent():
r = SendResult(success=False, error="message to edit not found")
assert r.retryable is False
# ---------------------------------------------------------------------------
# 3. run.py logic — retryable result must NOT set can_edit=False
# We simulate the relevant block from send_progress_messages():
#
# if not result.success:
# if getattr(result, 'retryable', False):
# continue # <-- keep can_edit=True
# ...
# can_edit = False
#
# ---------------------------------------------------------------------------
def _simulate_progress_loop(edit_results):
"""
Simulate the can_edit decision for a sequence of edit_message results.
Returns the final value of can_edit after processing all results.
"""
can_edit = True
for result in edit_results:
if not result.success:
if getattr(result, "retryable", False):
# Transient — keep can_edit True and skip to next cycle
continue
can_edit = False
break
return can_edit
def test_transient_failure_keeps_can_edit_true():
"""A single transient network error must not disable progress editing."""
results = [
SendResult(success=False, error="httpx.ConnectError", retryable=True),
SendResult(success=True, message_id="42"),
]
assert _simulate_progress_loop(results) is True
def test_permanent_failure_sets_can_edit_false():
"""A permanent edit failure must disable progress editing."""
results = [
SendResult(success=False, error="message to edit not found", retryable=False),
]
assert _simulate_progress_loop(results) is False
def test_multiple_transient_then_success_keeps_can_edit_true():
"""Multiple transient failures followed by success keep can_edit=True."""
results = [
SendResult(success=False, error="httpx.ConnectError", retryable=True),
SendResult(success=False, error="server disconnected", retryable=True),
SendResult(success=True, message_id="99"),
]
assert _simulate_progress_loop(results) is True
def test_flood_control_sets_can_edit_false():
"""Flood control (non-retryable) must disable progress editing."""
results = [
SendResult(success=False, error="flood_control:30.0", retryable=False),
]
assert _simulate_progress_loop(results) is False