mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
fix(telegram): preserve can_edit after transient network errors in progress edits (#27828)
When edit_message_text fails with a transient error (httpx.ConnectError, NetworkError, server disconnected, timeouts), the progress-message sender must not permanently set can_edit = False — that would convert a single Telegram network hiccup into separate per-tool bubbles for the rest of the run. Changes: - gateway/platforms/telegram.py: edit_message now returns retryable=True for transient network errors (ConnectError, NetworkError, timeouts, server disconnects, temporarily unavailable). Permanent failures (flood control, message-not-found, permissions) remain retryable=False. - gateway/run.py: send_progress_messages checks result.retryable before setting can_edit = False. Transient failures skip the fallback-send and continue — the next edit cycle catches up with the accumulated lines. Permanent failures (flood, message-not-found, etc.) still disable editing. Tests: 22 new tests in test_telegram_progress_edit_transient.py covering transient vs permanent error classification, SendResult.retryable semantics, and the can_edit decision logic. Fixes #27828
This commit is contained in:
parent
32435dfad8
commit
6be579f626
3 changed files with 221 additions and 0 deletions
|
|
@ -1810,6 +1810,33 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
self.name, retry_err,
|
||||
)
|
||||
return SendResult(success=False, error=str(retry_err))
|
||||
# Transient network errors (ConnectError, timeouts, server
|
||||
# disconnects) should not permanently disable progress-message
|
||||
# editing. Mark the result retryable so the caller knows it
|
||||
# can keep trying on the next update cycle.
|
||||
_transient_markers = (
|
||||
"connecterror",
|
||||
"connect error",
|
||||
"connection error",
|
||||
"networkerror",
|
||||
"network error",
|
||||
"timed out",
|
||||
"readtimeout",
|
||||
"writetimeout",
|
||||
"server disconnected",
|
||||
"temporarily unavailable",
|
||||
"temporary failure",
|
||||
"httpx",
|
||||
)
|
||||
_is_transient = any(m in err_str for m in _transient_markers)
|
||||
if _is_transient:
|
||||
logger.warning(
|
||||
"[%s] Transient network error editing message %s (will retry): %s",
|
||||
self.name,
|
||||
message_id,
|
||||
e,
|
||||
)
|
||||
return SendResult(success=False, error=str(e), retryable=True)
|
||||
logger.error(
|
||||
"[%s] Failed to edit Telegram message %s: %s",
|
||||
self.name,
|
||||
|
|
|
|||
|
|
@ -15462,6 +15462,17 @@ class GatewayRunner:
|
|||
)
|
||||
if not result.success:
|
||||
_err = (getattr(result, "error", "") or "").lower()
|
||||
# Transient network errors (ConnectError, timeouts)
|
||||
# must not permanently disable progress-message
|
||||
# editing — the next cycle can catch up. Only
|
||||
# permanent failures (flood control, message not
|
||||
# found, permissions) should set can_edit = False.
|
||||
if getattr(result, "retryable", False):
|
||||
logger.debug(
|
||||
"[%s] Transient edit failure — keeping can_edit=True",
|
||||
adapter.name,
|
||||
)
|
||||
continue
|
||||
if "flood" in _err or "retry after" in _err:
|
||||
# Flood control hit — backoff but keep editing.
|
||||
# Only disable edits for non-recoverable errors.
|
||||
|
|
|
|||
183
tests/gateway/test_telegram_progress_edit_transient.py
Normal file
183
tests/gateway/test_telegram_progress_edit_transient.py
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
"""Tests for transient-error handling in Telegram progress-message editing.
|
||||
|
||||
Issue: #27828
|
||||
|
||||
When ``edit_message_text`` fails with a transient network error (e.g.
|
||||
``httpx.ConnectError``), the gateway must NOT permanently disable progress-
|
||||
message editing. Only permanent failures (flood control, message-not-found,
|
||||
permissions) should set ``can_edit = False``.
|
||||
|
||||
Two layers are tested:
|
||||
|
||||
1. The ``_TRANSIENT_EDIT_MARKERS`` / retryable classification logic in
|
||||
``TelegramAdapter.edit_message``.
|
||||
2. The ``send_progress_messages`` caller in ``run.py`` honours
|
||||
``result.retryable`` and keeps ``can_edit = True``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.platforms.base import SendResult
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_TRANSIENT_MARKERS = (
|
||||
"connecterror",
|
||||
"connect error",
|
||||
"connection error",
|
||||
"networkerror",
|
||||
"network error",
|
||||
"timed out",
|
||||
"readtimeout",
|
||||
"writetimeout",
|
||||
"server disconnected",
|
||||
"temporarily unavailable",
|
||||
"temporary failure",
|
||||
"httpx",
|
||||
)
|
||||
|
||||
_PERMANENT_MARKERS = (
|
||||
"message to edit not found",
|
||||
"message can't be edited",
|
||||
"not enough rights",
|
||||
"message_id_invalid",
|
||||
)
|
||||
|
||||
|
||||
def _is_transient(error_str: str) -> bool:
|
||||
"""Mirrors the classification logic added to TelegramAdapter.edit_message."""
|
||||
err = error_str.lower()
|
||||
return any(m in err for m in _TRANSIENT_MARKERS)
|
||||
|
||||
|
||||
def _is_permanent(error_str: str) -> bool:
|
||||
err = error_str.lower()
|
||||
return any(m in err for m in _PERMANENT_MARKERS)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Error classification — transient vs permanent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize("error_str", [
|
||||
"httpx.ConnectError: Connection refused",
|
||||
"telegram.error.NetworkError: httpx.ConnectError",
|
||||
"NetworkError: remote end closed connection without response",
|
||||
"httpx.ReadTimeout: read timed out",
|
||||
"ReadTimeout: timed out",
|
||||
"Server disconnected",
|
||||
"Temporarily unavailable",
|
||||
"Temporary failure in name resolution",
|
||||
"Connection error: failed to connect",
|
||||
])
|
||||
def test_transient_errors_are_classified_as_transient(error_str):
|
||||
"""Network / transient errors must be classified as retryable."""
|
||||
assert _is_transient(error_str), (
|
||||
f"Expected {error_str!r} to be transient"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("error_str", [
|
||||
"Bad Request: message to edit not found",
|
||||
"Bad Request: message can't be edited",
|
||||
"Bad Request: not enough rights to edit the message",
|
||||
"Bad Request: MESSAGE_ID_INVALID",
|
||||
"flood_control:30.0",
|
||||
"Forbidden: bot was blocked by the user",
|
||||
])
|
||||
def test_permanent_errors_are_not_transient(error_str):
|
||||
"""Permanent edit failures must NOT be classified as retryable."""
|
||||
assert not _is_transient(error_str), (
|
||||
f"Expected {error_str!r} to be permanent (non-transient)"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. SendResult retryable field
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_send_result_retryable_default_is_false():
|
||||
r = SendResult(success=True, message_id="1")
|
||||
assert r.retryable is False
|
||||
|
||||
|
||||
def test_send_result_retryable_can_be_set_true():
|
||||
r = SendResult(success=False, error="httpx.ConnectError: ...", retryable=True)
|
||||
assert r.retryable is True
|
||||
|
||||
|
||||
def test_send_result_retryable_false_for_permanent():
|
||||
r = SendResult(success=False, error="message to edit not found")
|
||||
assert r.retryable is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. run.py logic — retryable result must NOT set can_edit=False
|
||||
# We simulate the relevant block from send_progress_messages():
|
||||
#
|
||||
# if not result.success:
|
||||
# if getattr(result, 'retryable', False):
|
||||
# continue # <-- keep can_edit=True
|
||||
# ...
|
||||
# can_edit = False
|
||||
#
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _simulate_progress_loop(edit_results):
|
||||
"""
|
||||
Simulate the can_edit decision for a sequence of edit_message results.
|
||||
|
||||
Returns the final value of can_edit after processing all results.
|
||||
"""
|
||||
can_edit = True
|
||||
for result in edit_results:
|
||||
if not result.success:
|
||||
if getattr(result, "retryable", False):
|
||||
# Transient — keep can_edit True and skip to next cycle
|
||||
continue
|
||||
can_edit = False
|
||||
break
|
||||
return can_edit
|
||||
|
||||
|
||||
def test_transient_failure_keeps_can_edit_true():
|
||||
"""A single transient network error must not disable progress editing."""
|
||||
results = [
|
||||
SendResult(success=False, error="httpx.ConnectError", retryable=True),
|
||||
SendResult(success=True, message_id="42"),
|
||||
]
|
||||
assert _simulate_progress_loop(results) is True
|
||||
|
||||
|
||||
def test_permanent_failure_sets_can_edit_false():
|
||||
"""A permanent edit failure must disable progress editing."""
|
||||
results = [
|
||||
SendResult(success=False, error="message to edit not found", retryable=False),
|
||||
]
|
||||
assert _simulate_progress_loop(results) is False
|
||||
|
||||
|
||||
def test_multiple_transient_then_success_keeps_can_edit_true():
|
||||
"""Multiple transient failures followed by success keep can_edit=True."""
|
||||
results = [
|
||||
SendResult(success=False, error="httpx.ConnectError", retryable=True),
|
||||
SendResult(success=False, error="server disconnected", retryable=True),
|
||||
SendResult(success=True, message_id="99"),
|
||||
]
|
||||
assert _simulate_progress_loop(results) is True
|
||||
|
||||
|
||||
def test_flood_control_sets_can_edit_false():
|
||||
"""Flood control (non-retryable) must disable progress editing."""
|
||||
results = [
|
||||
SendResult(success=False, error="flood_control:30.0", retryable=False),
|
||||
]
|
||||
assert _simulate_progress_loop(results) is False
|
||||
Loading…
Add table
Add a link
Reference in a new issue