mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(telegram): fall back to no thread_id on 'Message thread not found' (#3390)
python-telegram-bot's BadRequest inherits from NetworkError, so the send() retry loop was catching 'Message thread not found' as a transient network error and retrying 3 times before silently failing. This killed all tool progress messages, streaming responses, and typing indicators when the incoming message carried an invalid message_thread_id. Now detect BadRequest inside the NetworkError handler: - 'thread not found' + thread_id set → clear thread_id and retry once (message still reaches the chat, just without topic threading) - Other BadRequest errors → raise immediately (permanent, don't retry) - True NetworkError → retry as before (transient) 252 silent failures in gateway.log traced to this on 2026-03-26. 5 new tests for thread fallback, non-thread BadRequest, no-thread sends, network retry, and multi-chunk fallback.
This commit is contained in:
parent
b7bcae49c6
commit
41d9d08078
2 changed files with 225 additions and 2 deletions
199
tests/gateway/test_telegram_thread_fallback.py
Normal file
199
tests/gateway/test_telegram_thread_fallback.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
"""Tests for Telegram send() thread_id fallback.
|
||||
|
||||
When message_thread_id points to a non-existent thread, Telegram returns
|
||||
BadRequest('Message thread not found'). Since BadRequest is a subclass of
|
||||
NetworkError in python-telegram-bot, the old retry loop treated this as a
|
||||
transient error and retried 3 times before silently failing — killing all
|
||||
tool progress messages, streaming responses, and typing indicators.
|
||||
|
||||
The fix detects "thread not found" BadRequest errors and retries the send
|
||||
WITHOUT message_thread_id so the message still reaches the chat.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import PlatformConfig, Platform
|
||||
from gateway.platforms.base import SendResult
|
||||
|
||||
|
||||
# ── Fake telegram.error hierarchy ──────────────────────────────────────
|
||||
# Mirrors the real python-telegram-bot hierarchy:
|
||||
# BadRequest → NetworkError → TelegramError → Exception
|
||||
|
||||
|
||||
class FakeNetworkError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class FakeBadRequest(FakeNetworkError):
|
||||
pass
|
||||
|
||||
|
||||
# Build a fake telegram module tree so the adapter's internal imports work
|
||||
_fake_telegram = types.ModuleType("telegram")
|
||||
_fake_telegram_error = types.ModuleType("telegram.error")
|
||||
_fake_telegram_error.NetworkError = FakeNetworkError
|
||||
_fake_telegram_error.BadRequest = FakeBadRequest
|
||||
_fake_telegram.error = _fake_telegram_error
|
||||
_fake_telegram_constants = types.ModuleType("telegram.constants")
|
||||
_fake_telegram_constants.ParseMode = SimpleNamespace(MARKDOWN_V2="MarkdownV2")
|
||||
_fake_telegram.constants = _fake_telegram_constants
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _inject_fake_telegram(monkeypatch):
|
||||
"""Inject fake telegram modules so the adapter can import from them."""
|
||||
monkeypatch.setitem(sys.modules, "telegram", _fake_telegram)
|
||||
monkeypatch.setitem(sys.modules, "telegram.error", _fake_telegram_error)
|
||||
monkeypatch.setitem(sys.modules, "telegram.constants", _fake_telegram_constants)
|
||||
|
||||
|
||||
def _make_adapter():
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
|
||||
config = PlatformConfig(enabled=True, token="fake-token")
|
||||
adapter = object.__new__(TelegramAdapter)
|
||||
adapter._config = config
|
||||
adapter._platform = Platform.TELEGRAM
|
||||
adapter._connected = True
|
||||
adapter._dm_topics = {}
|
||||
adapter._dm_topics_config = []
|
||||
adapter._reply_to_mode = "first"
|
||||
adapter._fallback_ips = []
|
||||
adapter._polling_conflict_count = 0
|
||||
adapter._polling_network_error_count = 0
|
||||
adapter._polling_error_callback_ref = None
|
||||
adapter.platform = Platform.TELEGRAM
|
||||
return adapter
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_retries_without_thread_on_thread_not_found():
|
||||
"""When message_thread_id causes 'thread not found', retry without it."""
|
||||
adapter = _make_adapter()
|
||||
|
||||
call_log = []
|
||||
|
||||
async def mock_send_message(**kwargs):
|
||||
call_log.append(dict(kwargs))
|
||||
tid = kwargs.get("message_thread_id")
|
||||
if tid is not None:
|
||||
raise FakeBadRequest("Message thread not found")
|
||||
return SimpleNamespace(message_id=42)
|
||||
|
||||
adapter._bot = SimpleNamespace(send_message=mock_send_message)
|
||||
|
||||
result = await adapter.send(
|
||||
chat_id="123",
|
||||
content="test message",
|
||||
metadata={"thread_id": "99999"},
|
||||
)
|
||||
|
||||
assert result.success is True
|
||||
assert result.message_id == "42"
|
||||
# First call has thread_id, second call retries without
|
||||
assert len(call_log) == 2
|
||||
assert call_log[0]["message_thread_id"] == 99999
|
||||
assert call_log[1]["message_thread_id"] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_raises_on_other_bad_request():
|
||||
"""Non-thread BadRequest errors should NOT be retried — they fail immediately."""
|
||||
adapter = _make_adapter()
|
||||
|
||||
async def mock_send_message(**kwargs):
|
||||
raise FakeBadRequest("Chat not found")
|
||||
|
||||
adapter._bot = SimpleNamespace(send_message=mock_send_message)
|
||||
|
||||
result = await adapter.send(
|
||||
chat_id="123",
|
||||
content="test message",
|
||||
metadata={"thread_id": "99999"},
|
||||
)
|
||||
|
||||
assert result.success is False
|
||||
assert "Chat not found" in result.error
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_without_thread_id_unaffected():
|
||||
"""Normal sends without thread_id should work as before."""
|
||||
adapter = _make_adapter()
|
||||
|
||||
call_log = []
|
||||
|
||||
async def mock_send_message(**kwargs):
|
||||
call_log.append(dict(kwargs))
|
||||
return SimpleNamespace(message_id=100)
|
||||
|
||||
adapter._bot = SimpleNamespace(send_message=mock_send_message)
|
||||
|
||||
result = await adapter.send(
|
||||
chat_id="123",
|
||||
content="test message",
|
||||
)
|
||||
|
||||
assert result.success is True
|
||||
assert len(call_log) == 1
|
||||
assert call_log[0]["message_thread_id"] is None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_retries_network_errors_normally():
|
||||
"""Real transient network errors (not BadRequest) should still be retried."""
|
||||
adapter = _make_adapter()
|
||||
|
||||
attempt = [0]
|
||||
|
||||
async def mock_send_message(**kwargs):
|
||||
attempt[0] += 1
|
||||
if attempt[0] < 3:
|
||||
raise FakeNetworkError("Connection reset")
|
||||
return SimpleNamespace(message_id=200)
|
||||
|
||||
adapter._bot = SimpleNamespace(send_message=mock_send_message)
|
||||
|
||||
result = await adapter.send(
|
||||
chat_id="123",
|
||||
content="test message",
|
||||
)
|
||||
|
||||
assert result.success is True
|
||||
assert attempt[0] == 3 # Two retries then success
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_thread_fallback_only_fires_once():
|
||||
"""After clearing thread_id, subsequent chunks should also use None."""
|
||||
adapter = _make_adapter()
|
||||
|
||||
call_log = []
|
||||
|
||||
async def mock_send_message(**kwargs):
|
||||
call_log.append(dict(kwargs))
|
||||
tid = kwargs.get("message_thread_id")
|
||||
if tid is not None:
|
||||
raise FakeBadRequest("Message thread not found")
|
||||
return SimpleNamespace(message_id=42)
|
||||
|
||||
adapter._bot = SimpleNamespace(send_message=mock_send_message)
|
||||
|
||||
# Send a long message that gets split into chunks
|
||||
long_msg = "A" * 5000 # Exceeds Telegram's 4096 limit
|
||||
result = await adapter.send(
|
||||
chat_id="123",
|
||||
content=long_msg,
|
||||
metadata={"thread_id": "99999"},
|
||||
)
|
||||
|
||||
assert result.success is True
|
||||
# First chunk: attempt with thread → fail → retry without → succeed
|
||||
# Second chunk: should use thread_id=None directly (effective_thread_id
|
||||
# was cleared per-chunk but the metadata doesn't change between chunks)
|
||||
# The key point: the message was delivered despite the invalid thread
|
||||
Loading…
Add table
Add a link
Reference in a new issue