fix(telegram): address post-merge audit follow-ups (#28670, #28672, #28674, #28676, #28678)

Five small fixes against issues filed during the post-merge salvage audit:

* #28670: `_GATEWAY_PROVIDER_ERROR_RE` false-positives on legitimate prose.
  Replace the regex with an anchored `_GATEWAY_PROVIDER_ERROR_SHAPE_RE` and
  add a length-cap heuristic to `_looks_like_gateway_provider_error`:
  short envelope at the start of the message → real provider error; long
  prose containing 'HTTP 404' → assistant answer, leave alone.

* #28672: drop the pointless 1s asyncio.sleep on Telegram thread-not-found
  retries. The same-thread retry is preserved (catches Telegram's
  occasional transient flake exercised by
  test_send_retries_transient_thread_not_found_before_fallback) but with
  no artificial delay.

* #28674: broaden `_should_retry_without_dm_topic_reply_anchor` to also
  fire when Bot API rejects `direct_messages_topic_id` for synthetic /
  resumed sends that have no reply anchor. Avoids dropping post-resume
  background notifications if the topic id goes stale.

* #28676: delete the dead image-document branch superseded by bd0c54d17
  (which returns early on the same extension set).

* #28678: extend chat-scoped allowlist (`TELEGRAM_GROUP_ALLOWED_CHATS`)
  to also cover `chat_type == 'channel'`, so operators can authorize
  channel posts by chat id without falling back to per-user allowlists.

Tests:
- scripts/run_tests.sh tests/gateway/test_telegram_thread_fallback.py -q  → 41/41
- scripts/run_tests.sh tests/cron/test_scheduler.py -q                    → 127/127
- broader test set: same 3 pre-existing test-pollution failures reproduce
  on plain main.
This commit is contained in:
Teknium 2026-05-19 03:15:30 -07:00
parent 88ee58f7d2
commit a3c753128d
2 changed files with 93 additions and 34 deletions

View file

@ -656,12 +656,42 @@ class TelegramAdapter(BasePlatformAdapter):
metadata: Optional[Dict[str, Any]],
reply_to_message_id: Optional[int],
) -> bool:
return (
bool(metadata and metadata.get("telegram_dm_topic_reply_fallback"))
and reply_to_message_id is not None
and cls._is_bad_request_error(error)
and "message to be replied not found" in str(error).lower()
)
"""True when a DM-topic send should be retried with routing stripped.
Two cases trigger the retry:
1. The original anchor-stale case the reply target was deleted, so
Bot API returns "message to be replied not found". The retry drops
the reply anchor and the topic id together.
2. The synthetic-event case (added when #27937 introduced
``direct_messages_topic_id`` fallback for sends without an anchor):
if Bot API rejects the topic id itself with any BadRequest that
mentions topic/thread routing, we retry without routing rather
than dropping the message.
"""
if not (metadata and metadata.get("telegram_dm_topic_reply_fallback")):
return False
if not cls._is_bad_request_error(error):
return False
err_lower = str(error).lower()
if reply_to_message_id is not None and "message to be replied not found" in err_lower:
return True
# Synthetic / resumed sends route via ``direct_messages_topic_id``
# instead of a reply anchor. If Telegram rejects the topic id, fall
# back to a plain DM send.
if metadata.get("direct_messages_topic_id"):
topic_markers = (
"direct_messages_topic",
"message thread not found",
"thread not found",
"topic_closed",
"topic_deleted",
"topic not found",
)
if any(marker in err_lower for marker in topic_markers):
return True
return False
async def _send_with_dm_topic_reply_anchor_retry(
self,
@ -1752,17 +1782,22 @@ class TelegramAdapter(BasePlatformAdapter):
# specific cases instead of blindly retrying.
if _BadReq and isinstance(send_err, _BadReq):
if self._is_thread_not_found_error(send_err) and effective_thread_id is not None:
# Telegram has been observed to return a
# one-off "thread not found" that recovers on
# an immediate retry (transient flake — see
# test_send_retries_transient_thread_not_found_before_fallback).
# Try the same thread_id once without sleeping
# before falling back to a plain send.
if not retried_thread_not_found:
retried_thread_not_found = True
logger.warning(
"[%s] Thread %s not found, retrying once with message_thread_id",
"[%s] Thread %s not found, retrying once with same thread_id",
self.name, effective_thread_id,
)
await asyncio.sleep(1)
continue
# Thread doesn't exist — retry without
# message_thread_id so the message still
# reaches the chat.
# Second failure: the thread is genuinely gone.
# Retry without ``message_thread_id`` so the
# message still reaches the chat.
logger.warning(
"[%s] Thread %s not found, retrying without message_thread_id",
self.name, effective_thread_id,
@ -4944,20 +4979,11 @@ class TelegramAdapter(BasePlatformAdapter):
await self.handle_message(event)
return
if ext in SUPPORTED_IMAGE_DOCUMENT_TYPES:
file_obj = await doc.get_file()
image_bytes = await file_obj.download_as_bytearray()
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [SUPPORTED_IMAGE_DOCUMENT_TYPES[ext]]
event.message_type = MessageType.PHOTO
logger.info("[Telegram] Cached user image document at %s", cached_path)
media_group_id = getattr(msg, "media_group_id", None)
if media_group_id:
await self._queue_media_group_event(str(media_group_id), event)
else:
await self.handle_message(event)
return
# NOTE: image-document handling is performed earlier in this
# function (ext in _TELEGRAM_IMAGE_EXTENSIONS or image/* mime),
# which returns before reaching here. Any subsequent
# ext-in-SUPPORTED_IMAGE_DOCUMENT_TYPES branch would be dead
# code — the extension sets are identical.
# Check if supported
if ext not in SUPPORTED_DOCUMENT_TYPES:

View file

@ -166,9 +166,42 @@ def _gateway_provider_error_reply(text: str) -> str:
)
_GATEWAY_PROVIDER_ERROR_SHAPE_RE = re.compile(
r"^\s*(\W*\s*)?("
r"api\s+(?:call\s+)?failed"
r"|provider\s+authentication\s+failed"
r"|non-retryable\s+error"
r"|rate\s+limited\s+after\s+\d+\s+retries"
r"|error\s+code\s*:"
r"|http\s*\d{3}\b"
r"|incorrect\s+api\s+key"
r"|invalid\s+api\s+key"
r")",
re.IGNORECASE,
)
def _looks_like_gateway_provider_error(text: str) -> bool:
"""True when text is infrastructure/provider failure, not normal content."""
return bool(_GATEWAY_PROVIDER_ERROR_RE.search(text))
"""True when text is infrastructure/provider failure, not normal content.
Two heuristics combined so the rewrite only fires on actual provider
error envelopes, not on assistant prose that happens to mention an
HTTP status code:
1. The text is short real provider errors are 13 lines of envelope
text; assistant answers are usually longer.
2. AND the error marker appears at the start of the message (optionally
behind a punctuation/symbol prefix), not buried mid-paragraph in an
explanation like "HTTP 404 means 'not found' — ...".
"""
if not text:
return False
body = str(text).strip()
# Provider failure envelopes are short. Assistant answers that happen
# to mention HTTP status codes ("HTTP 404 means...") tend to be longer.
if len(body) > 400 or body.count("\n") > 4:
return False
return bool(_GATEWAY_PROVIDER_ERROR_SHAPE_RE.search(body))
def _sanitize_gateway_final_response(platform: Any, text: str) -> str:
@ -6076,17 +6109,17 @@ class GatewayRunner:
user_id = source.user_id
# Telegram (and similar) authorize entire group/forum chats by
# chat ID via TELEGRAM_GROUP_ALLOWED_CHATS / QQ_GROUP_ALLOWED_USERS.
# Telegram (and similar) authorize entire group/forum/channel chats
# by chat ID via TELEGRAM_GROUP_ALLOWED_CHATS / QQ_GROUP_ALLOWED_USERS.
# That allowlist is chat-scoped, so it must work even when
# source.user_id is None — Telegram emits anonymous-admin posts
# and sender_chat traffic in groups with no `from_user`, and an
# operator who explicitly listed the chat expects those to be
# honored. Run this check before the no-user-id guard below so
# source.user_id is None — Telegram emits anonymous-admin posts,
# sender_chat traffic, and channel broadcasts with no `from_user`,
# and an operator who explicitly listed the chat expects those to
# be honored. Run this check before the no-user-id guard below so
# documented behavior matches reality
# (website/docs/reference/environment-variables.md,
# website/docs/user-guide/messaging/telegram.md).
if source.chat_type in {"group", "forum"} and source.chat_id:
if source.chat_type in {"group", "forum", "channel"} and source.chat_id:
chat_allowlist_env = {
Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_CHATS",
Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",