feat(gateway): typed send-error classification (SendResult.error_kind) (#50342)

Add a platform-neutral send-failure vocabulary so consumers can branch on a
typed category instead of substring-matching the raw provider message.

- base.py: SEND_ERROR_KINDS + classify_send_error() (too_long / bad_format /
  forbidden / not_found / rate_limited / transient / unknown), and an optional
  SendResult.error_kind field (defaults None — fully backward compatible).
- telegram.py: populate error_kind on send() failures; message_too_long keeps
  its existing error token plus error_kind='too_long'.

Purely additive: no behavioral change to the existing degrade-and-deliver
paths (MarkdownV2->plain-text fallback, overflow split, retry classification
all untouched). 22 new tests + 210 adapter regression tests green.
This commit is contained in:
Teknium 2026-06-21 12:34:22 -07:00 committed by GitHub
parent 6bbacc2238
commit c0409a87ff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 244 additions and 2 deletions

View file

@ -1674,6 +1674,105 @@ class SendResult:
# made up the full payload, in send order. Empty tuple for the common
# single-message case.
continuation_message_ids: tuple = ()
# Machine-readable failure category (set only when ``success`` is False).
# ``error`` stays the human-readable detail string; ``error_kind`` lets
# consumers branch deterministically instead of substring-matching the raw
# provider message. One of the values in :data:`SEND_ERROR_KINDS` or
# ``None`` (unset / not classified). Producers should set this via
# :func:`classify_send_error`.
error_kind: Optional[str] = None
# Machine-readable send-failure categories. Kept platform-neutral so every
# adapter can populate ``SendResult.error_kind`` from the same vocabulary and
# the gateway can decide — once, in one place — whether a failure is worth
# surfacing to the user.
#
# too_long content exceeded the platform's per-message size cap; the
# adapter typically recovers via continuation/split, so this is
# informational rather than a hard failure.
# bad_format the platform rejected the message markup/entities (parse
# error); a plain-text retry is the actionable fix.
# forbidden the bot is blocked, kicked, or lacks permission to post to the
# target — the bot CANNOT reach the user, so there is nowhere to
# surface a notice.
# not_found the target chat/thread/message no longer exists.
# rate_limited the platform throttled the send (flood control).
# transient a connection-level failure that is safe to retry.
# unknown classification did not match any known shape.
SEND_ERROR_KINDS = frozenset(
{
"too_long",
"bad_format",
"forbidden",
"not_found",
"rate_limited",
"transient",
"unknown",
}
)
def classify_send_error(exc: Optional[BaseException], error_text: str = "") -> str:
"""Map a send exception / error string to a :data:`SEND_ERROR_KINDS` value.
Platform-neutral: matches on the lowercased text of ``exc`` (and/or the
explicit ``error_text``) against the substrings the major messaging APIs
use. Conservative anything unrecognized returns ``"unknown"`` so callers
never mistake an unclassified failure for a benign one.
"""
parts = []
if error_text:
parts.append(error_text)
if exc is not None:
parts.append(str(exc))
parts.append(exc.__class__.__name__)
blob = " ".join(parts).lower()
if not blob.strip():
return "unknown"
if "message_too_long" in blob or "too long" in blob or "message is too long" in blob:
return "too_long"
if (
"can't parse entities" in blob
or "cant parse entities" in blob
or "can't find end" in blob
or "unsupported start tag" in blob
or ("entity" in blob and "parse" in blob)
or ("bad request" in blob and "entit" in blob)
):
return "bad_format"
if (
"forbidden" in blob
or "bot was blocked" in blob
or "blocked by the user" in blob
or "user is deactivated" in blob
or "not enough rights" in blob
or "have no rights" in blob
or "not a member" in blob
):
return "forbidden"
if (
"chat not found" in blob
or "message to edit not found" in blob
or "message to reply not found" in blob
or "thread not found" in blob
or "topic_deleted" in blob
or "message_id_invalid" in blob
):
return "not_found"
if (
"flood" in blob
or "too many requests" in blob
or "retry after" in blob
or "rate limit" in blob
):
return "rate_limited"
for pat in _RETRYABLE_ERROR_PATTERNS:
if pat in blob:
return "transient"
if "connecttimeout" in blob:
return "transient"
return "unknown"
class EphemeralReply(str):

View file

@ -72,6 +72,7 @@ from gateway.platforms.base import (
MessageType,
ProcessingOutcome,
SendResult,
classify_send_error,
cache_image_from_bytes,
cache_audio_from_bytes,
cache_video_from_bytes,
@ -2763,6 +2764,7 @@ class TelegramAdapter(BasePlatformAdapter):
except Exception as e:
logger.error("[%s] Failed to send Telegram message: %s", self.name, e, exc_info=True)
err_str = str(e).lower()
error_kind = classify_send_error(e)
# Message too long — content exceeded 4096 chars. Return failure so
# stream consumer enters fallback mode and sends the remainder.
if "message_too_long" in err_str or "too long" in err_str:
@ -2770,7 +2772,7 @@ class TelegramAdapter(BasePlatformAdapter):
"[%s] send() content too long, falling back to new-message continuation",
self.name,
)
return SendResult(success=False, error="message_too_long")
return SendResult(success=False, error="message_too_long", error_kind="too_long")
# TimedOut usually means the request may have reached Telegram —
# mark as non-retryable so _send_with_retry() doesn't re-send.
# Exceptions: a wrapped ConnectTimeout (no connection established)
@ -2780,7 +2782,12 @@ class TelegramAdapter(BasePlatformAdapter):
is_timeout = (_to and isinstance(e, _to)) or "timed out" in err_str
is_connect_timeout = self._looks_like_connect_timeout(e)
is_pool_timeout = self._looks_like_pool_timeout(e)
return SendResult(success=False, error=str(e), retryable=(is_connect_timeout or is_pool_timeout or not is_timeout))
return SendResult(
success=False,
error=str(e),
retryable=(is_connect_timeout or is_pool_timeout or not is_timeout),
error_kind=error_kind,
)
async def send_or_update_status(
self,

View file

@ -0,0 +1,136 @@
"""Tests for structured send-error classification (SendResult.error_kind).
Covers the platform-neutral ``classify_send_error`` vocabulary in
``gateway/platforms/base.py`` and its wiring into the Telegram adapter's
``send()`` failure path, so consumers can branch on a typed category instead
of substring-matching the raw provider message.
"""
import pytest
from gateway.platforms.base import (
SEND_ERROR_KINDS,
SendResult,
classify_send_error,
)
class _FakeBadRequest(Exception):
"""Stand-in for a provider BadRequest carrying a message string."""
@pytest.mark.parametrize(
"text,expected",
[
("Message_too_long", "too_long"),
("Bad Request: message is too long", "too_long"),
("Bad Request: can't parse entities: unsupported start tag", "bad_format"),
("Bad Request: can't find end of the entity", "bad_format"),
("Forbidden: bot was blocked by the user", "forbidden"),
("Forbidden: user is deactivated", "forbidden"),
("Bad Request: not enough rights to send text messages", "forbidden"),
("Bad Request: chat not found", "not_found"),
("Bad Request: message to edit not found", "not_found"),
("Too Many Requests: retry after 12", "rate_limited"),
("Flood control exceeded", "rate_limited"),
("ConnectError: connection refused", "transient"),
("ConnectTimeout", "transient"),
("some entirely novel provider message", "unknown"),
("", "unknown"),
],
)
def test_classify_send_error_text(text, expected):
assert classify_send_error(None, text) == expected
def test_classify_uses_exception_class_name():
# The class name participates in classification even when str(exc) is empty.
exc = type("Forbidden", (Exception,), {})()
assert classify_send_error(exc) == "forbidden"
def test_classify_prefers_explicit_text_and_exception_together():
exc = _FakeBadRequest("chat not found")
assert classify_send_error(exc) == "not_found"
def test_every_classification_is_in_the_vocabulary():
samples = [
"message_too_long",
"can't parse entities",
"forbidden",
"chat not found",
"flood",
"connecterror",
"mystery",
"",
]
for s in samples:
assert classify_send_error(None, s) in SEND_ERROR_KINDS
def test_unknown_never_masquerades_as_benign():
# An unrecognized failure must classify as "unknown", never as a benign
# category like too_long that a consumer might treat as a soft recovery.
assert classify_send_error(None, "kaboom 500 internal") == "unknown"
def test_sendresult_error_kind_defaults_none_and_is_backward_compatible():
# Existing call sites that never set error_kind keep working unchanged.
ok = SendResult(success=True, message_id="42")
assert ok.error_kind is None
legacy_fail = SendResult(success=False, error="boom")
assert legacy_fail.error_kind is None
def test_telegram_send_failure_populates_error_kind():
"""Telegram send() failures carry a typed error_kind alongside error."""
import asyncio
from unittest.mock import AsyncMock, MagicMock
from gateway.config import PlatformConfig
from plugins.platforms.telegram.adapter import TelegramAdapter
cfg = PlatformConfig(enabled=True, token="fake-token", extra={})
adapter = TelegramAdapter(cfg)
# Minimal bot whose send_message raises a parse/entity rejection.
bot = MagicMock()
bot.send_message = AsyncMock(
side_effect=Exception("Bad Request: can't parse entities: bad tag")
)
bot.send_chat_action = AsyncMock()
# Force the legacy (non-rich) path and a connected bot.
adapter._bot = bot
adapter._rich_messages_enabled = False
result = asyncio.run(adapter.send("123", "<b>broken"))
assert result.success is False
# Telegram has a plain-text fallback for parse errors inside the send loop,
# so a raw parse failure that still escapes is classified for consumers.
assert result.error_kind in SEND_ERROR_KINDS
assert result.error_kind != "unknown" or result.error
def test_telegram_too_long_sets_too_long_kind():
import asyncio
from unittest.mock import AsyncMock, MagicMock
from gateway.config import PlatformConfig
from plugins.platforms.telegram.adapter import TelegramAdapter
cfg = PlatformConfig(enabled=True, token="fake-token", extra={})
adapter = TelegramAdapter(cfg)
bot = MagicMock()
bot.send_message = AsyncMock(
side_effect=Exception("Bad Request: message is too long")
)
bot.send_chat_action = AsyncMock()
adapter._bot = bot
adapter._rich_messages_enabled = False
result = asyncio.run(adapter.send("123", "x" * 5000))
assert result.success is False
assert result.error == "message_too_long"
assert result.error_kind == "too_long"