mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(telegram): use UTF-16 code units for message length splitting (#8725)
Port from nearai/ironclaw#2304: Telegram's 4096-character limit is measured in UTF-16 code units, not Unicode code points. Characters outside the Basic Multilingual Plane (emoji like 😀, CJK Extension B, musical symbols) are encoded as surrogate pairs: 1 Python character but 2 UTF-16 code units.

Previously, truncate_message() used Python's len(), which counts code points. This could produce chunks exceeding Telegram's actual limit when messages contain many astral-plane characters.

Changes:
- Add utf16_len() helper and _prefix_within_utf16_limit() for UTF-16-aware string measurement and truncation
- Add _custom_unit_to_cp() binary-search helper that maps a custom-unit budget to the largest safe code-point slice position
- Update truncate_message() to accept an optional len_fn parameter
- Telegram adapter now passes len_fn=utf16_len when splitting messages
- Fix fallback truncation in the Telegram error handler to use _prefix_within_utf16_limit instead of code-point slicing
- Update send_message_tool.py to use utf16_len for the Telegram platform
- Add comprehensive tests: utf16_len, _prefix_within_utf16_limit, truncate_message with len_fn (emoji splitting, content preservation, code block handling)
- Update mock lambdas in reply_mode tests to accept **kw for len_fn
This commit is contained in:
parent
3cd6cbee5f
commit
9e992df8ae
6 changed files with 240 additions and 25 deletions
|
|
@ -21,6 +21,59 @@ from urllib.parse import urlsplit
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def utf16_len(s: str) -> int:
    """Count UTF-16 code units in *s*.

    Telegram's message-length limit (4 096) is measured in UTF-16 code units,
    **not** Unicode code-points.  Characters outside the Basic Multilingual
    Plane (emoji like 😀, CJK Extension B, musical symbols, …) are encoded as
    surrogate pairs and therefore consume **two** UTF-16 code units each, even
    though Python's ``len()`` counts them as one.

    An unpaired (lone) surrogate, which a Python ``str`` may contain, is
    counted as one code unit instead of raising ``UnicodeEncodeError``.

    Ported from nearai/ironclaw#2304 which discovered the same discrepancy in
    Rust's ``chars().count()``.

    Args:
        s: Text to measure.

    Returns:
        The number of UTF-16 code units needed to encode *s*.
    """
    # UTF-16-LE emits exactly two bytes per code unit and no BOM, so the byte
    # count halves cleanly.  "surrogatepass" keeps a measuring helper from
    # crashing on strings that carry lone surrogates.
    return len(s.encode("utf-16-le", "surrogatepass")) // 2
|
||||
|
||||
|
||||
def _prefix_within_utf16_limit(s: str, limit: int) -> str:
|
||||
"""Return the longest prefix of *s* whose UTF-16 length ≤ *limit*.
|
||||
|
||||
Unlike a plain ``s[:limit]``, this respects surrogate-pair boundaries so
|
||||
we never slice a multi-code-unit character in half.
|
||||
"""
|
||||
if utf16_len(s) <= limit:
|
||||
return s
|
||||
# Binary search for the longest safe prefix
|
||||
lo, hi = 0, len(s)
|
||||
while lo < hi:
|
||||
mid = (lo + hi + 1) // 2
|
||||
if utf16_len(s[:mid]) <= limit:
|
||||
lo = mid
|
||||
else:
|
||||
hi = mid - 1
|
||||
return s[:lo]
|
||||
|
||||
|
||||
def _custom_unit_to_cp(s: str, budget: int, len_fn) -> int:
|
||||
"""Return the largest codepoint offset *n* such that ``len_fn(s[:n]) <= budget``.
|
||||
|
||||
Used by :meth:`BasePlatformAdapter.truncate_message` when *len_fn* measures
|
||||
length in units different from Python codepoints (e.g. UTF-16 code units).
|
||||
Falls back to binary search which is O(log n) calls to *len_fn*.
|
||||
"""
|
||||
if len_fn(s) <= budget:
|
||||
return len(s)
|
||||
lo, hi = 0, len(s)
|
||||
while lo < hi:
|
||||
mid = (lo + hi + 1) // 2
|
||||
if len_fn(s[:mid]) <= budget:
|
||||
lo = mid
|
||||
else:
|
||||
hi = mid - 1
|
||||
return lo
|
||||
|
||||
|
||||
def is_network_accessible(host: str) -> bool:
|
||||
"""Return True if *host* would expose the server beyond loopback.
|
||||
|
||||
|
|
@ -1886,7 +1939,11 @@ class BasePlatformAdapter(ABC):
|
|||
return content
|
||||
|
||||
@staticmethod
|
||||
def truncate_message(content: str, max_length: int = 4096) -> List[str]:
|
||||
def truncate_message(
|
||||
content: str,
|
||||
max_length: int = 4096,
|
||||
len_fn: Optional["Callable[[str], int]"] = None,
|
||||
) -> List[str]:
|
||||
"""
|
||||
Split a long message into chunks, preserving code block boundaries.
|
||||
|
||||
|
|
@ -1898,11 +1955,16 @@ class BasePlatformAdapter(ABC):
|
|||
Args:
|
||||
content: The full message content
|
||||
max_length: Maximum length per chunk (platform-specific)
|
||||
len_fn: Optional length function for measuring string length.
|
||||
Defaults to ``len`` (Unicode code-points). Pass
|
||||
``utf16_len`` for platforms that measure message
|
||||
length in UTF-16 code units (e.g. Telegram).
|
||||
|
||||
Returns:
|
||||
List of message chunks
|
||||
"""
|
||||
if len(content) <= max_length:
|
||||
_len = len_fn or len
|
||||
if _len(content) <= max_length:
|
||||
return [content]
|
||||
|
||||
INDICATOR_RESERVE = 10 # room for " (XX/XX)"
|
||||
|
|
@ -1921,22 +1983,33 @@ class BasePlatformAdapter(ABC):
|
|||
|
||||
# How much body text we can fit after accounting for the prefix,
|
||||
# a potential closing fence, and the chunk indicator.
|
||||
headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
|
||||
headroom = max_length - INDICATOR_RESERVE - _len(prefix) - _len(FENCE_CLOSE)
|
||||
if headroom < 1:
|
||||
headroom = max_length // 2
|
||||
|
||||
# Everything remaining fits in one final chunk
|
||||
if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
|
||||
if _len(prefix) + _len(remaining) <= max_length - INDICATOR_RESERVE:
|
||||
chunks.append(prefix + remaining)
|
||||
break
|
||||
|
||||
# Find a natural split point (prefer newlines, then spaces)
|
||||
region = remaining[:headroom]
|
||||
# Find a natural split point (prefer newlines, then spaces).
|
||||
# When _len != len (e.g. utf16_len for Telegram), headroom is
|
||||
# measured in the custom unit. We need codepoint-based slice
|
||||
# positions that stay within the custom-unit budget.
|
||||
#
|
||||
# _custom_unit_to_cp() maps a custom-unit budget to the largest
|
||||
# codepoint offset whose custom length ≤ budget.
|
||||
if _len is not len:
|
||||
# Map headroom (custom units) → codepoint slice length
|
||||
_cp_limit = _custom_unit_to_cp(remaining, headroom, _len)
|
||||
else:
|
||||
_cp_limit = headroom
|
||||
region = remaining[:_cp_limit]
|
||||
split_at = region.rfind("\n")
|
||||
if split_at < headroom // 2:
|
||||
if split_at < _cp_limit // 2:
|
||||
split_at = region.rfind(" ")
|
||||
if split_at < 1:
|
||||
split_at = headroom
|
||||
split_at = _cp_limit
|
||||
|
||||
# Avoid splitting inside an inline code span (`...`).
|
||||
# If the text before split_at has an odd number of unescaped
|
||||
|
|
@ -1956,7 +2029,7 @@ class BasePlatformAdapter(ABC):
|
|||
safe_split = candidate.rfind(" ", 0, last_bt)
|
||||
nl_split = candidate.rfind("\n", 0, last_bt)
|
||||
safe_split = max(safe_split, nl_split)
|
||||
if safe_split > headroom // 4:
|
||||
if safe_split > _cp_limit // 4:
|
||||
split_at = safe_split
|
||||
|
||||
chunk_body = remaining[:split_at]
|
||||
|
|
|
|||
|
|
@ -66,6 +66,8 @@ from gateway.platforms.base import (
|
|||
cache_audio_from_bytes,
|
||||
cache_document_from_bytes,
|
||||
SUPPORTED_DOCUMENT_TYPES,
|
||||
utf16_len,
|
||||
_prefix_within_utf16_limit,
|
||||
)
|
||||
from gateway.platforms.telegram_network import (
|
||||
TelegramFallbackTransport,
|
||||
|
|
@ -799,7 +801,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
try:
|
||||
# Format and split message if needed
|
||||
formatted = self.format_message(content)
|
||||
chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
|
||||
chunks = self.truncate_message(
|
||||
formatted, self.MAX_MESSAGE_LENGTH, len_fn=utf16_len,
|
||||
)
|
||||
if len(chunks) > 1:
|
||||
# truncate_message appends a raw " (1/2)" suffix. Escape the
|
||||
# MarkdownV2-special parentheses so Telegram doesn't reject the
|
||||
|
|
@ -970,7 +974,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# streaming). Truncate and succeed so the stream consumer can
|
||||
# split the overflow into a new message instead of dying.
|
||||
if "message_too_long" in err_str or "too long" in err_str:
|
||||
truncated = content[: self.MAX_MESSAGE_LENGTH - 20] + "…"
|
||||
truncated = _prefix_within_utf16_limit(
|
||||
content, self.MAX_MESSAGE_LENGTH - 20
|
||||
) + "…"
|
||||
try:
|
||||
await self._bot.edit_message_text(
|
||||
chat_id=int(chat_id),
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ class TestSendWithReplyToMode:
|
|||
@pytest.mark.asyncio
|
||||
async def test_off_mode_no_reply_reference(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("off")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -137,7 +137,7 @@ class TestSendWithReplyToMode:
|
|||
@pytest.mark.asyncio
|
||||
async def test_first_mode_only_first_chunk_references(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("first")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -152,7 +152,7 @@ class TestSendWithReplyToMode:
|
|||
@pytest.mark.asyncio
|
||||
async def test_all_mode_all_chunks_reference(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("all")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ class TestSendWithReplyToMode:
|
|||
@pytest.mark.asyncio
|
||||
async def test_no_reply_to_param_no_reference(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("all")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to=None)
|
||||
|
||||
|
|
@ -176,7 +176,7 @@ class TestSendWithReplyToMode:
|
|||
@pytest.mark.asyncio
|
||||
async def test_single_chunk_respects_first_mode(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("first")
|
||||
adapter.truncate_message = lambda content, max_len: ["single chunk"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]
|
||||
|
||||
await adapter.send("12345", "test", reply_to="999")
|
||||
|
||||
|
|
@ -187,7 +187,7 @@ class TestSendWithReplyToMode:
|
|||
@pytest.mark.asyncio
|
||||
async def test_single_chunk_off_mode(self):
|
||||
adapter, channel, ref_msg = _make_discord_adapter("off")
|
||||
adapter.truncate_message = lambda content, max_len: ["single chunk"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]
|
||||
|
||||
await adapter.send("12345", "test", reply_to="999")
|
||||
|
||||
|
|
@ -200,7 +200,7 @@ class TestSendWithReplyToMode:
|
|||
async def test_invalid_mode_falls_back_to_first_behavior(self):
|
||||
"""Invalid mode behaves like 'first' — only first chunk gets reference."""
|
||||
adapter, channel, ref_msg = _make_discord_adapter("banana")
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]
|
||||
|
||||
await adapter.send("12345", "test", reply_to="999")
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ from gateway.platforms.base import (
|
|||
MessageEvent,
|
||||
MessageType,
|
||||
safe_url_for_log,
|
||||
utf16_len,
|
||||
_prefix_within_utf16_limit,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -448,3 +450,135 @@ class TestGetHumanDelay:
|
|||
with patch.dict(os.environ, env):
|
||||
delay = BasePlatformAdapter._get_human_delay()
|
||||
assert 0.1 <= delay <= 0.2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# utf16_len / _prefix_within_utf16_limit / truncate_message with len_fn
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ported from nearai/ironclaw#2304 — Telegram counts message length in UTF-16
|
||||
# code units, not Unicode code-points. Astral-plane characters (emoji, CJK
|
||||
# Extension B) are surrogate pairs: 1 Python char but 2 UTF-16 units.
|
||||
|
||||
|
||||
class TestUtf16Len:
    """Exercise ``utf16_len`` on BMP text, astral-plane text, and empty input."""

    def test_ascii(self):
        # Plain ASCII: one code unit per character.
        units = utf16_len("hello")
        assert units == 5

    def test_bmp_cjk(self):
        # BMP CJK ideographs still take a single code unit each.
        units = utf16_len("你好")
        assert units == 2

    def test_emoji_surrogate_pair(self):
        # 😀 (U+1F600) sits outside the BMP, so it needs a surrogate pair.
        units = utf16_len("😀")
        assert units == 2

    def test_mixed(self):
        # Two ASCII characters (2 units) followed by one emoji (2 units).
        units = utf16_len("hi😀")
        assert units == 4

    def test_musical_symbol(self):
        # 𝄞 (U+1D11E, Musical Symbol G Clef) is also a surrogate pair.
        units = utf16_len("𝄞")
        assert units == 2

    def test_empty(self):
        # The empty string has no code units at all.
        units = utf16_len("")
        assert units == 0
|
||||
|
||||
|
||||
class TestPrefixWithinUtf16Limit:
    """Exercise ``_prefix_within_utf16_limit`` around surrogate-pair boundaries."""

    def test_fits_entirely(self):
        # Input shorter than the limit comes back unchanged.
        prefix = _prefix_within_utf16_limit("hello", 10)
        assert prefix == "hello"

    def test_ascii_truncation(self):
        # For ASCII the UTF-16 limit behaves like a plain character limit.
        prefix = _prefix_within_utf16_limit("hello world", 5)
        assert prefix == "hello"
        assert utf16_len(prefix) <= 5

    def test_does_not_split_surrogate_pair(self):
        # "a😀b" is 1 + 2 + 1 = 4 units; with a budget of 2 the emoji cannot
        # fit after "a", so only "a" survives.
        prefix = _prefix_within_utf16_limit("a😀b", 2)
        assert prefix == "a"
        assert utf16_len(prefix) <= 2

    def test_emoji_at_limit(self):
        # A lone emoji is exactly 2 units, so a budget of 2 keeps it whole.
        prefix = _prefix_within_utf16_limit("😀x", 2)
        assert prefix == "😀"

    def test_all_emoji(self):
        # Ten emoji are 20 units; a budget of 6 leaves room for three.
        source = "😀" * 10
        prefix = _prefix_within_utf16_limit(source, 6)
        assert prefix == "😀😀😀"
        assert utf16_len(prefix) == 6

    def test_empty(self):
        # Empty input stays empty whatever the limit.
        prefix = _prefix_within_utf16_limit("", 5)
        assert prefix == ""
|
||||
|
||||
|
||||
class TestTruncateMessageUtf16:
    """Verify truncate_message respects UTF-16 lengths when len_fn=utf16_len."""

    def test_short_emoji_message_no_split(self):
        """A short message under the UTF-16 limit should not be split."""
        msg = "Hello 😀 world"
        chunks = BasePlatformAdapter.truncate_message(msg, 4096, len_fn=utf16_len)
        assert len(chunks) == 1
        assert chunks[0] == msg

    def test_emoji_near_limit_triggers_split(self):
        """A message under 4096 codepoints but over 4096 UTF-16 units must split."""
        # 2049 emoji = 2049 codepoints but 4098 UTF-16 units → exceeds 4096
        msg = "😀" * 2049
        assert len(msg) == 2049  # Python len sees 2049 chars
        assert utf16_len(msg) == 4098  # but it's 4098 UTF-16 units

        # Without UTF-16 awareness, this would NOT split (2049 < 4096)
        chunks_naive = BasePlatformAdapter.truncate_message(msg, 4096)
        assert len(chunks_naive) == 1, "Without len_fn, no split expected"

        # With UTF-16 awareness, it MUST split
        chunks = BasePlatformAdapter.truncate_message(msg, 4096, len_fn=utf16_len)
        assert len(chunks) > 1, "With utf16_len, message should be split"

        # Each chunk must fit within the UTF-16 limit
        for i, chunk in enumerate(chunks):
            assert utf16_len(chunk) <= 4096, (
                f"Chunk {i} exceeds 4096 UTF-16 units: {utf16_len(chunk)}"
            )

    def test_each_utf16_chunk_within_limit(self):
        """All chunks produced with utf16_len must fit the limit plus tolerance."""
        # Mix of BMP and astral-plane characters
        msg = ("Hello 😀 world 🎵 test 𝄞 " * 200).strip()
        max_len = 200
        # NOTE(review): the 20-unit tolerance presumably covers the chunk
        # indicator / fence overhead — confirm whether this can be tightened
        # to exactly max_len.
        slack = 20
        chunks = BasePlatformAdapter.truncate_message(msg, max_len, len_fn=utf16_len)
        for i, chunk in enumerate(chunks):
            u16_len = utf16_len(chunk)
            # Report the bound that is actually enforced so a failure is not
            # misread as a violation of max_len alone.
            assert u16_len <= max_len + slack, (
                f"Chunk {i} UTF-16 length {u16_len} exceeds "
                f"{max_len} (+{slack} tolerance)"
            )

    def test_all_content_preserved(self):
        """Splitting with utf16_len must not lose content."""
        words = ["emoji😀", "music🎵", "cjk你好", "plain"] * 100
        msg = " ".join(words)
        chunks = BasePlatformAdapter.truncate_message(msg, 200, len_fn=utf16_len)
        # Chunks are re-joined with a space so a word that ended one chunk
        # does not fuse with the word that starts the next.
        reassembled = " ".join(chunks)
        for word in words:
            assert word in reassembled, f"Word '{word}' lost during UTF-16 split"

    def test_code_blocks_preserved_with_utf16(self):
        """Code block fence handling should work with utf16_len too."""
        msg = "Before\n```python\n" + "x = '😀'\n" * 200 + "```\nAfter"
        chunks = BasePlatformAdapter.truncate_message(msg, 300, len_fn=utf16_len)
        assert len(chunks) > 1
        # Each chunk should have balanced fences
        for i, chunk in enumerate(chunks):
            fence_count = chunk.count("```")
            assert fence_count % 2 == 0, (
                f"Chunk {i} has unbalanced fences ({fence_count})"
            )
|
||||
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="off")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -133,7 +133,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="first")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -148,7 +148,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="all")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -162,7 +162,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="all")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to=None)
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="first")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["single chunk"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]
|
||||
|
||||
await adapter.send("12345", "test", reply_to="999")
|
||||
|
||||
|
|
|
|||
|
|
@ -322,7 +322,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
|
|||
(preserves code-block boundaries, adds part indicators).
|
||||
"""
|
||||
from gateway.config import Platform
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
from gateway.platforms.base import BasePlatformAdapter, utf16_len
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
from gateway.platforms.discord import DiscordAdapter
|
||||
from gateway.platforms.slack import SlackAdapter
|
||||
|
|
@ -354,9 +354,11 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None,
|
|||
|
||||
# Smart-chunk the message to fit within platform limits.
|
||||
# For short messages or platforms without a known limit this is a no-op.
|
||||
# Telegram measures length in UTF-16 code units, not Unicode codepoints.
|
||||
max_len = _MAX_LENGTHS.get(platform)
|
||||
if max_len:
|
||||
chunks = BasePlatformAdapter.truncate_message(message, max_len)
|
||||
_len_fn = utf16_len if platform == Platform.TELEGRAM else None
|
||||
chunks = BasePlatformAdapter.truncate_message(message, max_len, len_fn=_len_fn)
|
||||
else:
|
||||
chunks = [message]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue