mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-27 01:11:40 +00:00
fix(telegram): use UTF-16 code units for message length splitting (#8725)
Port from nearai/ironclaw#2304: Telegram's 4096 character limit is measured in UTF-16 code units, not Unicode codepoints. Characters outside the Basic Multilingual Plane (emoji like 😀, CJK Extension B, musical symbols) are surrogate pairs: 1 Python char but 2 UTF-16 units. Previously, truncate_message() used Python's len() which counts codepoints. This could produce chunks exceeding Telegram's actual limit when messages contain many astral-plane characters. Changes: - Add utf16_len() helper and _prefix_within_utf16_limit() for UTF-16-aware string measurement and truncation - Add _custom_unit_to_cp() binary-search helper that maps a custom-unit budget to the largest safe codepoint slice position - Update truncate_message() to accept optional len_fn parameter - Telegram adapter now passes len_fn=utf16_len when splitting messages - Fix fallback truncation in Telegram error handler to use _prefix_within_utf16_limit instead of codepoint slicing - Update send_message_tool.py to use utf16_len for Telegram platform - Add comprehensive tests: utf16_len, _prefix_within_utf16_limit, truncate_message with len_fn (emoji splitting, content preservation, code block handling) - Update mock lambdas in reply_mode tests to accept **kw for len_fn
This commit is contained in:
parent
3cd6cbee5f
commit
9e992df8ae
6 changed files with 240 additions and 25 deletions
|
|
@ -121,7 +121,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="off")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -133,7 +133,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="first")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -148,7 +148,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="all")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2", "chunk3"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to="999")
|
||||
|
||||
|
|
@ -162,7 +162,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="all")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["chunk1", "chunk2"]
|
||||
|
||||
await adapter.send("12345", "test content", reply_to=None)
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ class TestSendWithReplyToMode:
|
|||
adapter = adapter_factory(reply_to_mode="first")
|
||||
adapter._bot = MagicMock()
|
||||
adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
|
||||
adapter.truncate_message = lambda content, max_len: ["single chunk"]
|
||||
adapter.truncate_message = lambda content, max_len, **kw: ["single chunk"]
|
||||
|
||||
await adapter.send("12345", "test", reply_to="999")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue