From a9ebee5f02b5148ceb9fb540eea58954d04e160d Mon Sep 17 00:00:00 2001
From: Hedirman
Date: Mon, 4 May 2026 06:54:18 +0800
Subject: [PATCH] Fix WhatsApp long message splitting

---
Review notes (text between "---" and the first "diff --git" is ignored by git am):

* gateway/platforms/whatsapp.py: send() now chunks to
  _outgoing_chunk_limit(), i.e. MAX_MESSAGE_LENGTH minus the length of the
  reply prefix the bridge prepends in self-chat mode, floored at 1024.
* scripts/whatsapp-bridge/bridge.js: /send and /edit pass the prefixed text
  through splitLongMessage(). It cuts at the last newline if that falls in
  the second half of the window, otherwise at the last space, otherwise
  hard at maxLength. Chunks are sent CHUNK_DELAY_MS apart.
* Changed in review: splitLongMessage() now falls back to the hard cut when
  everything before the chosen boundary is whitespace. Previously a message
  such as two spaces followed by an unbroken token longer than the limit
  produced an empty first chunk, which /send would send as empty text and
  /edit would use as the edited text. The change is a single in-place line,
  so hunk line counts and the diffstat are unchanged; the bridge.js
  post-image blob id on the index line is still the one from the original
  patch.
* NOTE(review): Python len() counts code points while JavaScript
  String.length counts UTF-16 code units, so text containing astral-plane
  emoji can still exceed the bridge limit and be re-split there.
* The patch had been collapsed onto a few lines. Line breaks and blank lines
  were restored from the hunk line counts; context-line indentation follows
  each file's convention and should be confirmed against the tree.

 gateway/platforms/whatsapp.py             | 22 +++++-
 scripts/whatsapp-bridge/bridge.js         | 85 ++++++++++++++++++-----
 tests/gateway/test_whatsapp_formatting.py | 33 +++++++++
 3 files changed, 122 insertions(+), 18 deletions(-)

diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py
index 3aff6bfd37..ec45487039 100644
--- a/gateway/platforms/whatsapp.py
+++ b/gateway/platforms/whatsapp.py
@@ -217,6 +217,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
     # WhatsApp message limits — practical UX limit, not protocol max.
     # WhatsApp allows ~65K but long messages are unreadable on mobile.
     MAX_MESSAGE_LENGTH = 4096
+    DEFAULT_REPLY_PREFIX = "⚕ *Hermes Agent*\n────────────\n"
 
     # Default bridge location relative to the hermes-agent install
     _DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
@@ -252,6 +253,25 @@ class WhatsAppAdapter(BasePlatformAdapter):
         # notification before the normal "✓ whatsapp disconnected" fires.
         self._shutting_down: bool = False
 
+    def _effective_reply_prefix(self) -> str:
+        """Return the prefix the Node bridge will add in self-chat mode."""
+        whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
+        if whatsapp_mode != "self-chat":
+            return ""
+        if self._reply_prefix is not None:
+            return self._reply_prefix.replace("\\n", "\n")
+        env_prefix = os.getenv("WHATSAPP_REPLY_PREFIX")
+        if env_prefix is not None:
+            return env_prefix.replace("\\n", "\n")
+        return self.DEFAULT_REPLY_PREFIX
+
+    def _outgoing_chunk_limit(self) -> int:
+        """Reserve room for the bridge-side prefix so final WhatsApp text fits."""
+        prefix_len = len(self._effective_reply_prefix())
+        # Keep enough space for truncate_message's pagination indicator and
+        # code-fence repair even if a user configures a very long prefix.
+        return max(1024, self.MAX_MESSAGE_LENGTH - prefix_len)
+
     def _whatsapp_require_mention(self) -> bool:
         configured = self.config.extra.get("require_mention")
         if configured is not None:
@@ -780,7 +800,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
 
             # Format and chunk the message
             formatted = self.format_message(content)
-            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+            chunks = self.truncate_message(formatted, self._outgoing_chunk_limit())
 
             last_message_id = None
             for chunk in chunks:
diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js
index af6d6b54a0..162acdaca1 100644
--- a/scripts/whatsapp-bridge/bridge.js
+++ b/scripts/whatsapp-bridge/bridge.js
@@ -55,6 +55,12 @@ const DEFAULT_REPLY_PREFIX = '⚕ *Hermes Agent*\n──────────
 const REPLY_PREFIX = process.env.WHATSAPP_REPLY_PREFIX === undefined
   ? DEFAULT_REPLY_PREFIX
   : process.env.WHATSAPP_REPLY_PREFIX.replace(/\\n/g, '\n');
+const MAX_MESSAGE_LENGTH = parseInt(process.env.WHATSAPP_MAX_MESSAGE_LENGTH || '4096', 10);
+const CHUNK_DELAY_MS = parseInt(process.env.WHATSAPP_CHUNK_DELAY_MS || '300', 10);
+
+function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
 
 function formatOutgoingMessage(message) {
   // In bot mode, messages come from a different number so the prefix is
@@ -64,6 +70,38 @@ function formatOutgoingMessage(message) {
   return REPLY_PREFIX ? `${REPLY_PREFIX}${message}` : message;
 }
 
+function splitLongMessage(message, maxLength = MAX_MESSAGE_LENGTH) {
+  const text = String(message || '');
+  if (!text) return [];
+  if (!Number.isFinite(maxLength) || maxLength < 1 || text.length <= maxLength) {
+    return [text];
+  }
+
+  const chunks = [];
+  let remaining = text;
+  while (remaining.length > maxLength) {
+    let splitAt = remaining.lastIndexOf('\n', maxLength);
+    if (splitAt < Math.floor(maxLength / 2)) {
+      splitAt = remaining.lastIndexOf(' ', maxLength);
+    }
+    if (splitAt < 1 || !remaining.slice(0, splitAt).trim()) splitAt = maxLength;
+
+    chunks.push(remaining.slice(0, splitAt).trimEnd());
+    remaining = remaining.slice(splitAt).trimStart();
+  }
+  if (remaining) chunks.push(remaining);
+  return chunks;
+}
+
+function trackSentMessageId(sent) {
+  if (sent?.key?.id) {
+    recentlySentIds.add(sent.key.id);
+    if (recentlySentIds.size > MAX_RECENT_IDS) {
+      recentlySentIds.delete(recentlySentIds.values().next().value);
+    }
+  }
+}
+
 function normalizeWhatsAppId(value) {
   if (!value) return '';
   return String(value).replace(':', '@');
@@ -423,17 +461,22 @@ app.post('/send', async (req, res) => {
   }
 
   try {
-    const sent = await sock.sendMessage(chatId, { text: formatOutgoingMessage(message) });
-
-    // Track sent message ID to prevent echo-back loops
-    if (sent?.key?.id) {
-      recentlySentIds.add(sent.key.id);
-      if (recentlySentIds.size > MAX_RECENT_IDS) {
-        recentlySentIds.delete(recentlySentIds.values().next().value);
+    const chunks = splitLongMessage(formatOutgoingMessage(message));
+    const messageIds = [];
+    for (let i = 0; i < chunks.length; i += 1) {
+      const sent = await sock.sendMessage(chatId, { text: chunks[i] });
+      trackSentMessageId(sent);
+      if (sent?.key?.id) messageIds.push(sent.key.id);
+      if (chunks.length > 1 && i < chunks.length - 1) {
+        await sleep(CHUNK_DELAY_MS);
       }
     }
 
-    res.json({ success: true, messageId: sent?.key?.id });
+    res.json({
+      success: true,
+      messageId: messageIds[messageIds.length - 1],
+      messageIds,
+    });
   } catch (err) {
     res.status(500).json({ error: err.message });
   }
@@ -452,8 +495,22 @@ app.post('/edit', async (req, res) => {
 
   try {
     const key = { id: messageId, fromMe: true, remoteJid: chatId };
-    await sock.sendMessage(chatId, { text: formatOutgoingMessage(message), edit: key });
-    res.json({ success: true });
+    const chunks = splitLongMessage(formatOutgoingMessage(message));
+    const messageIds = [];
+
+    await sock.sendMessage(chatId, { text: chunks[0], edit: key });
+    if (chunks.length > 1) {
+      for (let i = 1; i < chunks.length; i += 1) {
+        const sent = await sock.sendMessage(chatId, { text: chunks[i] });
+        trackSentMessageId(sent);
+        if (sent?.key?.id) messageIds.push(sent.key.id);
+        if (i < chunks.length - 1) {
+          await sleep(CHUNK_DELAY_MS);
+        }
+      }
+    }
+
+    res.json({ success: true, messageIds });
   } catch (err) {
     res.status(500).json({ error: err.message });
   }
@@ -547,13 +604,7 @@ app.post('/send-media', async (req, res) => {
 
     const sent = await sock.sendMessage(chatId, msgPayload);
 
-    // Track sent message ID to prevent echo-back loops
-    if (sent?.key?.id) {
-      recentlySentIds.add(sent.key.id);
-      if (recentlySentIds.size > MAX_RECENT_IDS) {
-        recentlySentIds.delete(recentlySentIds.values().next().value);
-      }
-    }
+    trackSentMessageId(sent);
 
     res.json({ success: true, messageId: sent?.key?.id });
   } catch (err) {
diff --git a/tests/gateway/test_whatsapp_formatting.py b/tests/gateway/test_whatsapp_formatting.py
index 1293847835..1cb4c7bf3d 100644
--- a/tests/gateway/test_whatsapp_formatting.py
+++ b/tests/gateway/test_whatsapp_formatting.py
@@ -145,6 +145,21 @@ class TestMessageLimits:
         from gateway.platforms.whatsapp import WhatsAppAdapter
         assert WhatsAppAdapter.MAX_MESSAGE_LENGTH == 4096
 
+    def test_chunk_limit_reserves_default_self_chat_prefix(self, monkeypatch):
+        adapter = _make_adapter()
+        monkeypatch.delenv("WHATSAPP_REPLY_PREFIX", raising=False)
+        monkeypatch.setenv("WHATSAPP_MODE", "self-chat")
+
+        assert adapter._outgoing_chunk_limit() == (
+            adapter.MAX_MESSAGE_LENGTH - len(adapter.DEFAULT_REPLY_PREFIX)
+        )
+
+    def test_chunk_limit_does_not_reserve_prefix_in_bot_mode(self, monkeypatch):
+        adapter = _make_adapter()
+        monkeypatch.setenv("WHATSAPP_MODE", "bot")
+
+        assert adapter._outgoing_chunk_limit() == adapter.MAX_MESSAGE_LENGTH
+
 
 # ---------------------------------------------------------------------------
 # send() chunking tests
@@ -180,6 +195,24 @@ class TestSendChunking:
         # Should have made multiple calls
         assert adapter._http_session.post.call_count > 1
 
+    @pytest.mark.asyncio
+    async def test_chunks_leave_room_for_bridge_prefix(self, monkeypatch):
+        adapter = _make_adapter()
+        monkeypatch.delenv("WHATSAPP_REPLY_PREFIX", raising=False)
+        monkeypatch.setenv("WHATSAPP_MODE", "self-chat")
+        resp = MagicMock(status=200)
+        resp.json = AsyncMock(return_value={"messageId": "msg1"})
+        adapter._http_session.post = MagicMock(return_value=_AsyncCM(resp))
+
+        long_msg = "a " * 3000
+
+        await adapter.send("chat1", long_msg)
+
+        for call in adapter._http_session.post.call_args_list:
+            payload = call.kwargs.get("json") or call[1].get("json")
+            final_text = adapter.DEFAULT_REPLY_PREFIX + payload["message"]
+            assert len(final_text) <= adapter.MAX_MESSAGE_LENGTH
+
     @pytest.mark.asyncio
     async def test_empty_message_no_send(self):
         adapter = _make_adapter()