From da9f96bf51aad33804ee37c671fff975c96c06db Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sat, 11 Apr 2026 12:00:05 -0700 Subject: [PATCH] fix(weixin): keep multi-line messages in single bubble by default (#7903) The Weixin adapter was splitting responses at every top-level newline, causing notification spam (up to 70 API calls for a single long markdown response). This salvages the best aspects of six contributor PRs: Compact mode (new default): - Messages under the 4000-char limit stay as a single bubble even with multiple lines, paragraphs, and code blocks - Only oversized messages get split at logical markdown boundaries - A 0.3s delay between consecutive chunks (applied in both modes) prevents WeChat rate-limit drops Legacy mode (opt-in): - Set split_multiline_messages: true in platforms.weixin.extra config - Or set WEIXIN_SPLIT_MULTILINE_MESSAGES=true env var - Restores the old per-line splitting behavior Salvaged from PRs #7797 (guantoubaozi), #7792 (luoxiao6645), #7838 (qyx596), #7825 (weedge), #7784 (sherunlock03), #7773 (JnyRoad). Core fix unanimous across all six; config toggle from #7838; inter-chunk delay from #7825. 
--- gateway/config.py | 3 + gateway/platforms/weixin.py | 77 ++++++++++++++++----- tests/gateway/test_weixin.py | 30 ++++++-- website/docs/user-guide/messaging/weixin.md | 15 ++-- 4 files changed, 97 insertions(+), 28 deletions(-) diff --git a/gateway/config.py b/gateway/config.py index d2dc45eae..34ef31d7b 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -1017,6 +1017,9 @@ def _apply_env_overrides(config: GatewayConfig) -> None: weixin_group_allowed_users = os.getenv("WEIXIN_GROUP_ALLOWED_USERS", "").strip() if weixin_group_allowed_users: extra["group_allow_from"] = weixin_group_allowed_users + weixin_split_multiline = os.getenv("WEIXIN_SPLIT_MULTILINE_MESSAGES", "").strip() + if weixin_split_multiline: + extra["split_multiline_messages"] = weixin_split_multiline weixin_home = os.getenv("WEIXIN_HOME_CHANNEL", "").strip() if weixin_home: config.platforms[Platform.WEIXIN].home_channel = HomeChannel( diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py index 42b0b7fff..e25bb350f 100644 --- a/gateway/platforms/weixin.py +++ b/gateway/platforms/weixin.py @@ -755,23 +755,58 @@ def _pack_markdown_blocks_for_weixin(content: str, max_length: int) -> List[str] return packed -def _split_text_for_weixin_delivery(content: str, max_length: int) -> List[str]: +def _split_text_for_weixin_delivery( + content: str, max_length: int, split_per_line: bool = False, +) -> List[str]: """Split content into sequential Weixin messages. - Prefer one message per top-level line/markdown unit when the author used - explicit line breaks. Oversized units fall back to block-aware packing so - long code fences still split safely. - """ - if len(content) <= max_length and "\n" not in content: - return [content] + *compact* (default): Keep everything in a single message whenever it fits + within the platform limit, even when the author used explicit line breaks. + Only fall back to block-aware packing when the payload exceeds + ``max_length``. 
- chunks: List[str] = [] - for unit in _split_delivery_units_for_weixin(content): - if len(unit) <= max_length: - chunks.append(unit) - continue - chunks.extend(_pack_markdown_blocks_for_weixin(unit, max_length)) - return chunks or [content] + *per_line* (``split_per_line=True``): Legacy behavior — top-level line + breaks become separate chat messages; oversized units still use + block-aware packing. + + The active mode is controlled via ``config.yaml`` -> + ``platforms.weixin.extra.split_multiline_messages`` (``true`` / ``false``) + or the env var ``WEIXIN_SPLIT_MULTILINE_MESSAGES``. + """ + if split_per_line: + # Legacy: one message per top-level delivery unit. + if len(content) <= max_length and "\n" not in content: + return [content] + chunks: List[str] = [] + for unit in _split_delivery_units_for_weixin(content): + if len(unit) <= max_length: + chunks.append(unit) + continue + chunks.extend(_pack_markdown_blocks_for_weixin(unit, max_length)) + return chunks or [content] + + # Compact (default): single message when under the limit. 
+ if len(content) <= max_length: + return [content] + return _pack_markdown_blocks_for_weixin(content, max_length) or [content] + + +def _coerce_bool(value: Any, default: bool = True) -> bool: + """Coerce a config value to bool, tolerating strings like ``"true"``.""" + if value is None: + return default + if isinstance(value, bool): + return value + if isinstance(value, (int, float)): + return bool(value) + text = str(value).strip().lower() + if not text: + return default + if text in {"1", "true", "yes", "on"}: + return True + if text in {"0", "false", "no", "off"}: + return False + return default def _extract_text(item_list: List[Dict[str, Any]]) -> str: @@ -991,6 +1026,11 @@ class WeixinAdapter(BasePlatformAdapter): group_allow_from = os.getenv("WEIXIN_GROUP_ALLOWED_USERS", "") self._allow_from = self._coerce_list(allow_from) self._group_allow_from = self._coerce_list(group_allow_from) + self._split_multiline_messages = _coerce_bool( + extra.get("split_multiline_messages") + or os.getenv("WEIXIN_SPLIT_MULTILINE_MESSAGES"), + default=False, + ) if self._account_id and not self._token: persisted = load_weixin_account(hermes_home, self._account_id) @@ -1330,7 +1370,9 @@ class WeixinAdapter(BasePlatformAdapter): logger.debug("[%s] getConfig failed for %s: %s", self.name, _safe_id(user_id), exc) def _split_text(self, content: str) -> List[str]: - return _split_text_for_weixin_delivery(content, self.MAX_MESSAGE_LENGTH) + return _split_text_for_weixin_delivery( + content, self.MAX_MESSAGE_LENGTH, self._split_multiline_messages, + ) async def send( self, @@ -1344,7 +1386,10 @@ class WeixinAdapter(BasePlatformAdapter): context_token = self._token_store.get(self._account_id, chat_id) last_message_id: Optional[str] = None try: - for chunk in self._split_text(self.format_message(content)): + chunks = self._split_text(self.format_message(content)) + for idx, chunk in enumerate(chunks): + if idx > 0: + await asyncio.sleep(0.3) client_id = f"hermes-weixin-{uuid.uuid4().hex}" 
await _send_message( self._session, diff --git a/tests/gateway/test_weixin.py b/tests/gateway/test_weixin.py index 74b59f2f1..caf4a7eba 100644 --- a/tests/gateway/test_weixin.py +++ b/tests/gateway/test_weixin.py @@ -62,15 +62,15 @@ class TestWeixinFormatting: class TestWeixinChunking: - def test_split_text_sends_top_level_newlines_as_separate_messages(self): + def test_split_text_keeps_short_multiline_message_in_single_chunk(self): adapter = _make_adapter() content = adapter.format_message("第一行\n第二行\n第三行") chunks = adapter._split_text(content) - assert chunks == ["第一行", "第二行", "第三行"] + assert chunks == ["第一行\n第二行\n第三行"] - def test_split_text_keeps_indented_followup_with_previous_line(self): + def test_split_text_keeps_short_reformatted_table_in_single_chunk(self): adapter = _make_adapter() content = adapter.format_message( @@ -81,10 +81,7 @@ class TestWeixinChunking: ) chunks = adapter._split_text(content) - assert chunks == [ - "- Setting: Timeout\n Value: 30s", - "- Setting: Retries\n Value: 3", - ] + assert chunks == [content] def test_split_text_keeps_complete_code_block_together_when_possible(self): adapter = _make_adapter() @@ -114,6 +111,23 @@ class TestWeixinChunking: assert all(len(chunk) <= adapter.MAX_MESSAGE_LENGTH for chunk in chunks) assert all(chunk.count("```") >= 2 for chunk in chunks) + def test_split_text_can_restore_legacy_multiline_splitting_via_config(self): + adapter = WeixinAdapter( + PlatformConfig( + enabled=True, + extra={ + "account_id": "acct", + "token": "***", + "split_multiline_messages": True, + }, + ) + ) + + content = adapter.format_message("第一行\n第二行\n第三行") + chunks = adapter._split_text(content) + + assert chunks == ["第一行", "第二行", "第三行"] + class TestWeixinConfig: def test_apply_env_overrides_configures_weixin(self): @@ -127,6 +141,7 @@ class TestWeixinConfig: "WEIXIN_BASE_URL": "https://ilink.example.com/", "WEIXIN_CDN_BASE_URL": "https://cdn.example.com/c2c/", "WEIXIN_DM_POLICY": "allowlist", + 
"WEIXIN_SPLIT_MULTILINE_MESSAGES": "true", "WEIXIN_ALLOWED_USERS": "wxid_1,wxid_2", "WEIXIN_HOME_CHANNEL": "wxid_1", "WEIXIN_HOME_CHANNEL_NAME": "Primary DM", @@ -142,6 +157,7 @@ class TestWeixinConfig: assert platform_config.extra["base_url"] == "https://ilink.example.com" assert platform_config.extra["cdn_base_url"] == "https://cdn.example.com/c2c" assert platform_config.extra["dm_policy"] == "allowlist" + assert platform_config.extra["split_multiline_messages"] == "true" assert platform_config.extra["allow_from"] == "wxid_1,wxid_2" assert platform_config.home_channel == HomeChannel(Platform.WEIXIN, "wxid_1", "Primary DM") diff --git a/website/docs/user-guide/messaging/weixin.md b/website/docs/user-guide/messaging/weixin.md index 656081a22..f658e0e23 100644 --- a/website/docs/user-guide/messaging/weixin.md +++ b/website/docs/user-guide/messaging/weixin.md @@ -66,6 +66,9 @@ WEIXIN_ACCOUNT_ID=your-account-id WEIXIN_DM_POLICY=open WEIXIN_ALLOWED_USERS=user_id_1,user_id_2 +# Optional: restore legacy multiline splitting behavior +# WEIXIN_SPLIT_MULTILINE_MESSAGES=true + # Optional: home channel for cron/notifications WEIXIN_HOME_CHANNEL=chat_id WEIXIN_HOME_CHANNEL_NAME=Home @@ -88,7 +91,7 @@ The adapter will restore saved credentials, connect to the iLink API, and begin - **AES-128-ECB encrypted CDN** — automatic encryption/decryption for all media transfers - **Context token persistence** — disk-backed reply continuity across restarts - **Markdown formatting** — headers, tables, and code blocks are reformatted for WeChat readability -- **Smart message chunking** — long messages are split at logical boundaries (paragraphs, code fences) +- **Smart message chunking** — messages stay as a single bubble when under the limit; only oversized payloads split at logical boundaries - **Typing indicators** — shows "typing…" status in the WeChat client while the agent processes - **SSRF protection** — outbound media URLs are validated before download - **Message deduplication** — 
5-minute sliding window prevents double-processing @@ -108,6 +111,7 @@ Set these in `config.yaml` under `platforms.weixin.extra`: | `group_policy` | `disabled` | Group access: `open`, `allowlist`, `disabled` | | `allow_from` | `[]` | User IDs allowed for DMs (when dm_policy=allowlist) | | `group_allow_from` | `[]` | Group IDs allowed (when group_policy=allowlist) | +| `split_multiline_messages` | `false` | When `true`, split multi-line replies into multiple chat messages (legacy behavior). When `false`, keep multi-line replies as one message unless they exceed the length limit. | ## Access Policies @@ -211,13 +215,14 @@ WeChat's personal chat does not natively render full Markdown. The adapter refor ## Message Chunking -Long messages are split intelligently for chat delivery: +Messages are delivered as a single chat message whenever they fit within the platform limit. Only oversized payloads are split for delivery: - Maximum message length: **4000 characters** -- Split points prefer paragraph boundaries and blank lines -- Code fences are kept intact (never split mid-block) -- Indented continuation lines (sub-items in reformatted tables/lists) stay with their parent +- Messages under the limit stay intact even when they contain multiple paragraphs or line breaks +- Oversized messages split at logical boundaries (paragraphs, blank lines, code fences) +- Code fences are kept intact whenever possible (never split mid-block unless the fence itself exceeds the limit) - Oversized individual blocks fall back to the base adapter's truncation logic +- A 0.3 s inter-chunk delay prevents WeChat rate-limit drops when multiple chunks are sent ## Typing Indicators