From 5b121c6e358a4eb83ee3cb1ec2cfd1b8cae3c7b7 Mon Sep 17 00:00:00 2001 From: WideLee Date: Thu, 7 May 2026 07:30:13 -0700 Subject: [PATCH] feat(qqbot): process attachments in quoted (reply) messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user replies while quoting another message, QQ sets 'message_type = 103' and pushes the referenced message's content + attachments inside 'msg_elements[0]'. The old adapter ignored msg_elements entirely, so: - Bare quote-replies (no user text) surfaced nothing to the LLM. - Quoted images/files/voice were never downloaded or described. - Quoted voice messages specifically produced no transcript — the model had no way to see what the user was referring to when saying 'about this voice note…'. This commit adds _process_quoted_context(d) which extracts msg_elements, unions their attachments, and runs them through the SAME _process_attachments pipeline as the main message body. Quoted voice gets an STT transcript (tried via QQ's asr_refer_text first, then the configured STT provider); quoted images get cached just like main-body images; quoted files surface with their original filename intact (not the CDN URL hash). The quoted content is prepended to the user's text as a '[Quoted message]:' block so the LLM sees the full referential context on one turn. Images-only quotes surface a '[Quoted message]: (image)' marker so the model knows an image was referenced even if no text came with it. All four inbound handlers (_handle_c2c_message, _handle_group_message, _handle_guild_message, _handle_dm_message) now call the helper uniformly — one merge pattern, not four divergent implementations. Filename preservation is carried by _process_attachments' existing '[Attachment: {filename or ct}]' line; nothing else needed for that. 12 new tests under TestProcessQuotedContext and TestMergeQuoteInto cover: - Non-quote messages short-circuit to empty - message_type=103 with no msg_elements is harmless - Text-only quotes render with '[Quoted message]:' prefix - Voice attachments in the quote flow through STT - File attachments in the quote preserve the original filename - Image attachments surface cached paths + media types - Images-only quote still emits a marker - Multiple msg_elements are concatenated - Malformed message_type values return empty - _merge_quote_into prepends with a blank-line separator Full qqbot suite: 130 passed (72 existing + 19 chunked + 27 keyboards + 12 quoted). Co-authored-by: WideLee --- gateway/platforms/qqbot/adapter.py | 135 ++++++++++++++++++ tests/gateway/test_qqbot.py | 217 +++++++++++++++++++++++++++++ 2 files changed, 352 insertions(+) diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index 046758c796..7240097323 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -1031,6 +1031,13 @@ class QQAdapter(BasePlatformAdapter): len(voice_transcripts), ) + # Merge any quoted-message context (message_type=103 → msg_elements[0]). + quoted = await self._process_quoted_context(d) + text = self._merge_quote_into(text, quoted["quote_block"]) + if quoted["image_urls"]: + image_urls = image_urls + quoted["image_urls"] + image_media_types = image_media_types + quoted["image_media_types"] + if not text.strip() and not image_urls: return @@ -1089,6 +1096,13 @@ class QQAdapter(BasePlatformAdapter): else attachment_info ) + # Merge any quoted-message context (message_type=103 → msg_elements[0]). + quoted = await self._process_quoted_context(d) + text = self._merge_quote_into(text, quoted["quote_block"]) + if quoted["image_urls"]: + image_urls = image_urls + quoted["image_urls"] + image_media_types = image_media_types + quoted["image_media_types"] + if not text.strip() and not image_urls: return @@ -1156,6 +1170,13 @@ class QQAdapter(BasePlatformAdapter): else attachment_info ) + # Merge any quoted-message context (message_type=103 → msg_elements[0]). + quoted = await self._process_quoted_context(d) + text = self._merge_quote_into(text, quoted["quote_block"]) + if quoted["image_urls"]: + image_urls = image_urls + quoted["image_urls"] + image_media_types = image_media_types + quoted["image_media_types"] + if not text.strip() and not image_urls: return @@ -1220,6 +1241,13 @@ class QQAdapter(BasePlatformAdapter): else attachment_info ) + # Merge any quoted-message context (message_type=103 → msg_elements[0]). + quoted = await self._process_quoted_context(d) + text = self._merge_quote_into(text, quoted["quote_block"]) + if quoted["image_urls"]: + image_urls = image_urls + quoted["image_urls"] + image_media_types = image_media_types + quoted["image_media_types"] + if not text.strip() and not image_urls: return @@ -1240,6 +1268,113 @@ class QQAdapter(BasePlatformAdapter): ) await self.handle_message(event) + # ------------------------------------------------------------------ + # Quoted-message handling + # ------------------------------------------------------------------ + + async def _process_quoted_context( + self, + d: Dict[str, Any], + ) -> Dict[str, Any]: + """Process the quoted message a user is replying to. + + When a user replies while quoting another message, the platform sets + ``message_type = 103`` and pushes the referenced message's content and + attachments inside ``msg_elements[0]``. The old adapter ignored + ``msg_elements`` entirely, so: + + - Quoted text was surfaced only when the user typed something of + their own — bare quote-replies showed nothing. + - Quoted attachments (images, voice, files) were never downloaded + or described. + - Quoted voice messages specifically produced no transcript, so the + LLM had no way to see what the user was referring to. + + This method parses ``msg_elements`` and runs the quoted attachments + through the same :meth:`_process_attachments` pipeline as the main + message body, so quoted voice messages get STT transcripts and + quoted images are cached identically. + + :param d: Raw inbound message dict (from the WS dispatch payload). + :returns: Dict with keys: + + - ``quote_block``: string to prepend to the user's text body + (empty when there's nothing quoted). + - ``image_urls``: list of cached quoted-image paths. + - ``image_media_types``: parallel list of image MIME types. + """ + empty = { + "quote_block": "", + "image_urls": [], + "image_media_types": [], + } + # Short-circuit: only message_type 103 indicates a quote. + try: + if int(d.get("message_type", 0) or 0) != 103: + return empty + except (TypeError, ValueError): + return empty + + elements = d.get("msg_elements") + if not isinstance(elements, list) or not elements: + return empty + + # msg_elements[0] carries the referenced message. Additional elements + # (if any) are very rare in practice; we concatenate their text and + # union their attachments for completeness. + quoted_text_parts: List[str] = [] + all_attachments: List[Dict[str, Any]] = [] + for elem in elements: + if not isinstance(elem, dict): + continue + etext = str(elem.get("content", "")).strip() + if etext: + quoted_text_parts.append(etext) + eatts = elem.get("attachments") + if isinstance(eatts, list): + for a in eatts: + if isinstance(a, dict): + all_attachments.append(a) + + att_result = await self._process_attachments(all_attachments) + quoted_voice = att_result.get("voice_transcripts") or [] + quoted_info = att_result.get("attachment_info") or "" + quoted_images = att_result.get("image_urls") or [] + quoted_image_types = att_result.get("image_media_types") or [] + + lines: List[str] = [] + if quoted_text_parts: + lines.append(" ".join(quoted_text_parts)) + for t in quoted_voice: + lines.append(t) + if quoted_info: + lines.append(quoted_info) + + if not lines and not quoted_images: + return empty + + if lines: + quote_block = "[Quoted message]:\n" + "\n".join(lines) + else: + # Images-only quote: give the LLM at least a marker so it knows + # context was referenced. + quote_block = "[Quoted message]: (image)" + + return { + "quote_block": quote_block, + "image_urls": quoted_images, + "image_media_types": quoted_image_types, + } + + @staticmethod + def _merge_quote_into(text: str, quote_block: str) -> str: + """Prepend ``quote_block`` to *text*, separated by a blank line.""" + if not quote_block: + return text + if text.strip(): + return f"{quote_block}\n\n{text}".strip() + return quote_block + # ------------------------------------------------------------------ # Attachment processing # ------------------------------------------------------------------ diff --git a/tests/gateway/test_qqbot.py b/tests/gateway/test_qqbot.py index 5ecc28dd4c..336f9ccf6a 100644 --- a/tests/gateway/test_qqbot.py +++ b/tests/gateway/test_qqbot.py @@ -1301,3 +1301,220 @@ class TestAdapterInteractionDispatch: "user_openid": "u", "data": {"resolved": {"button_data": "approve:s:deny"}}, }) + + +# --------------------------------------------------------------------------- +# Quoted-message handling (message_type=103 → msg_elements) +# --------------------------------------------------------------------------- + +class TestProcessQuotedContext: + """Verify the quoted-message pipeline: text + voice STT + images + files.""" + + def _make_adapter(self): + from gateway.platforms.qqbot.adapter import QQAdapter + return QQAdapter(_make_config(app_id="a", client_secret="b")) + + @pytest.mark.asyncio + async def test_non_quote_message_returns_empty(self): + adapter = self._make_adapter() + d = {"message_type": 0, "content": "hi"} + out = await adapter._process_quoted_context(d) + assert out == {"quote_block": "", "image_urls": [], "image_media_types": []} + + @pytest.mark.asyncio + async def test_quote_type_but_no_elements_returns_empty(self): + adapter = self._make_adapter() + d = {"message_type": 103} + out = await adapter._process_quoted_context(d) + assert out["quote_block"] == "" + + @pytest.mark.asyncio + async def test_quote_with_text_only(self): + adapter = self._make_adapter() + # Stub out _process_attachments since there are no attachments anyway. + async def fake_process(_a): + return {"image_urls": [], "image_media_types": [], + "voice_transcripts": [], "attachment_info": ""} + adapter._process_attachments = fake_process # type: ignore[assignment] + + d = { + "message_type": 103, + "msg_elements": [ + {"content": "Did you see this file?", "attachments": []}, + ], + } + out = await adapter._process_quoted_context(d) + assert out["quote_block"].startswith("[Quoted message]:") + assert "Did you see this file?" in out["quote_block"] + assert out["image_urls"] == [] + + @pytest.mark.asyncio + async def test_quote_with_voice_attachment_runs_stt(self): + adapter = self._make_adapter() + + # Capture what attachments are passed into _process_attachments. + captured = [] + + async def fake_process(atts): + captured.append(atts) + return { + "image_urls": [], + "image_media_types": [], + "voice_transcripts": ["[Voice] hello from the quoted audio"], + "attachment_info": "", + } + + adapter._process_attachments = fake_process # type: ignore[assignment] + + d = { + "message_type": 103, + "msg_elements": [{ + "content": "", + "attachments": [ + {"content_type": "audio/silk", + "url": "https://qq-cdn/x.silk", + "filename": "rec.silk"} + ], + }], + } + out = await adapter._process_quoted_context(d) + + # The quoted voice attachment must actually flow through STT. + assert captured and len(captured[0]) == 1 + assert captured[0][0]["content_type"] == "audio/silk" + assert "[Quoted message]:" in out["quote_block"] + assert "hello from the quoted audio" in out["quote_block"] + + @pytest.mark.asyncio + async def test_quote_with_file_preserves_filename(self): + """Quoted file attachments must surface the original filename, not the CDN hash.""" + adapter = self._make_adapter() + + async def fake_process(atts): + # Mirror _process_attachments's behaviour: non-image/voice attachments + # show up in attachment_info using the real filename. + parts = [] + for a in atts: + fn = a.get("filename") or a.get("content_type", "file") + parts.append(f"[Attachment: {fn}]") + return { + "image_urls": [], "image_media_types": [], + "voice_transcripts": [], + "attachment_info": "\n".join(parts), + } + + adapter._process_attachments = fake_process # type: ignore[assignment] + + d = { + "message_type": 103, + "msg_elements": [{ + "content": "check this", + "attachments": [ + {"content_type": "application/zip", + "url": "https://qq-cdn/abc123", + "filename": "quarterly-report.zip"}, + ], + }], + } + out = await adapter._process_quoted_context(d) + assert "quarterly-report.zip" in out["quote_block"] + assert "check this" in out["quote_block"] + + @pytest.mark.asyncio + async def test_quote_with_image_returns_cached_paths(self): + adapter = self._make_adapter() + + async def fake_process(atts): + return { + "image_urls": ["/tmp/cached_q.jpg"], + "image_media_types": ["image/jpeg"], + "voice_transcripts": [], + "attachment_info": "", + } + + adapter._process_attachments = fake_process # type: ignore[assignment] + + d = { + "message_type": 103, + "msg_elements": [{ + "content": "look at this", + "attachments": [{"content_type": "image/jpeg", "url": "https://x"}], + }], + } + out = await adapter._process_quoted_context(d) + assert out["image_urls"] == ["/tmp/cached_q.jpg"] + assert out["image_media_types"] == ["image/jpeg"] + assert "look at this" in out["quote_block"] + + @pytest.mark.asyncio + async def test_quote_with_image_only_no_text(self): + """Images-only quote still surfaces a marker so the LLM has context.""" + adapter = self._make_adapter() + + async def fake_process(atts): + return { + "image_urls": ["/tmp/only.png"], + "image_media_types": ["image/png"], + "voice_transcripts": [], + "attachment_info": "", + } + + adapter._process_attachments = fake_process # type: ignore[assignment] + + d = { + "message_type": 103, + "msg_elements": [{ + "content": "", + "attachments": [{"content_type": "image/png", "url": "https://x"}], + }], + } + out = await adapter._process_quoted_context(d) + assert out["quote_block"] + assert out["image_urls"] == ["/tmp/only.png"] + + @pytest.mark.asyncio + async def test_multiple_elements_concatenated(self): + adapter = self._make_adapter() + + async def fake_process(atts): + assert len(atts) == 2 + return { + "image_urls": [], "image_media_types": [], + "voice_transcripts": [], "attachment_info": "", + } + + adapter._process_attachments = fake_process # type: ignore[assignment] + + d = { + "message_type": 103, + "msg_elements": [ + {"content": "first", "attachments": [{"content_type": "image/png", "url": "a"}]}, + {"content": "second", "attachments": [{"content_type": "image/png", "url": "b"}]}, + ], + } + out = await adapter._process_quoted_context(d) + assert "first" in out["quote_block"] + assert "second" in out["quote_block"] + + @pytest.mark.asyncio + async def test_invalid_message_type_string_returns_empty(self): + adapter = self._make_adapter() + out = await adapter._process_quoted_context( + {"message_type": "not-a-number", "msg_elements": [{"content": "x"}]} + ) + assert out["quote_block"] == "" + + +class TestMergeQuoteInto: + def test_empty_quote_returns_original(self): + from gateway.platforms.qqbot.adapter import QQAdapter + assert QQAdapter._merge_quote_into("hello", "") == "hello" + + def test_empty_text_returns_only_quote(self): + from gateway.platforms.qqbot.adapter import QQAdapter + assert QQAdapter._merge_quote_into("", "[Quoted]") == "[Quoted]" + + def test_both_present_joined_with_blank_line(self): + from gateway.platforms.qqbot.adapter import QQAdapter + merged = QQAdapter._merge_quote_into("hi there", "[Quoted]:\nctx") + assert merged == "[Quoted]:\nctx\n\nhi there"