feat(qqbot): process attachments in quoted (reply) messages

When a user replies while quoting another message, QQ sets 'message_type = 103' and pushes the referenced message's content + attachments inside 'msg_elements[0]'. The old adapter ignored msg_elements entirely, so:

- Bare quote-replies (no user text) surfaced nothing to the LLM.
- Quoted images/files/voice were never downloaded or described.
- Quoted voice messages specifically produced no transcript — the model had no way to see what the user was referring to when saying 'about this voice note…'.

This commit adds _process_quoted_context(d) which extracts msg_elements, unions their attachments, and runs them through the SAME _process_attachments pipeline as the main message body. Quoted voice gets an STT transcript (tried via QQ's asr_refer_text first, then the configured STT provider); quoted images get cached just like main-body images; quoted files surface with their original filename intact (not the CDN URL hash).

The quoted content is prepended to the user's text as a '[Quoted message]:' block so the LLM sees the full referential context in one turn. Images-only quotes surface a '[Quoted message]: (image)' marker so the model knows an image was referenced even if no text came with it.

All four inbound handlers (_handle_c2c_message, _handle_group_message, _handle_guild_message, _handle_dm_message) now call the helper uniformly — one merge pattern, not four divergent implementations.

Filename preservation is carried by _process_attachments' existing '[Attachment: {filename or ct}]' line; nothing else needed for that.

12 new tests under TestProcessQuotedContext and TestMergeQuoteInto cover:

- Non-quote messages short-circuit to empty
- message_type=103 with no msg_elements is harmless
- Text-only quotes render with '[Quoted message]:' prefix
- Voice attachments in the quote flow through STT
- File attachments in the quote preserve the original filename
- Image attachments surface cached paths + media types
- Images-only quote still emits a marker
- Multiple msg_elements are concatenated
- Malformed message_type values return empty
- _merge_quote_into prepends with a blank-line separator

Full qqbot suite: 130 passed (72 existing + 19 chunked + 27 keyboards + 12 quoted).

Co-authored-by: WideLee <limkuan24@gmail.com>
2026-05-09 03:11:58 +00:00 · 2026-05-07 07:30:13 -07:00 · 2026-05-07 07:30:13 -07:00 · 5b121c6e35
commit 5b121c6e35
parent de584cd1dd
2 changed files with 352 additions and 0 deletions
--- a/gateway/platforms/qqbot/adapter.py
+++ b/gateway/platforms/qqbot/adapter.py
@ -1031,6 +1031,13 @@ class QQAdapter(BasePlatformAdapter):
            len(voice_transcripts),
        )

+        # Merge any quoted-message context (message_type=103 → msg_elements[0]).
+        quoted = await self._process_quoted_context(d)
+        text = self._merge_quote_into(text, quoted["quote_block"])
+        if quoted["image_urls"]:
+            image_urls = image_urls + quoted["image_urls"]
+            image_media_types = image_media_types + quoted["image_media_types"]
+
        if not text.strip() and not image_urls:
            return

@ -1089,6 +1096,13 @@ class QQAdapter(BasePlatformAdapter):
                else attachment_info
            )

+        # Merge any quoted-message context (message_type=103 → msg_elements[0]).
+        quoted = await self._process_quoted_context(d)
+        text = self._merge_quote_into(text, quoted["quote_block"])
+        if quoted["image_urls"]:
+            image_urls = image_urls + quoted["image_urls"]
+            image_media_types = image_media_types + quoted["image_media_types"]
+
        if not text.strip() and not image_urls:
            return

@ -1156,6 +1170,13 @@ class QQAdapter(BasePlatformAdapter):
                else attachment_info
            )

+        # Merge any quoted-message context (message_type=103 → msg_elements[0]).
+        quoted = await self._process_quoted_context(d)
+        text = self._merge_quote_into(text, quoted["quote_block"])
+        if quoted["image_urls"]:
+            image_urls = image_urls + quoted["image_urls"]
+            image_media_types = image_media_types + quoted["image_media_types"]
+
        if not text.strip() and not image_urls:
            return

@ -1220,6 +1241,13 @@ class QQAdapter(BasePlatformAdapter):
                else attachment_info
            )

+        # Merge any quoted-message context (message_type=103 → msg_elements[0]).
+        quoted = await self._process_quoted_context(d)
+        text = self._merge_quote_into(text, quoted["quote_block"])
+        if quoted["image_urls"]:
+            image_urls = image_urls + quoted["image_urls"]
+            image_media_types = image_media_types + quoted["image_media_types"]
+
        if not text.strip() and not image_urls:
            return

@ -1240,6 +1268,113 @@ class QQAdapter(BasePlatformAdapter):
        )
        await self.handle_message(event)

+    # ------------------------------------------------------------------
+    # Quoted-message handling
+    # ------------------------------------------------------------------
+
+    async def _process_quoted_context(
+            self,
+            d: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Build the quoted-message context for a quote-reply.
+
+        When a user replies while quoting another message, the platform sets
+        ``message_type = 103`` and pushes the referenced message's content and
+        attachments inside ``msg_elements``. This method gathers the text and
+        attachments of every dict element in that list and passes the
+        attachments to :meth:`_process_attachments`, the same pipeline used for
+        the main message body, so quoted media is handled the same way.
+
+        Anything that is not a well-formed quote (a ``message_type`` other than
+        103 or not convertible to ``int``, a missing/empty/non-list
+        ``msg_elements``) yields the empty result. An element whose ``content``
+        is ``None`` contributes no text rather than the literal ``"None"``.
+
+        :param d: Raw inbound message dict (from the WS dispatch payload).
+        :returns: Dict with keys:
+
+            - ``quote_block``: string to prepend to the user's text body
+              (empty when there's nothing quoted). It starts with
+              ``[Quoted message]:`` followed by the quoted text, voice
+              transcripts and attachment info, one per line; an images-only
+              quote gets the single-line marker ``[Quoted message]: (image)``.
+            - ``image_urls``: ``image_urls`` returned by
+              :meth:`_process_attachments` for the quoted attachments.
+            - ``image_media_types``: the matching ``image_media_types`` list
+              from the same call.
+        """
+        empty = {
+            "quote_block": "",
+            "image_urls": [],
+            "image_media_types": [],
+        }
+        # Short-circuit: only message_type 103 indicates a quote.
+        try:
+            if int(d.get("message_type", 0) or 0) != 103:
+                return empty
+        except (TypeError, ValueError):
+            return empty
+
+        elements = d.get("msg_elements")
+        if not isinstance(elements, list) or not elements:
+            return empty
+
+        # msg_elements[0] carries the referenced message. Additional elements
+        # (if any) are very rare in practice; we concatenate their text and
+        # union their attachments for completeness.
+        quoted_text_parts: List[str] = []
+        all_attachments: List[Dict[str, Any]] = []
+        for elem in elements:
+            if not isinstance(elem, dict):
+                continue
+            # An explicit null must not leak into the prompt as str(None).
+            raw_content = elem.get("content")
+            etext = "" if raw_content is None else str(raw_content).strip()
+            if etext:
+                quoted_text_parts.append(etext)
+            eatts = elem.get("attachments")
+            if isinstance(eatts, list):
+                all_attachments.extend(a for a in eatts if isinstance(a, dict))
+
+        att_result = await self._process_attachments(all_attachments)
+        quoted_voice = att_result.get("voice_transcripts") or []
+        quoted_info = att_result.get("attachment_info") or ""
+        quoted_images = att_result.get("image_urls") or []
+        quoted_image_types = att_result.get("image_media_types") or []
+
+        lines: List[str] = []
+        if quoted_text_parts:
+            lines.append(" ".join(quoted_text_parts))
+        # Voice transcripts follow the quoted text, one per line.
+        lines.extend(quoted_voice)
+        if quoted_info:
+            lines.append(quoted_info)
+
+        if not lines and not quoted_images:
+            return empty
+
+        if lines:
+            quote_block = "[Quoted message]:\n" + "\n".join(lines)
+        else:
+            # Images-only quote: give the LLM at least a marker so it knows
+            # context was referenced.
+            quote_block = "[Quoted message]: (image)"
+
+        return {
+            "quote_block": quote_block,
+            "image_urls": quoted_images,
+            "image_media_types": quoted_image_types,
+        }
+
+    @staticmethod
+    def _merge_quote_into(text: str, quote_block: str) -> str:
+        """Return *text* with ``quote_block`` placed above it after a blank line."""
+        if quote_block and text.strip():
+            # Both parts present: quote first, then the user's own words.
+            return "\n\n".join((quote_block, text)).strip()
+        # At most one part is non-blank; an empty quote leaves text untouched.
+        return quote_block or text
+
    # ------------------------------------------------------------------
    # Attachment processing
    # ------------------------------------------------------------------
--- a/tests/gateway/test_qqbot.py
+++ b/tests/gateway/test_qqbot.py
@ -1301,3 +1301,220 @@ class TestAdapterInteractionDispatch:
            "user_openid": "u",
            "data": {"resolved": {"button_data": "approve:s:deny"}},
        })
+
+
+# ---------------------------------------------------------------------------
+# Quoted-message handling (message_type=103 → msg_elements)
+# ---------------------------------------------------------------------------
+
+class TestProcessQuotedContext:
+    """Tests for ``QQAdapter._process_quoted_context`` (quote-replies, message_type=103)."""
+
+    def _make_adapter(self):
+        from gateway.platforms.qqbot.adapter import QQAdapter
+        return QQAdapter(_make_config(app_id="a", client_secret="b"))
+
+    @pytest.mark.asyncio
+    async def test_non_quote_message_returns_empty(self):
+        adapter = self._make_adapter()
+        d = {"message_type": 0, "content": "hi"}
+        out = await adapter._process_quoted_context(d)
+        assert out == {"quote_block": "", "image_urls": [], "image_media_types": []}
+
+    @pytest.mark.asyncio
+    async def test_quote_type_but_no_elements_returns_empty(self):
+        adapter = self._make_adapter()
+        d = {"message_type": 103}
+        out = await adapter._process_quoted_context(d)
+        assert out["quote_block"] == ""
+
+    @pytest.mark.asyncio
+    async def test_quote_with_text_only(self):
+        adapter = self._make_adapter()
+        # Stub _process_attachments: it is still awaited even with no attachments.
+        async def fake_process(_a):
+            return {"image_urls": [], "image_media_types": [],
+                    "voice_transcripts": [], "attachment_info": ""}
+        adapter._process_attachments = fake_process  # type: ignore[assignment]
+
+        d = {
+            "message_type": 103,
+            "msg_elements": [
+                {"content": "Did you see this file?", "attachments": []},
+            ],
+        }
+        out = await adapter._process_quoted_context(d)
+        assert out["quote_block"].startswith("[Quoted message]:")
+        assert "Did you see this file?" in out["quote_block"]
+        assert out["image_urls"] == []
+
+    @pytest.mark.asyncio
+    async def test_quote_with_voice_attachment_runs_stt(self):
+        adapter = self._make_adapter()
+
+        # Capture what attachments are passed into _process_attachments.
+        captured = []
+
+        async def fake_process(atts):
+            captured.append(atts)
+            return {
+                "image_urls": [],
+                "image_media_types": [],
+                "voice_transcripts": ["[Voice] hello from the quoted audio"],
+                "attachment_info": "",
+            }
+
+        adapter._process_attachments = fake_process  # type: ignore[assignment]
+
+        d = {
+            "message_type": 103,
+            "msg_elements": [{
+                "content": "",
+                "attachments": [
+                    {"content_type": "audio/silk",
+                     "url": "https://qq-cdn/x.silk",
+                     "filename": "rec.silk"}
+                ],
+            }],
+        }
+        out = await adapter._process_quoted_context(d)
+
+        # The quoted voice attachment must reach _process_attachments (stubbed here).
+        assert captured and len(captured[0]) == 1
+        assert captured[0][0]["content_type"] == "audio/silk"
+        assert "[Quoted message]:" in out["quote_block"]
+        assert "hello from the quoted audio" in out["quote_block"]
+
+    @pytest.mark.asyncio
+    async def test_quote_with_file_preserves_filename(self):
+        """Quoted file attachments must surface the original filename, not the CDN hash."""
+        adapter = self._make_adapter()
+
+        async def fake_process(atts):
+            # Presumed to mirror _process_attachments (not visible here): non-image/voice
+            # attachments show up in attachment_info using the real filename.
+            parts = []
+            for a in atts:
+                fn = a.get("filename") or a.get("content_type", "file")
+                parts.append(f"[Attachment: {fn}]")
+            return {
+                "image_urls": [], "image_media_types": [],
+                "voice_transcripts": [],
+                "attachment_info": "\n".join(parts),
+            }
+
+        adapter._process_attachments = fake_process  # type: ignore[assignment]
+
+        d = {
+            "message_type": 103,
+            "msg_elements": [{
+                "content": "check this",
+                "attachments": [
+                    {"content_type": "application/zip",
+                     "url": "https://qq-cdn/abc123",
+                     "filename": "quarterly-report.zip"},
+                ],
+            }],
+        }
+        out = await adapter._process_quoted_context(d)
+        assert "quarterly-report.zip" in out["quote_block"]
+        assert "check this" in out["quote_block"]
+
+    @pytest.mark.asyncio
+    async def test_quote_with_image_returns_cached_paths(self):
+        adapter = self._make_adapter()
+
+        async def fake_process(atts):
+            return {
+                "image_urls": ["/tmp/cached_q.jpg"],
+                "image_media_types": ["image/jpeg"],
+                "voice_transcripts": [],
+                "attachment_info": "",
+            }
+
+        adapter._process_attachments = fake_process  # type: ignore[assignment]
+
+        d = {
+            "message_type": 103,
+            "msg_elements": [{
+                "content": "look at this",
+                "attachments": [{"content_type": "image/jpeg", "url": "https://x"}],
+            }],
+        }
+        out = await adapter._process_quoted_context(d)
+        assert out["image_urls"] == ["/tmp/cached_q.jpg"]
+        assert out["image_media_types"] == ["image/jpeg"]
+        assert "look at this" in out["quote_block"]
+
+    @pytest.mark.asyncio
+    async def test_quote_with_image_only_no_text(self):
+        """Images-only quote still surfaces a marker so the LLM has context."""
+        adapter = self._make_adapter()
+
+        async def fake_process(atts):
+            return {
+                "image_urls": ["/tmp/only.png"],
+                "image_media_types": ["image/png"],
+                "voice_transcripts": [],
+                "attachment_info": "",
+            }
+
+        adapter._process_attachments = fake_process  # type: ignore[assignment]
+
+        d = {
+            "message_type": 103,
+            "msg_elements": [{
+                "content": "",
+                "attachments": [{"content_type": "image/png", "url": "https://x"}],
+            }],
+        }
+        out = await adapter._process_quoted_context(d)
+        assert out["quote_block"]  # non-empty marker; exact text is not pinned here
+        assert out["image_urls"] == ["/tmp/only.png"]
+
+    @pytest.mark.asyncio
+    async def test_multiple_elements_concatenated(self):
+        adapter = self._make_adapter()
+
+        async def fake_process(atts):
+            assert len(atts) == 2  # attachments of both elements arrive in one call
+            return {
+                "image_urls": [], "image_media_types": [],
+                "voice_transcripts": [], "attachment_info": "",
+            }
+
+        adapter._process_attachments = fake_process  # type: ignore[assignment]
+
+        d = {
+            "message_type": 103,
+            "msg_elements": [
+                {"content": "first", "attachments": [{"content_type": "image/png", "url": "a"}]},
+                {"content": "second", "attachments": [{"content_type": "image/png", "url": "b"}]},
+            ],
+        }
+        out = await adapter._process_quoted_context(d)
+        assert "first" in out["quote_block"]
+        assert "second" in out["quote_block"]
+
+    @pytest.mark.asyncio
+    async def test_invalid_message_type_string_returns_empty(self):
+        adapter = self._make_adapter()
+        out = await adapter._process_quoted_context(
+            {"message_type": "not-a-number", "msg_elements": [{"content": "x"}]}
+        )
+        assert out["quote_block"] == ""
+
+
+class TestMergeQuoteInto:  # one test per branch of QQAdapter._merge_quote_into
+    def test_empty_quote_returns_original(self):
+        from gateway.platforms.qqbot.adapter import QQAdapter
+        assert QQAdapter._merge_quote_into("hello", "") == "hello"  # text unchanged
+
+    def test_empty_text_returns_only_quote(self):
+        from gateway.platforms.qqbot.adapter import QQAdapter
+        assert QQAdapter._merge_quote_into("", "[Quoted]") == "[Quoted]"  # no separator
+
+    def test_both_present_joined_with_blank_line(self):
+        from gateway.platforms.qqbot.adapter import QQAdapter
+        merged = QQAdapter._merge_quote_into("hi there", "[Quoted]:\nctx")
+        assert merged == "[Quoted]:\nctx\n\nhi there"  # quote, blank line, then text