mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-13 03:52:00 +00:00
feat(qqbot): process attachments in quoted (reply) messages
When a user replies while quoting another message, QQ sets
'message_type = 103' and pushes the referenced message's content +
attachments inside 'msg_elements[0]'. The old adapter ignored
msg_elements entirely, so:
- Bare quote-replies (no user text) surfaced nothing to the LLM.
- Quoted images/files/voice were never downloaded or described.
- Quoted voice messages specifically produced no transcript — the model
had no way to see what the user was referring to when saying 'about
this voice note…'.
This commit adds _process_quoted_context(d) which extracts msg_elements,
unions their attachments, and runs them through the SAME
_process_attachments pipeline as the main message body. Quoted voice
gets an STT transcript (tried via QQ's asr_refer_text first, then the
configured STT provider); quoted images get cached just like main-body
images; quoted files surface with their original filename intact (not
the CDN URL hash).
The quoted content is prepended to the user's text as a '[Quoted message]:'
block so the LLM sees the full referential context in a single turn.
Images-only quotes surface a '[Quoted message]: (image)' marker so the
model knows an image was referenced even if no text came with it.
All four inbound handlers (_handle_c2c_message, _handle_group_message,
_handle_guild_message, _handle_dm_message) now call the helper uniformly
— one merge pattern, not four divergent implementations.
Filename preservation is carried by _process_attachments' existing
'[Attachment: {filename or ct}]' line; nothing else needed for that.
12 new tests under TestProcessQuotedContext and TestMergeQuoteInto cover:
- Non-quote messages short-circuit to empty
- message_type=103 with no msg_elements is harmless
- Text-only quotes render with '[Quoted message]:' prefix
- Voice attachments in the quote flow through STT
- File attachments in the quote preserve the original filename
- Image attachments surface cached paths + media types
- Images-only quote still emits a marker
- Multiple msg_elements are concatenated
- Malformed message_type values return empty
- _merge_quote_into prepends with a blank-line separator
Full qqbot suite: 130 passed (72 existing + 19 chunked + 27 keyboards
+ 12 quoted).
Co-authored-by: WideLee <limkuan24@gmail.com>
This commit is contained in:
parent
de584cd1dd
commit
5b121c6e35
2 changed files with 352 additions and 0 deletions
|
|
@ -1031,6 +1031,13 @@ class QQAdapter(BasePlatformAdapter):
|
||||||
len(voice_transcripts),
|
len(voice_transcripts),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Merge any quoted-message context (message_type=103 → msg_elements[0]).
|
||||||
|
quoted = await self._process_quoted_context(d)
|
||||||
|
text = self._merge_quote_into(text, quoted["quote_block"])
|
||||||
|
if quoted["image_urls"]:
|
||||||
|
image_urls = image_urls + quoted["image_urls"]
|
||||||
|
image_media_types = image_media_types + quoted["image_media_types"]
|
||||||
|
|
||||||
if not text.strip() and not image_urls:
|
if not text.strip() and not image_urls:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -1089,6 +1096,13 @@ class QQAdapter(BasePlatformAdapter):
|
||||||
else attachment_info
|
else attachment_info
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Merge any quoted-message context (message_type=103 → msg_elements[0]).
|
||||||
|
quoted = await self._process_quoted_context(d)
|
||||||
|
text = self._merge_quote_into(text, quoted["quote_block"])
|
||||||
|
if quoted["image_urls"]:
|
||||||
|
image_urls = image_urls + quoted["image_urls"]
|
||||||
|
image_media_types = image_media_types + quoted["image_media_types"]
|
||||||
|
|
||||||
if not text.strip() and not image_urls:
|
if not text.strip() and not image_urls:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -1156,6 +1170,13 @@ class QQAdapter(BasePlatformAdapter):
|
||||||
else attachment_info
|
else attachment_info
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Merge any quoted-message context (message_type=103 → msg_elements[0]).
|
||||||
|
quoted = await self._process_quoted_context(d)
|
||||||
|
text = self._merge_quote_into(text, quoted["quote_block"])
|
||||||
|
if quoted["image_urls"]:
|
||||||
|
image_urls = image_urls + quoted["image_urls"]
|
||||||
|
image_media_types = image_media_types + quoted["image_media_types"]
|
||||||
|
|
||||||
if not text.strip() and not image_urls:
|
if not text.strip() and not image_urls:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -1220,6 +1241,13 @@ class QQAdapter(BasePlatformAdapter):
|
||||||
else attachment_info
|
else attachment_info
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Merge any quoted-message context (message_type=103 → msg_elements[0]).
|
||||||
|
quoted = await self._process_quoted_context(d)
|
||||||
|
text = self._merge_quote_into(text, quoted["quote_block"])
|
||||||
|
if quoted["image_urls"]:
|
||||||
|
image_urls = image_urls + quoted["image_urls"]
|
||||||
|
image_media_types = image_media_types + quoted["image_media_types"]
|
||||||
|
|
||||||
if not text.strip() and not image_urls:
|
if not text.strip() and not image_urls:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -1240,6 +1268,113 @@ class QQAdapter(BasePlatformAdapter):
|
||||||
)
|
)
|
||||||
await self.handle_message(event)
|
await self.handle_message(event)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Quoted-message handling
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _process_quoted_context(
    self,
    d: Dict[str, Any],
) -> Dict[str, Any]:
    """Extract the message a user is replying to and process its attachments.

    A quote-reply is recognised by ``message_type == 103``. The referenced
    message's text and attachments are read from every dict entry in
    ``msg_elements``: text parts are joined with a single space, and all
    attachments are collected into one list and handed to
    :meth:`_process_attachments` in a single call.

    :param d: Raw inbound message dict (from the WS dispatch payload).
    :returns: Dict with keys:

        - ``quote_block``: string to prepend to the user's text body
          (empty when there's nothing quoted).
        - ``image_urls``: list of quoted-image entries as returned by
          :meth:`_process_attachments`.
        - ``image_media_types``: parallel list of image MIME types.
    """
    empty: Dict[str, Any] = {
        "quote_block": "",
        "image_urls": [],
        "image_media_types": [],
    }

    # Short-circuit: only message_type 103 indicates a quote. Values that
    # cannot be coerced to int are treated as "not a quote".
    try:
        if int(d.get("message_type", 0) or 0) != 103:
            return empty
    except (TypeError, ValueError):
        return empty

    elements = d.get("msg_elements")
    if not isinstance(elements, list) or not elements:
        return empty

    quoted_text_parts: List[str] = []
    all_attachments: List[Dict[str, Any]] = []
    for elem in elements:
        if not isinstance(elem, dict):
            continue
        raw_content = elem.get("content")
        # An explicit null must not be stringified into the literal text
        # "None"; ``.get("content", "")`` only covers a *missing* key.
        etext = "" if raw_content is None else str(raw_content).strip()
        if etext:
            quoted_text_parts.append(etext)
        eatts = elem.get("attachments")
        if isinstance(eatts, list):
            all_attachments.extend(a for a in eatts if isinstance(a, dict))

    att_result = await self._process_attachments(all_attachments)
    quoted_voice = att_result.get("voice_transcripts") or []
    quoted_info = att_result.get("attachment_info") or ""
    quoted_images = att_result.get("image_urls") or []
    quoted_image_types = att_result.get("image_media_types") or []

    # Order inside the block: quoted text, then voice transcripts, then the
    # attachment summary.
    lines: List[str] = []
    if quoted_text_parts:
        lines.append(" ".join(quoted_text_parts))
    lines.extend(quoted_voice)
    if quoted_info:
        lines.append(quoted_info)

    if not lines and not quoted_images:
        return empty

    if lines:
        quote_block = "[Quoted message]:\n" + "\n".join(lines)
    else:
        # Images-only quote: give the LLM at least a marker so it knows
        # context was referenced.
        quote_block = "[Quoted message]: (image)"

    return {
        "quote_block": quote_block,
        "image_urls": quoted_images,
        "image_media_types": quoted_image_types,
    }
|
||||||
|
|
||||||
|
@staticmethod
def _merge_quote_into(text: str, quote_block: str) -> str:
    """Prepend ``quote_block`` to *text*, separated by a blank line.

    :param text: The user's own message text. May be empty; a ``None``
        value is tolerated and treated like empty text.
    :param quote_block: Rendered quote context. When falsy, *text* is
        returned unchanged.
    :returns: ``quote_block`` alone when *text* is blank, otherwise the
        quote, a blank line, then *text*, with outer whitespace stripped.
    """
    if not quote_block:
        return text
    # ``text or ""`` keeps a missing body (None) from raising
    # AttributeError on ``.strip()`` for a bare quote-reply.
    if not (text or "").strip():
        return quote_block
    return f"{quote_block}\n\n{text}".strip()
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Attachment processing
|
# Attachment processing
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
|
|
|
||||||
|
|
@ -1301,3 +1301,220 @@ class TestAdapterInteractionDispatch:
|
||||||
"user_openid": "u",
|
"user_openid": "u",
|
||||||
"data": {"resolved": {"button_data": "approve:s:deny"}},
|
"data": {"resolved": {"button_data": "approve:s:deny"}},
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Quoted-message handling (message_type=103 → msg_elements)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class TestProcessQuotedContext:
    """Exercise ``_process_quoted_context``: text, voice STT, images and files."""

    @staticmethod
    def _attachment_result(**overrides):
        """Return a stubbed ``_process_attachments`` result, empty unless overridden."""
        result = {
            "image_urls": [],
            "image_media_types": [],
            "voice_transcripts": [],
            "attachment_info": "",
        }
        result.update(overrides)
        return result

    def _make_adapter(self):
        """Build a QQAdapter with throwaway credentials."""
        from gateway.platforms.qqbot.adapter import QQAdapter

        config = _make_config(app_id="a", client_secret="b")
        return QQAdapter(config)

    @pytest.mark.asyncio
    async def test_non_quote_message_returns_empty(self):
        adapter = self._make_adapter()
        payload = {"message_type": 0, "content": "hi"}

        result = await adapter._process_quoted_context(payload)

        assert result == {"quote_block": "", "image_urls": [], "image_media_types": []}

    @pytest.mark.asyncio
    async def test_quote_type_but_no_elements_returns_empty(self):
        adapter = self._make_adapter()

        result = await adapter._process_quoted_context({"message_type": 103})

        assert result["quote_block"] == ""

    @pytest.mark.asyncio
    async def test_quote_with_text_only(self):
        adapter = self._make_adapter()

        # The quoted element has no attachments, so the stub just reports
        # an empty result.
        async def fake_process(_atts):
            return self._attachment_result()

        adapter._process_attachments = fake_process  # type: ignore[assignment]

        quoted_element = {"content": "Did you see this file?", "attachments": []}
        payload = {"message_type": 103, "msg_elements": [quoted_element]}

        result = await adapter._process_quoted_context(payload)

        block = result["quote_block"]
        assert block.startswith("[Quoted message]:")
        assert "Did you see this file?" in block
        assert result["image_urls"] == []

    @pytest.mark.asyncio
    async def test_quote_with_voice_attachment_runs_stt(self):
        adapter = self._make_adapter()

        # Record every attachment batch handed to _process_attachments.
        seen_batches = []

        async def fake_process(atts):
            seen_batches.append(atts)
            return self._attachment_result(
                voice_transcripts=["[Voice] hello from the quoted audio"],
            )

        adapter._process_attachments = fake_process  # type: ignore[assignment]

        voice_attachment = {
            "content_type": "audio/silk",
            "url": "https://qq-cdn/x.silk",
            "filename": "rec.silk",
        }
        payload = {
            "message_type": 103,
            "msg_elements": [{"content": "", "attachments": [voice_attachment]}],
        }

        result = await adapter._process_quoted_context(payload)

        # The quoted voice attachment must actually reach the attachment pipeline.
        assert seen_batches
        first_batch = seen_batches[0]
        assert len(first_batch) == 1
        assert first_batch[0]["content_type"] == "audio/silk"
        assert "[Quoted message]:" in result["quote_block"]
        assert "hello from the quoted audio" in result["quote_block"]

    @pytest.mark.asyncio
    async def test_quote_with_file_preserves_filename(self):
        """Quoted file attachments must surface the original filename, not the CDN hash."""
        adapter = self._make_adapter()

        async def fake_process(atts):
            # Imitate _process_attachments: generic attachments are listed
            # in attachment_info under their real filename.
            labels = [
                f"[Attachment: {a.get('filename') or a.get('content_type', 'file')}]"
                for a in atts
            ]
            return self._attachment_result(attachment_info="\n".join(labels))

        adapter._process_attachments = fake_process  # type: ignore[assignment]

        zip_attachment = {
            "content_type": "application/zip",
            "url": "https://qq-cdn/abc123",
            "filename": "quarterly-report.zip",
        }
        payload = {
            "message_type": 103,
            "msg_elements": [
                {"content": "check this", "attachments": [zip_attachment]},
            ],
        }

        result = await adapter._process_quoted_context(payload)

        assert "quarterly-report.zip" in result["quote_block"]
        assert "check this" in result["quote_block"]

    @pytest.mark.asyncio
    async def test_quote_with_image_returns_cached_paths(self):
        adapter = self._make_adapter()

        async def fake_process(atts):
            return self._attachment_result(
                image_urls=["/tmp/cached_q.jpg"],
                image_media_types=["image/jpeg"],
            )

        adapter._process_attachments = fake_process  # type: ignore[assignment]

        image_attachment = {"content_type": "image/jpeg", "url": "https://x"}
        payload = {
            "message_type": 103,
            "msg_elements": [
                {"content": "look at this", "attachments": [image_attachment]},
            ],
        }

        result = await adapter._process_quoted_context(payload)

        assert result["image_urls"] == ["/tmp/cached_q.jpg"]
        assert result["image_media_types"] == ["image/jpeg"]
        assert "look at this" in result["quote_block"]

    @pytest.mark.asyncio
    async def test_quote_with_image_only_no_text(self):
        """Images-only quote still surfaces a marker so the LLM has context."""
        adapter = self._make_adapter()

        async def fake_process(atts):
            return self._attachment_result(
                image_urls=["/tmp/only.png"],
                image_media_types=["image/png"],
            )

        adapter._process_attachments = fake_process  # type: ignore[assignment]

        image_attachment = {"content_type": "image/png", "url": "https://x"}
        payload = {
            "message_type": 103,
            "msg_elements": [{"content": "", "attachments": [image_attachment]}],
        }

        result = await adapter._process_quoted_context(payload)

        assert result["quote_block"]
        assert result["image_urls"] == ["/tmp/only.png"]

    @pytest.mark.asyncio
    async def test_multiple_elements_concatenated(self):
        adapter = self._make_adapter()

        async def fake_process(atts):
            # Attachments from both elements must arrive in a single batch.
            assert len(atts) == 2
            return self._attachment_result()

        adapter._process_attachments = fake_process  # type: ignore[assignment]

        payload = {
            "message_type": 103,
            "msg_elements": [
                {
                    "content": "first",
                    "attachments": [{"content_type": "image/png", "url": "a"}],
                },
                {
                    "content": "second",
                    "attachments": [{"content_type": "image/png", "url": "b"}],
                },
            ],
        }

        result = await adapter._process_quoted_context(payload)

        assert "first" in result["quote_block"]
        assert "second" in result["quote_block"]

    @pytest.mark.asyncio
    async def test_invalid_message_type_string_returns_empty(self):
        adapter = self._make_adapter()
        payload = {"message_type": "not-a-number", "msg_elements": [{"content": "x"}]}

        result = await adapter._process_quoted_context(payload)

        assert result["quote_block"] == ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestMergeQuoteInto:
    """Check how ``_merge_quote_into`` combines a quote block with user text."""

    @staticmethod
    def _merge(text, quote_block):
        """Import the adapter lazily and call its static merge helper."""
        from gateway.platforms.qqbot.adapter import QQAdapter

        return QQAdapter._merge_quote_into(text, quote_block)

    def test_empty_quote_returns_original(self):
        assert self._merge("hello", "") == "hello"

    def test_empty_text_returns_only_quote(self):
        assert self._merge("", "[Quoted]") == "[Quoted]"

    def test_both_present_joined_with_blank_line(self):
        expected = "[Quoted]:\nctx\n\nhi there"
        assert self._merge("hi there", "[Quoted]:\nctx") == expected
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue