mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(dingtalk): repair _extract_text for dingtalk-stream >= 0.20 SDK shape
The cherry-picked SDK compat fix (previous commit) wired process() to
parse CallbackMessage.data into a ChatbotMessage, but _extract_text()
was still written against the pre-0.20 payload shape:
* message.text changed from dict {content: ...} → TextContent object.
The old code's str(text) fallback produced 'TextContent(content=...)'
as the agent's input, so every received message came in mangled.
* rich_text moved from message.rich_text (list) to
message.rich_text_content.rich_text_list.
This preserves legacy fallbacks (dict-shaped text, bare rich_text list)
while handling the current SDK layout via hasattr(text, 'content').
Adds regression tests covering:
* webhook domain allowlist (api.*, oapi.*, and hostile lookalikes)
* _IncomingHandler.process is a coroutine function
* _extract_text against TextContent object, dict, rich_text_content,
legacy rich_text, and empty-message cases
Also adds kevinskysunny to scripts/release.py AUTHOR_MAP (release CI
blocks unmapped emails).
This commit is contained in:
parent
c3d2895b18
commit
3438d274f6
3 changed files with 158 additions and 10 deletions
|
|
@ -238,18 +238,35 @@ class DingTalkAdapter(BasePlatformAdapter):
|
|||
|
||||
@staticmethod
|
||||
def _extract_text(message: "ChatbotMessage") -> str:
|
||||
"""Extract plain text from a DingTalk chatbot message."""
|
||||
text = getattr(message, "text", None) or ""
|
||||
if isinstance(text, dict):
|
||||
content = text.get("content", "").strip()
|
||||
else:
|
||||
content = str(text).strip()
|
||||
"""Extract plain text from a DingTalk chatbot message.
|
||||
|
||||
Handles both legacy and current dingtalk-stream SDK payload shapes:
|
||||
* legacy: ``message.text`` was a dict ``{"content": "..."}``
|
||||
* >= 0.20: ``message.text`` is a ``TextContent`` dataclass whose
|
||||
``__str__`` returns ``"TextContent(content=...)"`` — never fall
|
||||
back to ``str(text)`` without extracting ``.content`` first.
|
||||
* rich text moved from ``message.rich_text`` (list) to
|
||||
``message.rich_text_content.rich_text_list`` (list of dicts).
|
||||
"""
|
||||
text = getattr(message, "text", None)
|
||||
content = ""
|
||||
if text is not None:
|
||||
if isinstance(text, dict):
|
||||
content = (text.get("content") or "").strip()
|
||||
elif hasattr(text, "content"):
|
||||
content = str(text.content or "").strip()
|
||||
else:
|
||||
content = str(text).strip()
|
||||
|
||||
# Fall back to rich text if present
|
||||
if not content:
|
||||
rich_text = getattr(message, "rich_text", None)
|
||||
if rich_text and isinstance(rich_text, list):
|
||||
parts = [item["text"] for item in rich_text
|
||||
rich_list = None
|
||||
rtc = getattr(message, "rich_text_content", None)
|
||||
if rtc is not None and hasattr(rtc, "rich_text_list"):
|
||||
rich_list = rtc.rich_text_list
|
||||
if rich_list is None:
|
||||
rich_list = getattr(message, "rich_text", None)
|
||||
if rich_list and isinstance(rich_list, list):
|
||||
parts = [item["text"] for item in rich_list
|
||||
if isinstance(item, dict) and item.get("text")]
|
||||
content = " ".join(parts).strip()
|
||||
return content
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ AUTHOR_MAP = {
|
|||
"xaydinoktay@gmail.com": "aydnOktay",
|
||||
"abdullahfarukozden@gmail.com": "Farukest",
|
||||
"lovre.pesut@gmail.com": "rovle",
|
||||
"kevinskysunny@gmail.com": "kevinskysunny",
|
||||
"hakanerten02@hotmail.com": "teyrebaz33",
|
||||
"ruzzgarcn@gmail.com": "Ruzzgar",
|
||||
"alireza78.crypto@gmail.com": "alireza78a",
|
||||
|
|
|
|||
|
|
@ -273,3 +273,133 @@ class TestPlatformEnum:
|
|||
|
||||
def test_dingtalk_in_platform_enum(self):
|
||||
assert Platform.DINGTALK.value == "dingtalk"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SDK compatibility regression tests (dingtalk-stream >= 0.20 / 0.24)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWebhookDomainAllowlist:
|
||||
"""Guard the webhook origin allowlist against regression.
|
||||
|
||||
The SDK started returning reply webhooks on ``oapi.dingtalk.com`` in
|
||||
addition to ``api.dingtalk.com``. Both must be accepted, and hostile
|
||||
lookalikes must still be rejected (SSRF defence-in-depth).
|
||||
"""
|
||||
|
||||
def test_api_domain_accepted(self):
|
||||
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||
assert _DINGTALK_WEBHOOK_RE.match(
|
||||
"https://api.dingtalk.com/robot/send?access_token=x"
|
||||
)
|
||||
|
||||
def test_oapi_domain_accepted(self):
|
||||
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||
assert _DINGTALK_WEBHOOK_RE.match(
|
||||
"https://oapi.dingtalk.com/robot/send?access_token=x"
|
||||
)
|
||||
|
||||
def test_http_rejected(self):
|
||||
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||
assert not _DINGTALK_WEBHOOK_RE.match("http://api.dingtalk.com/robot/send")
|
||||
|
||||
def test_suffix_attack_rejected(self):
|
||||
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||
assert not _DINGTALK_WEBHOOK_RE.match(
|
||||
"https://api.dingtalk.com.evil.example/"
|
||||
)
|
||||
|
||||
def test_unsanctioned_subdomain_rejected(self):
|
||||
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||
# Only api.* and oapi.* are allowed — e.g. eapi.dingtalk.com must not slip through
|
||||
assert not _DINGTALK_WEBHOOK_RE.match("https://eapi.dingtalk.com/robot/send")
|
||||
|
||||
|
||||
class TestHandlerProcessIsAsync:
|
||||
"""dingtalk-stream >= 0.20 requires ``process`` to be a coroutine."""
|
||||
|
||||
def test_process_is_coroutine_function(self):
|
||||
from gateway.platforms.dingtalk import _IncomingHandler
|
||||
assert asyncio.iscoroutinefunction(_IncomingHandler.process)
|
||||
|
||||
|
||||
class TestExtractText:
|
||||
"""_extract_text must handle both legacy and current SDK payload shapes.
|
||||
|
||||
Before SDK 0.20 ``message.text`` was a ``dict`` with a ``content`` key.
|
||||
From 0.20 onward it is a ``TextContent`` dataclass whose ``__str__``
|
||||
returns ``"TextContent(content=...)"`` — falling back to ``str(text)``
|
||||
leaks that repr into the agent's input.
|
||||
"""
|
||||
|
||||
def test_text_as_dict_legacy(self):
|
||||
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||
msg = MagicMock()
|
||||
msg.text = {"content": "hello world"}
|
||||
msg.rich_text_content = None
|
||||
msg.rich_text = None
|
||||
assert DingTalkAdapter._extract_text(msg) == "hello world"
|
||||
|
||||
def test_text_as_textcontent_object(self):
|
||||
"""SDK >= 0.20 shape: object with ``.content`` attribute."""
|
||||
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||
|
||||
class FakeTextContent:
|
||||
content = "hello from new sdk"
|
||||
|
||||
def __str__(self): # mimic real SDK repr
|
||||
return f"TextContent(content={self.content})"
|
||||
|
||||
msg = MagicMock()
|
||||
msg.text = FakeTextContent()
|
||||
msg.rich_text_content = None
|
||||
msg.rich_text = None
|
||||
result = DingTalkAdapter._extract_text(msg)
|
||||
assert result == "hello from new sdk"
|
||||
assert "TextContent(" not in result
|
||||
|
||||
def test_text_content_attr_with_empty_string(self):
|
||||
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||
|
||||
class FakeTextContent:
|
||||
content = ""
|
||||
|
||||
msg = MagicMock()
|
||||
msg.text = FakeTextContent()
|
||||
msg.rich_text_content = None
|
||||
msg.rich_text = None
|
||||
assert DingTalkAdapter._extract_text(msg) == ""
|
||||
|
||||
def test_rich_text_content_new_shape(self):
|
||||
"""SDK >= 0.20 exposes rich text as ``message.rich_text_content.rich_text_list``."""
|
||||
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||
|
||||
class FakeRichText:
|
||||
rich_text_list = [{"text": "hello "}, {"text": "world"}]
|
||||
|
||||
msg = MagicMock()
|
||||
msg.text = None
|
||||
msg.rich_text_content = FakeRichText()
|
||||
msg.rich_text = None
|
||||
result = DingTalkAdapter._extract_text(msg)
|
||||
assert "hello" in result and "world" in result
|
||||
|
||||
def test_rich_text_legacy_shape(self):
|
||||
"""Legacy ``message.rich_text`` list remains supported."""
|
||||
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||
msg = MagicMock()
|
||||
msg.text = None
|
||||
msg.rich_text_content = None
|
||||
msg.rich_text = [{"text": "legacy "}, {"text": "rich"}]
|
||||
result = DingTalkAdapter._extract_text(msg)
|
||||
assert "legacy" in result and "rich" in result
|
||||
|
||||
def test_empty_message(self):
|
||||
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||
msg = MagicMock()
|
||||
msg.text = None
|
||||
msg.rich_text_content = None
|
||||
msg.rich_text = None
|
||||
assert DingTalkAdapter._extract_text(msg) == ""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue