fix(dingtalk): repair _extract_text for dingtalk-stream >= 0.20 SDK shape

The cherry-picked SDK compat fix (previous commit) wired process() to
parse CallbackMessage.data into a ChatbotMessage, but _extract_text()
was still written against the pre-0.20 payload shape:

  * message.text changed from dict {content: ...} → TextContent object.
    The old code's str(text) fallback produced 'TextContent(content=...)'
    as the agent's input, so every received message came in mangled.
  * rich_text moved from message.rich_text (list) to
    message.rich_text_content.rich_text_list.

This preserves legacy fallbacks (dict-shaped text, bare rich_text list)
while handling the current SDK layout via hasattr(text, 'content').

Adds regression tests covering:
  * webhook domain allowlist (api.*, oapi.*, and hostile lookalikes)
  * _IncomingHandler.process is a coroutine function
  * _extract_text against TextContent object, dict, rich_text_content,
    legacy rich_text, and empty-message cases

Also adds kevinskysunny to scripts/release.py AUTHOR_MAP (release CI
blocks unmapped emails).
This commit is contained in:
Teknium 2026-04-17 00:38:16 -07:00 committed by Teknium
parent c3d2895b18
commit 3438d274f6
3 changed files with 158 additions and 10 deletions

View file

@ -238,18 +238,35 @@ class DingTalkAdapter(BasePlatformAdapter):
@staticmethod
def _extract_text(message: "ChatbotMessage") -> str:
"""Extract plain text from a DingTalk chatbot message."""
text = getattr(message, "text", None) or ""
if isinstance(text, dict):
content = text.get("content", "").strip()
else:
content = str(text).strip()
"""Extract plain text from a DingTalk chatbot message.
Handles both legacy and current dingtalk-stream SDK payload shapes:
* legacy: ``message.text`` was a dict ``{"content": "..."}``
* >= 0.20: ``message.text`` is a ``TextContent`` dataclass whose
``__str__`` returns ``"TextContent(content=...)"`` never fall
back to ``str(text)`` without extracting ``.content`` first.
* rich text moved from ``message.rich_text`` (list) to
``message.rich_text_content.rich_text_list`` (list of dicts).
"""
text = getattr(message, "text", None)
content = ""
if text is not None:
if isinstance(text, dict):
content = (text.get("content") or "").strip()
elif hasattr(text, "content"):
content = str(text.content or "").strip()
else:
content = str(text).strip()
# Fall back to rich text if present
if not content:
rich_text = getattr(message, "rich_text", None)
if rich_text and isinstance(rich_text, list):
parts = [item["text"] for item in rich_text
rich_list = None
rtc = getattr(message, "rich_text_content", None)
if rtc is not None and hasattr(rtc, "rich_text_list"):
rich_list = rtc.rich_text_list
if rich_list is None:
rich_list = getattr(message, "rich_text", None)
if rich_list and isinstance(rich_list, list):
parts = [item["text"] for item in rich_list
if isinstance(item, dict) and item.get("text")]
content = " ".join(parts).strip()
return content

View file

@ -82,6 +82,7 @@ AUTHOR_MAP = {
"xaydinoktay@gmail.com": "aydnOktay",
"abdullahfarukozden@gmail.com": "Farukest",
"lovre.pesut@gmail.com": "rovle",
"kevinskysunny@gmail.com": "kevinskysunny",
"hakanerten02@hotmail.com": "teyrebaz33",
"ruzzgarcn@gmail.com": "Ruzzgar",
"alireza78.crypto@gmail.com": "alireza78a",

View file

@ -273,3 +273,133 @@ class TestPlatformEnum:
def test_dingtalk_in_platform_enum(self):
assert Platform.DINGTALK.value == "dingtalk"
# ---------------------------------------------------------------------------
# SDK compatibility regression tests (dingtalk-stream >= 0.20 / 0.24)
# ---------------------------------------------------------------------------
class TestWebhookDomainAllowlist:
"""Guard the webhook origin allowlist against regression.
The SDK started returning reply webhooks on ``oapi.dingtalk.com`` in
addition to ``api.dingtalk.com``. Both must be accepted, and hostile
lookalikes must still be rejected (SSRF defence-in-depth).
"""
def test_api_domain_accepted(self):
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
assert _DINGTALK_WEBHOOK_RE.match(
"https://api.dingtalk.com/robot/send?access_token=x"
)
def test_oapi_domain_accepted(self):
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
assert _DINGTALK_WEBHOOK_RE.match(
"https://oapi.dingtalk.com/robot/send?access_token=x"
)
def test_http_rejected(self):
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
assert not _DINGTALK_WEBHOOK_RE.match("http://api.dingtalk.com/robot/send")
def test_suffix_attack_rejected(self):
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
assert not _DINGTALK_WEBHOOK_RE.match(
"https://api.dingtalk.com.evil.example/"
)
def test_unsanctioned_subdomain_rejected(self):
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
# Only api.* and oapi.* are allowed — e.g. eapi.dingtalk.com must not slip through
assert not _DINGTALK_WEBHOOK_RE.match("https://eapi.dingtalk.com/robot/send")
class TestHandlerProcessIsAsync:
"""dingtalk-stream >= 0.20 requires ``process`` to be a coroutine."""
def test_process_is_coroutine_function(self):
from gateway.platforms.dingtalk import _IncomingHandler
assert asyncio.iscoroutinefunction(_IncomingHandler.process)
class TestExtractText:
"""_extract_text must handle both legacy and current SDK payload shapes.
Before SDK 0.20 ``message.text`` was a ``dict`` with a ``content`` key.
From 0.20 onward it is a ``TextContent`` dataclass whose ``__str__``
returns ``"TextContent(content=...)"`` falling back to ``str(text)``
leaks that repr into the agent's input.
"""
def test_text_as_dict_legacy(self):
from gateway.platforms.dingtalk import DingTalkAdapter
msg = MagicMock()
msg.text = {"content": "hello world"}
msg.rich_text_content = None
msg.rich_text = None
assert DingTalkAdapter._extract_text(msg) == "hello world"
def test_text_as_textcontent_object(self):
"""SDK >= 0.20 shape: object with ``.content`` attribute."""
from gateway.platforms.dingtalk import DingTalkAdapter
class FakeTextContent:
content = "hello from new sdk"
def __str__(self): # mimic real SDK repr
return f"TextContent(content={self.content})"
msg = MagicMock()
msg.text = FakeTextContent()
msg.rich_text_content = None
msg.rich_text = None
result = DingTalkAdapter._extract_text(msg)
assert result == "hello from new sdk"
assert "TextContent(" not in result
def test_text_content_attr_with_empty_string(self):
from gateway.platforms.dingtalk import DingTalkAdapter
class FakeTextContent:
content = ""
msg = MagicMock()
msg.text = FakeTextContent()
msg.rich_text_content = None
msg.rich_text = None
assert DingTalkAdapter._extract_text(msg) == ""
def test_rich_text_content_new_shape(self):
"""SDK >= 0.20 exposes rich text as ``message.rich_text_content.rich_text_list``."""
from gateway.platforms.dingtalk import DingTalkAdapter
class FakeRichText:
rich_text_list = [{"text": "hello "}, {"text": "world"}]
msg = MagicMock()
msg.text = None
msg.rich_text_content = FakeRichText()
msg.rich_text = None
result = DingTalkAdapter._extract_text(msg)
assert "hello" in result and "world" in result
def test_rich_text_legacy_shape(self):
"""Legacy ``message.rich_text`` list remains supported."""
from gateway.platforms.dingtalk import DingTalkAdapter
msg = MagicMock()
msg.text = None
msg.rich_text_content = None
msg.rich_text = [{"text": "legacy "}, {"text": "rich"}]
result = DingTalkAdapter._extract_text(msg)
assert "legacy" in result and "rich" in result
def test_empty_message(self):
from gateway.platforms.dingtalk import DingTalkAdapter
msg = MagicMock()
msg.text = None
msg.rich_text_content = None
msg.rich_text = None
assert DingTalkAdapter._extract_text(msg) == ""