hermes-agent/gateway/platforms/dingtalk.py
kagura-agent 47a0dd1024 fix(dingtalk): fire-and-forget message processing & session_webhook fallback
Fixes #11463: DingTalk channel receives messages but fails to reply
with 'No session_webhook available'.

Two changes:

1. **Fire-and-forget message processing**: process() now dispatches
   _on_message as a background task via asyncio.create_task instead of
   awaiting it. This ensures the SDK ACK is returned immediately,
   preventing heartbeat timeouts and disconnections when message
   processing takes longer than the SDK's ACK deadline.

2. **session_webhook extraction fallback**: If ChatbotMessage.from_dict()
   fails to map the sessionWebhook field (possible across SDK versions),
   the handler now falls back to extracting it directly from the raw
   callback data dict using both 'sessionWebhook' and 'session_webhook'
   key variants.

Added 3 tests covering webhook extraction, fallback behavior, and
fire-and-forget ACK timing.
2026-04-17 06:26:18 -07:00

521 lines
21 KiB
Python

"""
DingTalk platform adapter using Stream Mode.
Uses dingtalk-stream SDK for real-time message reception without webhooks.
Responses are sent via DingTalk's session webhook (markdown format).
Requires:
pip install dingtalk-stream httpx
DINGTALK_CLIENT_ID and DINGTALK_CLIENT_SECRET env vars
Configuration in config.yaml:
platforms:
dingtalk:
enabled: true
# Optional group-chat gating (mirrors Slack/Telegram/Discord):
require_mention: true # or DINGTALK_REQUIRE_MENTION env var
# free_response_chats: # conversations that skip require_mention
# - cidABC==
# mention_patterns: # regex wake-words (e.g. Chinese bot names)
# - "^小马"
# allowed_users: # staff_id or sender_id list; "*" = any
# - "manager1234"
extra:
client_id: "your-app-key" # or DINGTALK_CLIENT_ID env var
client_secret: "your-secret" # or DINGTALK_CLIENT_SECRET env var
"""
import asyncio
import json
import logging
import os
import re
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Set
try:
import dingtalk_stream
from dingtalk_stream import ChatbotHandler, ChatbotMessage
DINGTALK_STREAM_AVAILABLE = True
except ImportError:
DINGTALK_STREAM_AVAILABLE = False
dingtalk_stream = None # type: ignore[assignment]
try:
import httpx
HTTPX_AVAILABLE = True
except ImportError:
HTTPX_AVAILABLE = False
httpx = None # type: ignore[assignment]
from gateway.config import Platform, PlatformConfig
from gateway.platforms.helpers import MessageDeduplicator
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
SendResult,
)
logger = logging.getLogger(__name__)
MAX_MESSAGE_LENGTH = 20000
RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
_SESSION_WEBHOOKS_MAX = 500
_DINGTALK_WEBHOOK_RE = re.compile(r'^https://(?:api|oapi)\.dingtalk\.com/')
def check_dingtalk_requirements() -> bool:
"""Check if DingTalk dependencies are available and configured."""
if not DINGTALK_STREAM_AVAILABLE or not HTTPX_AVAILABLE:
return False
if not os.getenv("DINGTALK_CLIENT_ID") or not os.getenv("DINGTALK_CLIENT_SECRET"):
return False
return True
class DingTalkAdapter(BasePlatformAdapter):
"""DingTalk chatbot adapter using Stream Mode.
The dingtalk-stream SDK maintains a long-lived WebSocket connection.
Incoming messages arrive via a ChatbotHandler callback. Replies are
sent via the incoming message's session_webhook URL using httpx.
"""
MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.DINGTALK)
extra = config.extra or {}
self._client_id: str = extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID", "")
self._client_secret: str = extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET", "")
self._stream_client: Any = None
self._stream_task: Optional[asyncio.Task] = None
self._http_client: Optional["httpx.AsyncClient"] = None
# Message deduplication
self._dedup = MessageDeduplicator(max_size=1000)
# Map chat_id -> session_webhook for reply routing
self._session_webhooks: Dict[str, str] = {}
# Group-chat gating (mirrors Slack/Telegram/Discord/WhatsApp conventions)
self._mention_patterns: List[re.Pattern] = self._compile_mention_patterns()
self._allowed_users: Set[str] = self._load_allowed_users()
# -- Connection lifecycle -----------------------------------------------
async def connect(self) -> bool:
"""Connect to DingTalk via Stream Mode."""
if not DINGTALK_STREAM_AVAILABLE:
logger.warning("[%s] dingtalk-stream not installed. Run: pip install dingtalk-stream", self.name)
return False
if not HTTPX_AVAILABLE:
logger.warning("[%s] httpx not installed. Run: pip install httpx", self.name)
return False
if not self._client_id or not self._client_secret:
logger.warning("[%s] DINGTALK_CLIENT_ID and DINGTALK_CLIENT_SECRET required", self.name)
return False
try:
self._http_client = httpx.AsyncClient(timeout=30.0)
credential = dingtalk_stream.Credential(self._client_id, self._client_secret)
self._stream_client = dingtalk_stream.DingTalkStreamClient(credential)
# Capture the current event loop for cross-thread dispatch
loop = asyncio.get_running_loop()
handler = _IncomingHandler(self, loop)
self._stream_client.register_callback_handler(
dingtalk_stream.ChatbotMessage.TOPIC, handler
)
self._stream_task = asyncio.create_task(self._run_stream())
self._mark_connected()
logger.info("[%s] Connected via Stream Mode", self.name)
return True
except Exception as e:
logger.error("[%s] Failed to connect: %s", self.name, e)
return False
async def _run_stream(self) -> None:
"""Run the stream client with auto-reconnection."""
backoff_idx = 0
while self._running:
try:
logger.debug("[%s] Starting stream client...", self.name)
await self._stream_client.start()
except asyncio.CancelledError:
return
except Exception as e:
if not self._running:
return
logger.warning("[%s] Stream client error: %s", self.name, e)
if not self._running:
return
delay = RECONNECT_BACKOFF[min(backoff_idx, len(RECONNECT_BACKOFF) - 1)]
logger.info("[%s] Reconnecting in %ds...", self.name, delay)
await asyncio.sleep(delay)
backoff_idx += 1
async def disconnect(self) -> None:
"""Disconnect from DingTalk."""
self._running = False
self._mark_disconnected()
websocket = getattr(self._stream_client, "websocket", None)
if websocket is not None:
try:
await websocket.close()
except Exception as e:
logger.debug("[%s] websocket close during disconnect failed: %s", self.name, e)
if self._stream_task:
self._stream_task.cancel()
try:
await asyncio.wait_for(self._stream_task, timeout=2.0)
except (asyncio.CancelledError, asyncio.TimeoutError):
logger.debug("[%s] stream task did not exit cleanly during disconnect", self.name)
self._stream_task = None
if self._http_client:
await self._http_client.aclose()
self._http_client = None
self._stream_client = None
self._session_webhooks.clear()
self._dedup.clear()
logger.info("[%s] Disconnected", self.name)
# -- Group gating --------------------------------------------------------
def _dingtalk_require_mention(self) -> bool:
"""Return whether group chats should require an explicit bot trigger."""
configured = self.config.extra.get("require_mention")
if configured is not None:
if isinstance(configured, str):
return configured.lower() in ("true", "1", "yes", "on")
return bool(configured)
return os.getenv("DINGTALK_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
def _dingtalk_free_response_chats(self) -> Set[str]:
raw = self.config.extra.get("free_response_chats")
if raw is None:
raw = os.getenv("DINGTALK_FREE_RESPONSE_CHATS", "")
if isinstance(raw, list):
return {str(part).strip() for part in raw if str(part).strip()}
return {part.strip() for part in str(raw).split(",") if part.strip()}
def _compile_mention_patterns(self) -> List[re.Pattern]:
"""Compile optional regex wake-word patterns for group triggers."""
patterns = self.config.extra.get("mention_patterns") if self.config.extra else None
if patterns is None:
raw = os.getenv("DINGTALK_MENTION_PATTERNS", "").strip()
if raw:
try:
loaded = json.loads(raw)
except Exception:
loaded = [part.strip() for part in raw.splitlines() if part.strip()]
if not loaded:
loaded = [part.strip() for part in raw.split(",") if part.strip()]
patterns = loaded
if patterns is None:
return []
if isinstance(patterns, str):
patterns = [patterns]
if not isinstance(patterns, list):
logger.warning(
"[%s] dingtalk mention_patterns must be a list or string; got %s",
self.name,
type(patterns).__name__,
)
return []
compiled: List[re.Pattern] = []
for pattern in patterns:
if not isinstance(pattern, str) or not pattern.strip():
continue
try:
compiled.append(re.compile(pattern, re.IGNORECASE))
except re.error as exc:
logger.warning("[%s] Invalid DingTalk mention pattern %r: %s", self.name, pattern, exc)
if compiled:
logger.info("[%s] Loaded %d DingTalk mention pattern(s)", self.name, len(compiled))
return compiled
def _load_allowed_users(self) -> Set[str]:
"""Load allowed-users list from config.extra or env var.
IDs are matched case-insensitively against the sender's ``staff_id`` and
``sender_id``. A wildcard ``*`` disables the check.
"""
raw = self.config.extra.get("allowed_users") if self.config.extra else None
if raw is None:
raw = os.getenv("DINGTALK_ALLOWED_USERS", "")
if isinstance(raw, list):
items = [str(part).strip() for part in raw if str(part).strip()]
else:
items = [part.strip() for part in str(raw).split(",") if part.strip()]
return {item.lower() for item in items}
def _is_user_allowed(self, sender_id: str, sender_staff_id: str) -> bool:
if not self._allowed_users or "*" in self._allowed_users:
return True
candidates = {(sender_id or "").lower(), (sender_staff_id or "").lower()}
candidates.discard("")
return bool(candidates & self._allowed_users)
def _message_mentions_bot(self, message: "ChatbotMessage") -> bool:
"""True if the bot was @-mentioned in a group message.
dingtalk-stream sets ``is_in_at_list`` on the incoming ChatbotMessage
when the bot is addressed via @-mention.
"""
return bool(getattr(message, "is_in_at_list", False))
def _message_matches_mention_patterns(self, text: str) -> bool:
if not text or not self._mention_patterns:
return False
return any(pattern.search(text) for pattern in self._mention_patterns)
def _should_process_message(self, message: "ChatbotMessage", text: str, is_group: bool, chat_id: str) -> bool:
"""Apply DingTalk group trigger rules.
DMs remain unrestricted (subject to ``allowed_users`` which is enforced
earlier). Group messages are accepted when:
- the chat is explicitly allowlisted in ``free_response_chats``
- ``require_mention`` is disabled
- the bot is @mentioned (``is_in_at_list``)
- the text matches a configured regex wake-word pattern
"""
if not is_group:
return True
if chat_id and chat_id in self._dingtalk_free_response_chats():
return True
if not self._dingtalk_require_mention():
return True
if self._message_mentions_bot(message):
return True
return self._message_matches_mention_patterns(text)
# -- Inbound message processing -----------------------------------------
async def _on_message(self, message: "ChatbotMessage") -> None:
"""Process an incoming DingTalk chatbot message."""
msg_id = getattr(message, "message_id", None) or uuid.uuid4().hex
if self._dedup.is_duplicate(msg_id):
logger.debug("[%s] Duplicate message %s, skipping", self.name, msg_id)
return
text = self._extract_text(message)
if not text:
logger.debug("[%s] Empty message, skipping", self.name)
return
# Chat context
conversation_id = getattr(message, "conversation_id", "") or ""
conversation_type = getattr(message, "conversation_type", "1")
is_group = str(conversation_type) == "2"
sender_id = getattr(message, "sender_id", "") or ""
sender_nick = getattr(message, "sender_nick", "") or sender_id
sender_staff_id = getattr(message, "sender_staff_id", "") or ""
chat_id = conversation_id or sender_id
chat_type = "group" if is_group else "dm"
# Allowed-users gate (applies to both DM and group)
if not self._is_user_allowed(sender_id, sender_staff_id):
logger.debug(
"[%s] Dropping message from non-allowlisted user staff_id=%s sender_id=%s",
self.name, sender_staff_id, sender_id,
)
return
# Group mention/pattern gate
if not self._should_process_message(message, text, is_group, chat_id):
logger.debug(
"[%s] Dropping group message that failed mention gate message_id=%s chat_id=%s",
self.name, msg_id, chat_id,
)
return
# Store session webhook for reply routing (validate origin to prevent SSRF)
session_webhook = getattr(message, "session_webhook", None) or ""
if session_webhook and chat_id and _DINGTALK_WEBHOOK_RE.match(session_webhook):
if len(self._session_webhooks) >= _SESSION_WEBHOOKS_MAX:
# Evict oldest entry to cap memory growth
try:
self._session_webhooks.pop(next(iter(self._session_webhooks)))
except StopIteration:
pass
self._session_webhooks[chat_id] = session_webhook
source = self.build_source(
chat_id=chat_id,
chat_name=getattr(message, "conversation_title", None),
chat_type=chat_type,
user_id=sender_id,
user_name=sender_nick,
user_id_alt=sender_staff_id if sender_staff_id else None,
)
# Parse timestamp
create_at = getattr(message, "create_at", None)
try:
timestamp = datetime.fromtimestamp(int(create_at) / 1000, tz=timezone.utc) if create_at else datetime.now(tz=timezone.utc)
except (ValueError, OSError, TypeError):
timestamp = datetime.now(tz=timezone.utc)
event = MessageEvent(
text=text,
message_type=MessageType.TEXT,
source=source,
message_id=msg_id,
raw_message=message,
timestamp=timestamp,
)
logger.debug("[%s] Message from %s in %s: %s",
self.name, sender_nick, chat_id[:20] if chat_id else "?", text[:50])
await self.handle_message(event)
@staticmethod
def _extract_text(message: "ChatbotMessage") -> str:
"""Extract plain text from a DingTalk chatbot message.
Handles both legacy and current dingtalk-stream SDK payload shapes:
* legacy: ``message.text`` was a dict ``{"content": "..."}``
* >= 0.20: ``message.text`` is a ``TextContent`` dataclass whose
``__str__`` returns ``"TextContent(content=...)"`` — never fall
back to ``str(text)`` without extracting ``.content`` first.
* rich text moved from ``message.rich_text`` (list) to
``message.rich_text_content.rich_text_list`` (list of dicts).
"""
text = getattr(message, "text", None)
content = ""
if text is not None:
if isinstance(text, dict):
content = (text.get("content") or "").strip()
elif hasattr(text, "content"):
content = str(text.content or "").strip()
else:
content = str(text).strip()
if not content:
rich_list = None
rtc = getattr(message, "rich_text_content", None)
if rtc is not None and hasattr(rtc, "rich_text_list"):
rich_list = rtc.rich_text_list
if rich_list is None:
rich_list = getattr(message, "rich_text", None)
if rich_list and isinstance(rich_list, list):
parts = [item["text"] for item in rich_list
if isinstance(item, dict) and item.get("text")]
content = " ".join(parts).strip()
return content
# -- Outbound messaging -------------------------------------------------
async def send(
self,
chat_id: str,
content: str,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send a markdown reply via DingTalk session webhook."""
metadata = metadata or {}
session_webhook = metadata.get("session_webhook") or self._session_webhooks.get(chat_id)
if not session_webhook:
return SendResult(success=False,
error="No session_webhook available. Reply must follow an incoming message.")
if not self._http_client:
return SendResult(success=False, error="HTTP client not initialized")
payload = {
"msgtype": "markdown",
"markdown": {"title": "Hermes", "text": content[:self.MAX_MESSAGE_LENGTH]},
}
try:
resp = await self._http_client.post(session_webhook, json=payload, timeout=15.0)
if resp.status_code < 300:
return SendResult(success=True, message_id=uuid.uuid4().hex[:12])
body = resp.text
logger.warning("[%s] Send failed HTTP %d: %s", self.name, resp.status_code, body[:200])
return SendResult(success=False, error=f"HTTP {resp.status_code}: {body[:200]}")
except httpx.TimeoutException:
return SendResult(success=False, error="Timeout sending message to DingTalk")
except Exception as e:
logger.error("[%s] Send error: %s", self.name, e)
return SendResult(success=False, error=str(e))
async def send_typing(self, chat_id: str, metadata=None) -> None:
"""DingTalk does not support typing indicators."""
pass
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
"""Return basic info about a DingTalk conversation."""
return {"name": chat_id, "type": "group" if "group" in chat_id.lower() else "dm"}
# ---------------------------------------------------------------------------
# Internal stream handler
# ---------------------------------------------------------------------------
class _IncomingHandler(ChatbotHandler if DINGTALK_STREAM_AVAILABLE else object):
"""dingtalk-stream ChatbotHandler that forwards messages to the adapter."""
def __init__(self, adapter: DingTalkAdapter, loop: asyncio.AbstractEventLoop):
if DINGTALK_STREAM_AVAILABLE:
super().__init__()
self._adapter = adapter
self._loop = loop
async def process(self, callback_message):
"""Called by dingtalk-stream when a message arrives.
dingtalk-stream >= 0.24 passes a CallbackMessage whose `.data` contains
the chatbot payload. Convert it to ChatbotMessage via
``ChatbotMessage.from_dict()``.
Message processing is dispatched as a background task so that this
method returns the ACK immediately — blocking here would prevent the
SDK from sending heartbeats, eventually causing a disconnect.
"""
try:
data = callback_message.data
chatbot_msg = ChatbotMessage.from_dict(data)
# Ensure session_webhook is populated even if the SDK's
# from_dict() did not map it (field name mismatch across
# SDK versions).
if not getattr(chatbot_msg, "session_webhook", None):
webhook = (
data.get("sessionWebhook")
or data.get("session_webhook")
or ""
)
if webhook:
chatbot_msg.session_webhook = webhook
# Fire-and-forget: return ACK immediately, process in background.
asyncio.create_task(self._safe_on_message(chatbot_msg))
except Exception:
logger.exception("[DingTalk] Error preparing incoming message")
return dingtalk_stream.AckMessage.STATUS_OK, "OK"
async def _safe_on_message(self, chatbot_msg: "ChatbotMessage") -> None:
"""Wrapper that catches exceptions from _on_message."""
try:
await self._adapter._on_message(chatbot_msg)
except Exception:
logger.exception("[DingTalk] Error processing incoming message")