fix(discord): hydrate channel context when replying to a message (#49212)

* fix(discord): hydrate channel context when replying to a message

Replying to a message in a free-response (non-mention, threads-off)
channel previously received only the 500-char "[Replying to: ...]"
snippet — the history-backfill gate fired only for mention-gated
channels and threads, so a reply got no surrounding channel context.

Replies now route through the same _fetch_channel_context hydration
that threads use. When the user replied to a specific (often older)
message, a reply-anchored window is scanned ending at that message so
the agent sees the exchange around what was pointed at, even when the
target sits before the self-message partition. The two windows are
merged chronologically and de-duplicated by message id.

Also hardens the recent-window scan to skip non-conversational status
bumps before the self-message partition check, and makes author-name
resolution defensive against partial/deleted authors.

* fix(discord): duck-type reply-target resolution instead of isinstance(discord.Message)

The e2e suite stubs the discord module, so discord.Message is a MagicMock
and isinstance(_resolved, discord.Message) raises 'isinstance() arg 2 must
be a type'. Any object with an int .id works as a scan anchor, so resolve
the reply target by duck-typing on .id and fall back to a _Snowflake from
the reference message_id.
This commit is contained in:
Teknium 2026-06-19 12:03:08 -07:00 committed by GitHub
parent 16642e2769
commit ba49fb51a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 299 additions and 30 deletions

View file

@ -26,6 +26,19 @@ from typing import Callable, Dict, List, Optional, Any, Tuple
logger = logging.getLogger(__name__)
class _Snowflake:
"""Minimal object exposing ``.id`` — satisfies discord.py's Snowflake
protocol for ``channel.history(before=...)`` without constructing a
``discord.Object`` (which test doubles that stub the discord module
cannot build). Used to anchor reply-context scans inclusively.
"""
__slots__ = ("id",)
def __init__(self, id: int) -> None: # noqa: A002 - matches discord API
self.id = id
VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}
_DISCORD_COMMAND_SYNC_STATE_SUBDIR = "gateway"
@ -4255,6 +4268,7 @@ class DiscordAdapter(BasePlatformAdapter):
self,
channel: Any,
before: "DiscordMessage",
reply_target: Optional[Any] = None,
) -> str:
"""Fetch recent channel messages for conversational context.
@ -4262,6 +4276,13 @@ class DiscordAdapter(BasePlatformAdapter):
a message sent by this bot (the natural partition point between
bot turns) or reaches ``history_backfill_limit``.
When ``reply_target`` is provided (the user replied to a specific
message), a second backward scan is run ending at that target so the
agent sees the conversation surrounding what the user pointed at
even when the reply target sits *before* the most recent bot turn and
would otherwise be cut off by the self-message partition. The two
windows are merged chronologically and de-duplicated by message ID.
Returns a formatted block like::
[Recent channel messages]
@ -4295,7 +4316,47 @@ class DiscordAdapter(BasePlatformAdapter):
pass # Malformed cache entry — fall back to cold-start scan
try:
collected = []
def _keep(msg) -> Optional[str]:
"""Return a formatted ``[name] content`` line, or None to skip.
Encapsulates the system-message / non-conversational / other-bot
filtering so both the primary and reply-anchored scans apply
identical rules. Does NOT enforce the self-message partition
callers decide where to stop.
"""
if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
return None
content = getattr(msg, "clean_content", msg.content) or ""
if (
str(getattr(msg, "id", "")) in self._nonconversational_messages
or _looks_like_nonconversational_history_message(content)
):
return None
# Respect DISCORD_ALLOW_BOTS for other bots. For history
# context, "mentions" is treated as "all" — we are deciding
# what context to show, not whether to respond.
if (
getattr(msg.author, "bot", False)
and msg.author != self._client.user
and not include_other_bots
):
return None
if not content and msg.attachments:
content = "(attachment)"
if not content:
return None
name = (
getattr(msg.author, "display_name", None)
or getattr(msg.author, "name", None)
or "unknown"
)
if getattr(msg.author, "bot", False):
name = f"{name} [bot]"
return f"[{name}] {content}"
# ── Primary window: recent channel activity since the last bot turn ──
collected: List[Tuple[str, str]] = [] # (message_id, line)
seen_ids: set = set()
# IMPORTANT: pass oldest_first=False explicitly. discord.py 2.x
# silently flips the default to True when `after=` is supplied,
# which would select the *earliest* N messages after our last
@ -4309,45 +4370,89 @@ class DiscordAdapter(BasePlatformAdapter):
after=_after_obj,
oldest_first=False,
):
# Skip system messages (pins, joins, thread renames, etc.)
if msg.type not in {discord.MessageType.default, discord.MessageType.reply}:
continue
content = getattr(msg, "clean_content", msg.content) or ""
# Non-conversational lifecycle/status bumps (self-improvement
# reviews, background-process notices, restart banners) must be
# skipped BEFORE the partition check — otherwise a delayed
# status bump authored by us would be mistaken for the real
# last bot turn and hide messages that came after it.
_content = getattr(msg, "clean_content", msg.content) or ""
if (
str(getattr(msg, "id", "")) in self._nonconversational_messages
or _looks_like_nonconversational_history_message(content)
or _looks_like_nonconversational_history_message(_content)
):
continue
# Stop at our own message — this is the partition point.
# Everything before this is already in the session transcript.
# (Redundant when _after_obj is set, but needed for cold start.)
# Stop at our own (conversational) message — this is the
# partition point. Everything before this is already in the
# session transcript. (Redundant when _after_obj is set, but
# needed for cold start.)
if msg.author == self._client.user:
break
# Respect DISCORD_ALLOW_BOTS for other bots.
# For history context, "mentions" is treated as "all" — we are
# deciding what context to show, not whether to respond.
if getattr(msg.author, "bot", False) and not include_other_bots:
line = _keep(msg)
if line is None:
continue
mid = str(getattr(msg, "id", ""))
collected.append((mid, line))
if mid:
seen_ids.add(mid)
if not content and msg.attachments:
content = "(attachment)"
if not content:
continue
# ── Reply window: context around the message the user pointed at ──
# When the user replied to a specific message that sits BEFORE the
# primary window's partition point, the surrounding exchange isn't
# captured above. Fetch a small window ending just after the reply
# target so the agent sees what it was referencing. This window is
# NOT partitioned on the self-message boundary — the whole point is
# to surface older context the transcript lacks.
reply_collected: List[Tuple[str, str]] = []
reply_target_id = str(getattr(reply_target, "id", "")) if reply_target else ""
if reply_target is not None and reply_target_id and reply_target_id not in seen_ids:
# Reuse the same cap as the primary scan but keep the reply
# window modest — it's anchored context, not a full backfill.
reply_limit = max(1, min(limit, 10))
# `before` is exclusive in discord.py, so to *include* the
# target we anchor at target_id + 1. Use a minimal snowflake
# shim (any object exposing ``.id`` satisfies discord.py's
# Snowflake protocol) rather than discord.Object, so this path
# works under test doubles that stub the discord module too.
try:
_before_obj = _Snowflake(int(reply_target_id) + 1)
except (ValueError, TypeError):
_before_obj = before
async for msg in channel.history(
limit=reply_limit,
before=_before_obj,
oldest_first=False,
):
line = _keep(msg)
if line is None:
continue
mid = str(getattr(msg, "id", ""))
if mid and mid in seen_ids:
continue
reply_collected.append((mid, line))
if mid:
seen_ids.add(mid)
name = msg.author.display_name
if getattr(msg.author, "bot", False):
name = f"{name} [bot]"
collected.append(f"[{name}] {content}")
if not collected:
if not collected and not reply_collected:
return ""
# channel.history returns newest-first (oldest_first=False); reverse for chronological order
# channel.history returns newest-first; reverse each window for
# chronological order, then present reply context first (it is
# older) followed by the recent activity.
collected.reverse()
return "[Recent channel messages]\n" + "\n".join(collected)
reply_collected.reverse()
blocks: List[str] = []
if reply_collected:
blocks.append(
"[Context around the replied-to message]\n"
+ "\n".join(line for _id, line in reply_collected)
)
if collected:
blocks.append(
"[Recent channel messages]\n"
+ "\n".join(line for _id, line in collected)
)
return "\n\n".join(blocks)
except discord.Forbidden:
logger.debug("[%s] Missing permissions to fetch channel history", self.name)
@ -5381,14 +5486,40 @@ class DiscordAdapter(BasePlatformAdapter):
# - any thread (in_bot_thread bypasses the mention check, but
# processing-window gaps and post-restart context still need
# recovery)
# - any reply (the user pointed at a specific message; hydrate
# the context around it even in a free-response channel where
# no mention gap exists — otherwise replies get only the short
# "[Replying to: ...]" snippet with no surrounding context)
# DMs skip entirely because every DM message triggers the bot,
# so the session transcript already has everything.
# Auto-threaded messages also skip — we just created the thread,
# there's nothing prior to backfill.
_has_mention_gap = require_mention and not is_free_channel and not in_bot_thread
if (_has_mention_gap or is_thread) and auto_threaded_channel is None:
_is_reply = message.reference is not None
# Resolve the replied-to message into an object exposing ``.id``.
# discord.py may give us a full Message (resolved), a
# DeletedReferencedMessage, or nothing. Duck-type on ``.id``
# rather than isinstance(discord.Message) — under test doubles the
# discord module (and thus discord.Message) can be a mock, which is
# not a valid isinstance() second argument. Any object with an int
# id works as a scan anchor; otherwise fall back to a bare snowflake
# built from the reference's message_id.
_reply_target = None
if _is_reply:
_resolved = getattr(message.reference, "resolved", None)
_resolved_id = getattr(_resolved, "id", None) if _resolved is not None else None
if _resolved_id is not None:
_reply_target = _resolved
else:
_ref_mid = getattr(message.reference, "message_id", None)
if _ref_mid is not None:
with suppress(ValueError, TypeError):
_reply_target = _Snowflake(int(_ref_mid))
if (_has_mention_gap or is_thread or _is_reply) and auto_threaded_channel is None:
_backfill_text = await self._fetch_channel_context(
message.channel, before=message,
message.channel, before=message, reply_target=_reply_target,
)
if _backfill_text:
_channel_context = _backfill_text

View file

@ -27,6 +27,8 @@ def _ensure_discord_mock():
discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4, purple=lambda: 5)
discord_mod.Interaction = object
discord_mod.Embed = MagicMock
discord_mod.Object = lambda *, id: SimpleNamespace(id=id)
discord_mod.Message = type("Message", (), {})
discord_mod.app_commands = SimpleNamespace(
describe=lambda **kwargs: (lambda fn: fn),
choices=lambda **kwargs: (lambda fn: fn),
@ -721,6 +723,84 @@ async def test_fetch_channel_context_skips_self_improvement_boundary_message(ada
)
@pytest.mark.asyncio
async def test_fetch_channel_context_hydrates_around_reply_target(adapter, monkeypatch):
"""Replying to an older message pulls the surrounding exchange into context.
The reply target sits *before* the self-message partition point, so the
primary scan alone would miss it. The reply-anchored window must surface
the target and its neighbours under a distinct header, with the recent
activity still appearing afterwards.
"""
monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all")
adapter.config.extra["history_backfill_limit"] = 10
bot_user = adapter._client.user
human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False)
other = SimpleNamespace(id=58, display_name="Carol", name="Carol", bot=False)
channel = FakeHistoryChannel(
[
# Recent activity (after our last response, captured by primary scan)
make_history_message(author=human, content="latest note", msg_id=6),
make_history_message(author=bot_user, content="our prior response", msg_id=5),
# Older exchange — behind the partition, only reachable via reply anchor
make_history_message(author=bot_user, content="the bot answer being replied to", msg_id=3),
make_history_message(author=other, content="older question", msg_id=2),
make_history_message(author=human, content="even older", msg_id=1),
],
channel_id=123,
)
# User replied to the bot's older answer (msg_id=3).
reply_target = SimpleNamespace(id=3)
trigger = make_message(channel=channel, content="follow-up about that")
result = await adapter._fetch_channel_context(
channel, before=trigger, reply_target=reply_target,
)
# Reply context comes first (older), then recent activity. The reply
# window is NOT cut off at the self-message boundary, so msg_id=3 (a bot
# message) and its neighbours appear.
assert "[Context around the replied-to message]" in result
assert "the bot answer being replied to" in result
assert "older question" in result
assert "[Recent channel messages]" in result
assert "latest note" in result
assert result.index("[Context around the replied-to message]") < result.index("[Recent channel messages]")
@pytest.mark.asyncio
async def test_fetch_channel_context_reply_target_in_primary_window_not_duplicated(adapter, monkeypatch):
"""When the reply target is already in the recent window, don't double it."""
monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all")
adapter.config.extra["history_backfill_limit"] = 10
bot_user = adapter._client.user
human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False)
channel = FakeHistoryChannel(
[
make_history_message(author=human, content="recent reply target", msg_id=4),
make_history_message(author=human, content="another recent", msg_id=3),
make_history_message(author=bot_user, content="our prior response", msg_id=2),
],
channel_id=123,
)
reply_target = SimpleNamespace(id=4) # already inside the primary window
trigger = make_message(channel=channel, content="re: that")
result = await adapter._fetch_channel_context(
channel, before=trigger, reply_target=reply_target,
)
# No separate reply block, and the target text appears exactly once.
assert "[Context around the replied-to message]" not in result
assert result.count("recent reply target") == 1
def test_nonconversational_fallback_requires_self_improvement_emoji():
assert discord_platform._looks_like_nonconversational_history_message(
"💾 Self-improvement review: Memory updated"
@ -1016,3 +1096,61 @@ async def test_discord_auto_thread_skips_backfill(adapter, monkeypatch):
adapter._auto_create_thread.assert_awaited_once()
adapter._fetch_channel_context.assert_not_awaited()
@pytest.mark.asyncio
async def test_discord_reply_in_free_channel_triggers_backfill(adapter, monkeypatch):
"""Replying to a message hydrates context even in a free-response channel.
This is the gap the reply-context feature closes: with no mention
requirement there is no "mention gap", so the old gate skipped backfill
and a reply received only the short "[Replying to: ...]" snippet. A reply
must now route through _fetch_channel_context with the replied-to message
as the anchor.
"""
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") # free-response
monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
adapter.config.extra["history_backfill"] = True
adapter._fetch_channel_context = AsyncMock(
return_value="[Context around the replied-to message]\n[Hermes [bot]] earlier answer"
)
message = make_message(channel=FakeTextChannel(channel_id=321), content="what about edge cases?")
# Simulate a Discord reply: reference points at an earlier message id.
message.reference = SimpleNamespace(message_id=42, resolved=None)
await adapter._handle_message(message)
adapter._fetch_channel_context.assert_awaited_once()
# The reply target is passed as the anchor, carrying the referenced id.
call = adapter._fetch_channel_context.await_args
assert getattr(call.kwargs.get("reply_target"), "id", None) == 42
event = adapter.handle_message.await_args.args[0]
assert event.channel_context == (
"[Context around the replied-to message]\n[Hermes [bot]] earlier answer"
)
@pytest.mark.asyncio
async def test_discord_non_reply_free_channel_skips_backfill(adapter, monkeypatch):
"""A plain (non-reply) message in a free-response channel still skips backfill.
Guards against the reply gate accidentally widening to every free-channel
message only replies (and the existing mention-gap / thread cases) should
hydrate context.
"""
monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
adapter.config.extra["history_backfill"] = True
adapter._fetch_channel_context = AsyncMock(return_value="[Recent channel messages]\n[Alice] noise")
message = make_message(channel=FakeTextChannel(channel_id=321), content="just chatting")
assert message.reference is None # not a reply
await adapter._handle_message(message)
adapter._fetch_channel_context.assert_not_awaited()