fix(telegram): stripped-text fallbacks, re-finalize skip, and tail-only delete guard

Follow-ups on top of the two salvaged GodsBoy commits, all live-validated
against the real Telegram Bot API:

- _edit_overflow_split finalize fallbacks degrade to _strip_mdv2() clean
  text instead of putting raw **markdown** markers on screen (salvaged
  from PR #43463 minus its format-first sizing — live probes show
  Telegram's 4096 limit counts PARSED text, so MarkdownV2 escape
  inflation cannot cause MESSAGE_TOO_LONG and sizing against formatted
  wire length only causes premature splits and fragment messages).
- Skip the redundant requires-finalize edit after a got_done edit that
  split-and-delivered (salvaged from PR #43463): re-finalizing re-splits
  the full text into the adopted continuation and duplicates chunks.
- _send_fallback_final only deletes the stale partial message when the
  fallback re-sent the COMPLETE final text. When the prefix dedup sent
  only the missing tail, the partial IS the head of the answer; deleting
  it left users with only the second half of long responses (live-
  reproduced: flood-control during a long stream -> head deleted,
  ratio 0.54 of content visible). This is the third bug behind the
  'Telegram cut messages' reports and was present on main and both PRs.
This commit is contained in:
Teknium 2026-06-10 13:32:13 -07:00
parent da818510ec
commit 3b4c715e1c
4 changed files with 156 additions and 15 deletions

View file

@ -2348,10 +2348,15 @@ class TelegramAdapter(BasePlatformAdapter):
)
except Exception as fmt_err:
if "not modified" not in str(fmt_err).lower():
logger.warning(
"[%s] Overflow split: MarkdownV2 first-chunk edit "
"failed, falling back to plain text: %s",
self.name, fmt_err,
)
await self._bot.edit_message_text(
chat_id=int(chat_id),
message_id=int(message_id),
text=first_chunk,
text=_strip_mdv2(first_chunk),
)
else:
await self._bot.edit_message_text(
@ -2393,7 +2398,14 @@ class TelegramAdapter(BasePlatformAdapter):
)
for use_markdown in (True, False) if finalize else (False,):
try:
text = self.format_message(chunk) if use_markdown else chunk
if use_markdown:
text = self.format_message(chunk)
else:
# Plain attempt: on finalize the MarkdownV2 attempt
# failed, so degrade to clean stripped text, never
# the raw chunk (raw ** / ``` markers would render
# literally); streaming previews stay raw.
text = _strip_mdv2(chunk) if finalize else chunk
sent_msg = await self._bot.send_message(
chat_id=int(chat_id),
text=text,
@ -2419,7 +2431,7 @@ class TelegramAdapter(BasePlatformAdapter):
try:
sent_msg = await self._bot.send_message(
chat_id=int(chat_id),
text=chunk,
text=_strip_mdv2(chunk) if finalize else chunk,
**retry_thread_kwargs,
**self._link_preview_kwargs(),
**self._notification_kwargs(metadata),

View file

@ -147,6 +147,9 @@ class GatewayStreamConsumer:
self._edit_supported = True # Disabled when progressive edits are no longer usable
self._last_edit_time = 0.0
self._last_sent_text = "" # Track last-sent text to skip redundant edits
# True when the most recent _send_or_edit split-and-delivered across
# continuation messages (the adapter adopted a new message id).
self._last_edit_overflowed = False
self._fallback_final_send = False
self._fallback_prefix = ""
# True when fallback is sending only the missing tail after a partial
@ -586,14 +589,20 @@ class GatewayStreamConsumer:
if self._accumulated:
if self._fallback_final_send:
await self._send_fallback_final(self._accumulated)
elif (
current_update_visible
and not self._adapter_requires_finalize
elif current_update_visible and (
not self._adapter_requires_finalize
or self._last_edit_overflowed
):
# Mid-stream edit above already delivered the
# final accumulated content. Skip the redundant
# final edit — but only for adapters that don't
# need an explicit finalize signal.
# final edit for adapters that don't need an
# explicit finalize signal, and for any adapter
# when that edit split-and-delivered across
# continuations: the split edit carried
# finalize=True itself, and re-finalizing with
# the full text would overflow-split again into
# the adopted continuation, duplicating chunks
# on screen.
self._final_response_sent = True
self._final_content_delivered = True
elif self._message_id:
@ -882,7 +891,12 @@ class GatewayStreamConsumer:
self._notify_new_message()
# Remove the frozen partial message so the user only sees the
# complete fallback response. Best-effort — if the platform doesn't
# complete fallback response. ONLY safe when the fallback re-sent
# the FULL final text (continuation == final_text). When the
# prefix-based dedup above sent only the missing TAIL, the partial
# message IS the head of the answer — deleting it leaves the user
# with only the last part of the response (the "Gemini sent only
# the second half" symptom). Best-effort — if the platform doesn't
# implement ``delete_message``, the delete fails (flood control still
# active, bot lacks permission, message too old to delete), the
# partial remains but at least the full answer was delivered.
@ -890,6 +904,7 @@ class GatewayStreamConsumer:
stale_message_id
and stale_message_id != last_message_id
and not self._fallback_preserve_partial_messages
and continuation == final_text
):
delete_fn = getattr(self.adapter, "delete_message", None)
if delete_fn is not None:
@ -1228,6 +1243,7 @@ class GatewayStreamConsumer:
return True
# Failure already disabled drafts for this run; fall through to
# the regular edit/send path below.
self._last_edit_overflowed = False
try:
if self._message_id is not None:
if self._edit_supported:
@ -1284,6 +1300,7 @@ class GatewayStreamConsumer:
and result.message_id
and result.message_id != self._message_id
):
self._last_edit_overflowed = True
self._message_id = str(result.message_id)
self._message_created_ts = time.monotonic()
self._last_sent_text = ""

View file

@ -794,9 +794,11 @@ class TestSegmentBreakOnToolBoundary:
)
@pytest.mark.asyncio
async def test_fallback_final_deletes_partial_after_chunks_succeed(self):
"""After fallback chunks land, the frozen partial must be deleted so
the user sees only the complete response (#16668)."""
async def test_fallback_final_deletes_partial_after_full_resend(self):
"""After fallback re-sends the COMPLETE response, the frozen partial
must be deleted so the user sees only the complete response (#16668).
Full resend happens when the visible prefix doesn't match the final
text (e.g. post-segment-break content, #10807)."""
adapter = MagicMock()
adapter.send = AsyncMock(
return_value=SimpleNamespace(success=True, message_id="msg_new"),
@ -810,14 +812,49 @@ class TestSegmentBreakOnToolBoundary:
config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
consumer = GatewayStreamConsumer(adapter, "chat_123", config)
# Seed the consumer as if it already edited a partial message that
# later got stuck (flood control etc.) — _message_id is the stale id.
# The stale partial shows pre-tool text that is NOT a prefix of the
# final response — fallback re-sends the complete final text.
consumer._message_id = "msg_partial"
consumer._last_sent_text = "Let me check that for you…"
await consumer._send_fallback_final("Working on it. Done!")
adapter.delete_message.assert_awaited_once_with("chat_123", "msg_partial")
assert consumer._final_response_sent is True
@pytest.mark.asyncio
async def test_fallback_final_keeps_partial_after_tail_only_send(self):
"""When the fallback sends only the missing TAIL (visible prefix
matches the final text), the partial message IS the head of the
answer deleting it would leave the user with only the last part
of the response (the 'model sent only the second half' bug)."""
adapter = MagicMock()
adapter.send = AsyncMock(
return_value=SimpleNamespace(success=True, message_id="msg_new"),
)
adapter.edit_message = AsyncMock(
return_value=SimpleNamespace(success=True),
)
adapter.delete_message = AsyncMock(return_value=None)
adapter.MAX_MESSAGE_LENGTH = 4096
config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5)
consumer = GatewayStreamConsumer(adapter, "chat_123", config)
# Visible partial is a true prefix of the final response — the
# fallback dedup sends only the tail.
consumer._message_id = "msg_partial"
consumer._last_sent_text = "Working on i"
await consumer._send_fallback_final("Working on it. Done!")
adapter.delete_message.assert_awaited_once_with("chat_123", "msg_partial")
# Tail was sent...
sent_contents = [
c.kwargs.get("content", "") for c in adapter.send.call_args_list
]
assert any("Done!" in s and "Working on i" not in s for s in sent_contents)
# ...and the head-bearing partial was NOT deleted.
adapter.delete_message.assert_not_awaited()
assert consumer._final_response_sent is True
@pytest.mark.asyncio

View file

@ -466,6 +466,81 @@ class TestCancelledBestEffortDeliveryFinalizes:
assert consumer.final_content_delivered is True
class TestGotDoneOverflowSplitNotRefinalized:
"""A got_done finalize edit that split-and-delivered across continuation
messages must not be followed by the redundant requires-finalize edit.
After a split, the consumer adopts the last continuation as the live
message and the redundant finalize edit re-submits the FULL accumulated
text against it; the adapter pre-flights that into another overflow
split, editing chunk 1 over the continuation and re-sending the rest,
so the user sees duplicated chunks. The finalize signal was already
carried by the split edit itself.
"""
def _consumer(self, adapter):
# High interval/threshold so the only edit is the got_done finalize.
return GatewayStreamConsumer(
adapter=adapter,
chat_id="chat",
config=StreamConsumerConfig(
edit_interval=10.0, buffer_threshold=10_000, cursor="",
),
)
@pytest.mark.asyncio
async def test_split_finalize_edit_is_not_refinalized(self):
adapter = _make_adapter()
adapter.REQUIRES_EDIT_FINALIZE = True
adapter.edit_message = AsyncMock(return_value=SimpleNamespace(
success=True,
message_id="cont_2",
continuation_message_ids=("cont_2",),
))
consumer = self._consumer(adapter)
consumer.on_delta("oversize **markdown** final reply")
task = asyncio.create_task(consumer.run())
await asyncio.sleep(0.05) # preview send lands; no interval edits
consumer.finish()
await task
finalize_edits = [
c for c in adapter.edit_message.call_args_list
if c.kwargs.get("finalize")
]
assert len(finalize_edits) == 1, (
"split finalize edit must not be re-finalized; the redundant "
"edit re-splits the full text into the adopted continuation "
"and duplicates chunks on screen"
)
assert consumer.final_response_sent is True
assert consumer.final_content_delivered is True
@pytest.mark.asyncio
async def test_non_split_finalize_edit_still_gets_explicit_refinalize(self):
"""The narrow fix must not regress the requires-finalize contract:
a normal (non-split) got_done edit is still followed by the
explicit finalize edit (#25010 semantics unchanged)."""
adapter = _make_adapter()
adapter.REQUIRES_EDIT_FINALIZE = True
adapter.edit_message = AsyncMock(return_value=SimpleNamespace(
success=True, message_id="initial_preview",
))
consumer = self._consumer(adapter)
consumer.on_delta("short final reply")
task = asyncio.create_task(consumer.run())
await asyncio.sleep(0.05)
consumer.finish()
await task
finalize_edits = [
c for c in adapter.edit_message.call_args_list
if c.kwargs.get("finalize")
]
assert len(finalize_edits) == 2
assert consumer.final_response_sent is True
class TestStreamConsumerConfigFreshFinalField:
"""The dataclass field must exist and default to 0 (disabled)."""