diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index ce6820abca..5453df60e8 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -205,11 +205,20 @@ class GatewayStreamConsumer: await self._send_or_edit(self._accumulated) return - # Tool boundary: the should_edit block above already flushed - # accumulated text without a cursor. Reset state so the next - # text chunk creates a fresh message below any tool-progress - # messages the gateway sent in between. - if got_segment_break: + # Tool boundary: reset message state so the next text chunk + # creates a fresh message below any tool-progress messages. + # + # Exception: when _message_id is "__no_edit__" the platform + # never returned a real message ID (e.g. Signal, webhook with + # github_comment delivery). Resetting to None would re-enter + # the "first send" path on every tool boundary and post one + # platform message per tool call — that is what caused 155 + # comments under a single PR. Instead, keep all state so the + # full continuation is delivered once via _send_fallback_final. + # (When editing fails mid-stream due to flood control the id is + # a real string like "msg_1", not "__no_edit__", so that case + # still resets and creates a fresh segment as intended.) + if got_segment_break and self._message_id != "__no_edit__": self._message_id = None self._accumulated = "" self._last_sent_text = "" diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index b0b3a514af..d698fc0883 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -16,8 +16,18 @@ from collections.abc import Callable, Mapping from dataclasses import dataclass from typing import Any -from prompt_toolkit.auto_suggest import AutoSuggest, Suggestion -from prompt_toolkit.completion import Completer, Completion +# prompt_toolkit is an optional CLI dependency — only needed for +# SlashCommandCompleter and SlashCommandAutoSuggest. Gateway and test +# environments that lack it must still be able to import this module +# for resolve_command, gateway_help_lines, and COMMAND_REGISTRY. +try: + from prompt_toolkit.auto_suggest import AutoSuggest, Suggestion + from prompt_toolkit.completion import Completer, Completion +except ImportError: # pragma: no cover + AutoSuggest = object # type: ignore[assignment,misc] + Completer = object # type: ignore[assignment,misc] + Suggestion = None # type: ignore[assignment] + Completion = None # type: ignore[assignment] # --------------------------------------------------------------------------- diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py index d5a20331b6..5cebb20eee 100644 --- a/tests/gateway/test_stream_consumer.py +++ b/tests/gateway/test_stream_consumer.py @@ -437,6 +437,45 @@ class TestSegmentBreakOnToolBoundary: # Only one send call (the initial message) assert adapter.send.call_count == 1 + @pytest.mark.asyncio + async def test_no_message_id_segment_breaks_do_not_resend(self): + """On a platform that never returns a message_id (e.g. webhook with + github_comment delivery), tool-call segment breaks must NOT trigger + a new adapter.send() per boundary. The fix: _message_id == '__no_edit__' + suppresses the reset so all text accumulates and is sent once.""" + adapter = MagicMock() + # No message_id on first send, then one more for the fallback final + adapter.send = AsyncMock(side_effect=[ + SimpleNamespace(success=True, message_id=None), + SimpleNamespace(success=True, message_id=None), + ]) + adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=True)) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5) + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + # Simulate: text → tool boundary → text → tool boundary → text (3 segments) + consumer.on_delta("Phase 1 text") + consumer.on_delta(None) # tool call boundary + consumer.on_delta("Phase 2 text") + consumer.on_delta(None) # another tool call boundary + consumer.on_delta("Phase 3 text") + consumer.finish() + + await consumer.run() + + # Before the fix this would post 3 comments (one per segment). + # After the fix: only the initial partial + one fallback-final continuation. + assert adapter.send.call_count == 2, ( + f"Expected 2 sends (initial + fallback), got {adapter.send.call_count}" + ) + assert consumer.already_sent + # The continuation must contain the text from segments 2 and 3 + final_text = adapter.send.call_args_list[1][1]["content"] + assert "Phase 2" in final_text + assert "Phase 3" in final_text + @pytest.mark.asyncio async def test_fallback_final_splits_long_continuation_without_dropping_text(self): """Long continuation tails should be chunked when fallback final-send runs."""