diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index ec0323d4738..7026b55cf1b 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1743,6 +1743,55 @@ class BasePlatformAdapter(ABC): """ return SendResult(success=False, error="Not supported") + async def send_clarify( + self, + chat_id: str, + question: str, + choices: Optional[list], + clarify_id: str, + session_key: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Send a clarify prompt to the user. + + Two render modes: + + * **Multiple choice** (``choices`` is a non-empty list) — adapters + that override this should render inline buttons (one per choice + plus a final "Other" / free-text option). Button callbacks + MUST resolve via + ``tools.clarify_gateway.resolve_gateway_clarify(clarify_id, response)`` + with the chosen string. Picking the "Other" button calls + ``mark_awaiting_text(clarify_id)`` so the next message in the + session is captured as the response. + + * **Open-ended** (``choices`` is None or empty) — render the + question as a plain text message; the next user message in the + session is captured by the gateway's text-intercept and + resolves the clarify automatically (see + ``GatewayRunner._maybe_intercept_clarify_text``). + + The default implementation falls back to a numbered text list, + which works on every platform — the user replies with a number + ("2") or with the literal choice text, and the gateway intercepts + and resolves. Adapters with native button UIs (Telegram, Discord) + SHOULD override this for a richer UX. + """ + if choices: + lines = [f"❓ {question}", ""] + for i, choice in enumerate(choices, start=1): + lines.append(f" {i}. {choice}") + lines.append("") + lines.append("Reply with the number, the option text, or your own answer.") + text = "\n".join(lines) + else: + text = f"❓ {question}" + return await self.send( + chat_id=chat_id, + content=text, + metadata=metadata, + ) + async def send_private_notice( self, chat_id: str, @@ -2831,6 +2880,58 @@ class BasePlatformAdapter(ABC): logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True) return + # Clarify text-capture bypass: if the agent is blocked on a + # clarify_tool call awaiting a free-form text response (open- + # ended clarify, or user picked "Other"), the next non-command + # message in this session MUST reach the runner so the + # clarify-intercept can resolve it and unblock the agent. + # + # Without this bypass: the message gets queued in + # _pending_messages AND triggers an interrupt, killing the + # agent run mid-clarify and discarding the user's answer. + # Same shape as the /approve deadlock fix (PR #4926) — both + # cases are "agent thread blocked on Event.wait, message must + # reach the resolver before being treated as a new turn." + if not cmd: + try: + from tools import clarify_gateway as _clarify_mod + _has_text_clarify = ( + _clarify_mod.get_pending_for_session(session_key) is not None + ) + except Exception: + _has_text_clarify = False + + if _has_text_clarify: + logger.debug( + "[%s] Routing message to clarify text-intercept for %s", + self.name, session_key, + ) + try: + _thread_meta = _thread_metadata_for_source( + event.source, _reply_anchor_for_event(event) + ) + response = await self._message_handler(event) + _text, _eph_ttl = self._unwrap_ephemeral(response) + if _text: + _r = await self._send_with_retry( + chat_id=event.source.chat_id, + content=_text, + reply_to=_reply_anchor_for_event(event), + metadata=_thread_meta, + ) + if _eph_ttl > 0 and _r.success and _r.message_id: + self._schedule_ephemeral_delete( + chat_id=event.source.chat_id, + message_id=_r.message_id, + ttl_seconds=_eph_ttl, + ) + except Exception as e: + logger.error( + "[%s] Clarify text-intercept dispatch failed: %s", + self.name, e, exc_info=True, + ) + return + if self._busy_session_handler is not None: try: if await self._busy_session_handler(event, session_key): diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index e91a38ac6b1..a821160cfc8 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -427,6 +427,9 @@ class TelegramAdapter(BasePlatformAdapter): # Slash-confirm button state: confirm_id → session_key (for /reload-mcp # and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm). self._slash_confirm_state: Dict[str, str] = {} + # Clarify button state: clarify_id → session_key (for the clarify tool's + # multiple-choice prompts; see GatewayRunner clarify_callback wiring). + self._clarify_state: Dict[str, str] = {} # Notification mode for message sends. # "important" — only final responses, approvals, and slash confirmations # trigger notifications; tool progress, streaming, status @@ -2215,6 +2218,80 @@ class TelegramAdapter(BasePlatformAdapter): logger.warning("[%s] send_slash_confirm failed: %s", self.name, e) return SendResult(success=False, error=str(e)) + async def send_clarify( + self, + chat_id: str, + question: str, + choices: Optional[list], + clarify_id: str, + session_key: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Render a clarify prompt with one inline button per choice. + + Multi-choice mode (``choices`` non-empty): renders one button per + option plus a final "✏️ Other (type answer)" button. Picking the + "Other" button flips the entry into text-capture mode so the next + message becomes the response. + + Open-ended mode (``choices`` empty): renders the question as plain + text — no buttons. The next message in the session is captured by + the gateway's text-intercept and resolves the clarify. + """ + if not self._bot: + return SendResult(success=False, error="Not connected") + + try: + text = f"❓ {_html.escape(question)}" + thread_id = self._metadata_thread_id(metadata) + + kwargs: Dict[str, Any] = { + "chat_id": int(chat_id), + "text": text, + "parse_mode": ParseMode.HTML, + **self._link_preview_kwargs(), + } + + if choices: + # Telegram caps callback_data at 64 bytes; keep "cl::" + # short. Button label is also capped (~64 chars in practice). + rows = [] + for idx, choice in enumerate(choices): + label = str(choice) + if len(label) > 60: + label = label[:57] + "..." + rows.append([ + InlineKeyboardButton( + f"{idx + 1}. {label}", + callback_data=f"cl:{clarify_id}:{idx}", + ) + ]) + rows.append([ + InlineKeyboardButton( + "✏️ Other (type answer)", + callback_data=f"cl:{clarify_id}:other", + ) + ]) + kwargs["reply_markup"] = InlineKeyboardMarkup(rows) + + reply_to_id = self._reply_to_message_id_for_send(None, metadata) + kwargs["reply_to_message_id"] = reply_to_id + kwargs.update( + self._thread_kwargs_for_send( + chat_id, + thread_id, + metadata, + reply_to_message_id=reply_to_id, + ) + ) + + msg = await self._send_message_with_thread_fallback(**kwargs) + self._clarify_state[clarify_id] = session_key + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e: + logger.warning("[%s] send_clarify failed: %s", self.name, e) + return SendResult(success=False, error=str(e)) + async def send_model_picker( self, chat_id: str, @@ -2700,6 +2777,111 @@ class TelegramAdapter(BasePlatformAdapter): logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True) return + # --- Clarify callbacks (cl:clarify_id:idx | cl:clarify_id:other) --- + if data.startswith("cl:"): + parts = data.split(":", 2) + if len(parts) == 3: + clarify_id = parts[1] + choice_token = parts[2] + + caller_id = str(getattr(query.from_user, "id", "")) + if not self._is_callback_user_authorized( + caller_id, + chat_id=query_chat_id, + chat_type=str(query_chat_type) if query_chat_type is not None else None, + thread_id=str(query_thread_id) if query_thread_id is not None else None, + user_name=query_user_name, + ): + await query.answer(text="⛔ You are not authorized to answer this prompt.") + return + + session_key = self._clarify_state.get(clarify_id) + if not session_key: + await query.answer(text="This prompt has already been resolved.") + return + + user_display = getattr(query.from_user, "first_name", "User") + + if choice_token == "other": + # Flip into text-capture mode and tell the user to type + # their answer. The gateway's text-intercept will pick + # up the next message in this session and resolve the + # clarify. Do NOT pop _clarify_state yet — we still + # need it if the user is slow to respond and the entry + # is cleared by something else. + try: + from tools.clarify_gateway import mark_awaiting_text + mark_awaiting_text(clarify_id) + except Exception as exc: + logger.warning("[%s] mark_awaiting_text failed: %s", self.name, exc) + + await query.answer(text="✏️ Type your answer in the chat.") + try: + await query.edit_message_text( + text=f"❓ {query.message.text or ''}\n\nAwaiting typed response from {_html.escape(user_display)}…", + parse_mode=ParseMode.HTML, + reply_markup=None, + ) + except Exception: + pass + return + + # Numeric choice → resolve immediately with the chosen text + try: + idx = int(choice_token) + except (ValueError, TypeError): + await query.answer(text="Invalid choice.") + return + + # Look up the choice text from the entry registered in the + # clarify primitive. Fall back to the index if the entry + # has been cleaned up (race with timeout / session reset). + resolved_text: Optional[str] = None + try: + from tools.clarify_gateway import _entries as _clarify_entries # type: ignore + entry = _clarify_entries.get(clarify_id) + if entry and entry.choices and 0 <= idx < len(entry.choices): + resolved_text = entry.choices[idx] + except Exception: + resolved_text = None + + if resolved_text is None: + # Race: entry vanished. Echo the index as a number so + # the agent at least sees an intentional response + # rather than nothing. + resolved_text = f"choice {idx + 1}" + + # Pop state and resolve + self._clarify_state.pop(clarify_id, None) + try: + from tools.clarify_gateway import resolve_gateway_clarify + resolved = resolve_gateway_clarify(clarify_id, resolved_text) + except Exception as exc: + logger.error("[%s] resolve_gateway_clarify failed: %s", self.name, exc) + resolved = False + + await query.answer(text=f"✓ {resolved_text[:60]}") + try: + await query.edit_message_text( + text=f"❓ {_html.escape(query.message.text or '')}\n\n{_html.escape(user_display)}: {_html.escape(resolved_text)}", + parse_mode=ParseMode.HTML, + reply_markup=None, + ) + except Exception: + pass + + if resolved: + logger.info( + "Telegram clarify button resolved (id=%s, choice=%r, user=%s)", + clarify_id, resolved_text, user_display, + ) + else: + logger.warning( + "Telegram clarify button: resolve_gateway_clarify returned False (id=%s)", + clarify_id, + ) + return + # --- Update prompt callbacks --- if not data.startswith("update_prompt:"): return diff --git a/gateway/run.py b/gateway/run.py index 559adae89bf..bda0cbf9831 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -5828,6 +5828,37 @@ class GatewayRunner: ) _update_prompts.pop(_quick_key, None) + # Intercept messages that are responses to a pending clarify + # request that is awaiting free-form text (either an open-ended + # clarify with no choices, or one where the user picked the + # "Other" button). The first non-empty user message in the + # session resolves the clarify and unblocks the agent thread — + # we do NOT route it to the agent as a new turn. + try: + from tools import clarify_gateway as _clarify_mod + _pending_clarify = _clarify_mod.get_pending_for_session(_quick_key) + except Exception: + _pending_clarify = None + if _pending_clarify is not None: + _raw_clarify_reply = (event.text or "").strip() + # Skip slash commands — the user clearly wanted to issue a + # command, not answer the clarify. Leave the clarify pending + # so the user can retry; if it times out, the agent unblocks + # with an empty response. + if _raw_clarify_reply and not _raw_clarify_reply.startswith("/"): + _resolved = _clarify_mod.resolve_gateway_clarify( + _pending_clarify.clarify_id, _raw_clarify_reply, + ) + if _resolved: + logger.info( + "Gateway intercepted clarify text response (session=%s, id=%s)", + _quick_key, _pending_clarify.clarify_id, + ) + # Acknowledge with empty string so adapters that emit + # the agent's response don't double-post. The agent + # itself will produce the next user-facing message. + return "" + # Intercept messages that are responses to a pending /reload-mcp # (or future) slash-confirm prompt. Recognized confirm replies are # /approve, /always, /cancel (plus short aliases). Anything else @@ -14957,6 +14988,76 @@ class GatewayRunner: if _pdc is not None: _pdc[session_key] = _release_bg_review_messages + # ------------------------------------------------------------------ + # Clarify callback: present a clarify prompt and block on a response. + # + # Runs on the agent's worker thread (see clarify_tool's synchronous + # callback contract). Bridges sync→async by scheduling the + # adapter's send_clarify on the gateway event loop, then blocks on + # the clarify primitive's threading.Event with a configurable + # timeout. Returns the user's response string, or a sentinel + # explaining that no response arrived (so the agent can adapt + # rather than hang forever). + # ------------------------------------------------------------------ + def _clarify_callback_sync(question: str, choices) -> str: + from tools import clarify_gateway as _clarify_mod + import uuid as _uuid + + if not _status_adapter: + return "" + + clarify_id = _uuid.uuid4().hex[:10] + _clarify_mod.register( + clarify_id=clarify_id, + session_key=session_key or "", + question=question, + choices=list(choices) if choices else None, + ) + + # Pause typing — like approval, we don't want a "thinking..." + # status to obscure the prompt or block the user from typing + # an "Other" response on platforms that disable input while + # typing is active (Slack Assistant API). + try: + _status_adapter.pause_typing_for_chat(_status_chat_id) + except Exception: + pass + + send_ok = False + try: + fut = asyncio.run_coroutine_threadsafe( + _status_adapter.send_clarify( + chat_id=_status_chat_id, + question=question, + choices=list(choices) if choices else None, + clarify_id=clarify_id, + session_key=session_key or "", + metadata=_status_thread_metadata, + ), + _loop_for_step, + ) + result = fut.result(timeout=15) + send_ok = bool(getattr(result, "success", False)) + except Exception as exc: + logger.warning("Clarify send failed: %s", exc) + send_ok = False + + if not send_ok: + # Couldn't deliver the prompt — clean up and return + # sentinel so the agent can fall back to a sensible + # default rather than hanging. + _clarify_mod.clear_session(session_key or "") + return "[clarify prompt could not be delivered]" + + timeout = _clarify_mod.get_clarify_timeout() + response = _clarify_mod.wait_for_response(clarify_id, timeout=float(timeout)) + if response is None or response == "": + # Timeout or session-boundary cancellation + return f"[user did not respond within {int(timeout / 60)}m]" + return response + + agent.clarify_callback = _clarify_callback_sync + # Store agent reference for interrupt support agent_holder[0] = agent # Capture the full tool definitions for transcript logging @@ -15228,6 +15329,14 @@ class GatewayRunner: result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id) finally: unregister_gateway_notify(_approval_session_key) + # Cancel any pending clarify entries so blocked agent + # threads don't hang past the end of the run (interrupt, + # completion, gateway shutdown). Idempotent. + try: + from tools.clarify_gateway import clear_session as _clear_clarify_session + _clear_clarify_session(_approval_session_key) + except Exception: + pass reset_current_session_key(_approval_session_token) result_holder[0] = result diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 038aca518fb..dc3e414948b 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -477,6 +477,12 @@ DEFAULT_CONFIG = { # threshold before escalating to a full timeout. The warning fires # once per run and does not interrupt the agent. 0 = disable warning. "gateway_timeout_warning": 900, + # Maximum time (seconds) the gateway will block an agent waiting for + # a clarify-tool response from the user. Hit this and the agent + # unblocks with "[user did not respond within Xm]" so it can adapt + # rather than pinning the running-agent guard forever. CLI clarify + # blocks indefinitely (input() is synchronous) and ignores this. + "clarify_timeout": 600, # Periodic "still working" notification interval (seconds). # Sends a status message every N seconds so the user knows the # agent hasn't died during long tasks. 0 = disable notifications. diff --git a/tests/gateway/test_telegram_clarify_buttons.py b/tests/gateway/test_telegram_clarify_buttons.py new file mode 100644 index 00000000000..b9e7bd5130f --- /dev/null +++ b/tests/gateway/test_telegram_clarify_buttons.py @@ -0,0 +1,451 @@ +"""Tests for Telegram inline keyboard clarify buttons. + +Mirrors test_telegram_approval_buttons.py for the new ``send_clarify`` and +``cl:`` callback dispatch added in feat/clarify-gateway-buttons. +""" + +import asyncio +import os +import sys +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +# --------------------------------------------------------------------------- +# Ensure the repo root is importable +# --------------------------------------------------------------------------- +_repo = str(Path(__file__).resolve().parents[2]) +if _repo not in sys.path: + sys.path.insert(0, _repo) + + +# --------------------------------------------------------------------------- +# Minimal Telegram mock so TelegramAdapter can be imported (mirrors +# test_telegram_approval_buttons.py) +# --------------------------------------------------------------------------- +def _ensure_telegram_mock(): + if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"): + return + + mod = MagicMock() + mod.ext.ContextTypes.DEFAULT_TYPE = type(None) + mod.constants.ParseMode.MARKDOWN = "Markdown" + mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2" + mod.constants.ParseMode.HTML = "HTML" + mod.constants.ChatType.PRIVATE = "private" + mod.constants.ChatType.GROUP = "group" + mod.constants.ChatType.SUPERGROUP = "supergroup" + mod.constants.ChatType.CHANNEL = "channel" + mod.error.NetworkError = type("NetworkError", (OSError,), {}) + mod.error.TimedOut = type("TimedOut", (OSError,), {}) + mod.error.BadRequest = type("BadRequest", (Exception,), {}) + + for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"): + sys.modules.setdefault(name, mod) + sys.modules.setdefault("telegram.error", mod.error) + + +_ensure_telegram_mock() + +from gateway.platforms.telegram import TelegramAdapter +from gateway.config import Platform, PlatformConfig + + +def _make_adapter(extra=None): + config = PlatformConfig(enabled=True, token="test-token", extra=extra or {}) + adapter = TelegramAdapter(config) + adapter._bot = AsyncMock() + adapter._app = MagicMock() + return adapter + + +def _clear_clarify_state(): + from tools import clarify_gateway as cm + with cm._lock: + cm._entries.clear() + cm._session_index.clear() + cm._notify_cbs.clear() + + +# =========================================================================== +# send_clarify — render +# =========================================================================== + +class TestTelegramSendClarify: + """Verify the rendered prompt has buttons or none, and stores state.""" + + def setup_method(self): + _clear_clarify_state() + + @pytest.mark.asyncio + async def test_multi_choice_renders_buttons_and_other(self): + adapter = _make_adapter() + mock_msg = MagicMock() + mock_msg.message_id = 100 + adapter._bot.send_message = AsyncMock(return_value=mock_msg) + + result = await adapter.send_clarify( + chat_id="12345", + question="Which option?", + choices=["alpha", "beta", "gamma"], + clarify_id="cid1", + session_key="sk1", + ) + + assert result.success is True + assert result.message_id == "100" + + kwargs = adapter._bot.send_message.call_args[1] + assert kwargs["chat_id"] == 12345 + assert "Which option?" in kwargs["text"] + # InlineKeyboardMarkup with N+1 buttons (3 choices + Other) + markup = kwargs["reply_markup"] + assert markup is not None + # Mocked InlineKeyboardMarkup — just verify it was constructed + # with rows. We check state instead of poking the mock structure. + assert "cid1" in adapter._clarify_state + assert adapter._clarify_state["cid1"] == "sk1" + + @pytest.mark.asyncio + async def test_open_ended_no_keyboard(self): + adapter = _make_adapter() + mock_msg = MagicMock() + mock_msg.message_id = 101 + adapter._bot.send_message = AsyncMock(return_value=mock_msg) + + result = await adapter.send_clarify( + chat_id="12345", + question="What is your name?", + choices=None, + clarify_id="cid2", + session_key="sk2", + ) + + assert result.success is True + kwargs = adapter._bot.send_message.call_args[1] + # No reply_markup means no buttons — open-ended path + assert "reply_markup" not in kwargs + assert "What is your name?" in kwargs["text"] + assert adapter._clarify_state["cid2"] == "sk2" + + @pytest.mark.asyncio + async def test_not_connected(self): + adapter = _make_adapter() + adapter._bot = None + result = await adapter.send_clarify( + chat_id="12345", + question="?", + choices=["a"], + clarify_id="cid3", + session_key="sk3", + ) + assert result.success is False + + @pytest.mark.asyncio + async def test_truncates_long_choice_label(self): + adapter = _make_adapter() + mock_msg = MagicMock() + mock_msg.message_id = 102 + adapter._bot.send_message = AsyncMock(return_value=mock_msg) + + long_choice = "x" * 200 # > 60 char cap + result = await adapter.send_clarify( + chat_id="12345", + question="?", + choices=[long_choice], + clarify_id="cid4", + session_key="sk4", + ) + assert result.success is True + # The truncation logic replaces with "..." past 57 chars; we don't + # inspect the mock's button labels directly (auto-MagicMock), but + # we can verify the call didn't raise on absurdly long input. + + @pytest.mark.asyncio + async def test_html_escapes_question(self): + adapter = _make_adapter() + mock_msg = MagicMock() + mock_msg.message_id = 103 + adapter._bot.send_message = AsyncMock(return_value=mock_msg) + + await adapter.send_clarify( + chat_id="12345", + question="", + choices=["x"], + clarify_id="cid5", + session_key="sk5", + ) + kwargs = adapter._bot.send_message.call_args[1] + # Must NOT contain raw