mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(gateway): wire clarify tool with inline keyboard buttons on Telegram (#24199)
The clarify tool returned 'not available in this execution context' for every gateway-mode agent because gateway/run.py never passed clarify_callback into the AIAgent constructor. Schema actively encouraged calling it; users never saw the question. Changes: - tools/clarify_gateway.py — new event-based primitive mirroring tools/approval.py: register/wait_for_response/resolve_gateway_clarify with per-session FIFO, threading.Event blocking with 1s heartbeat slices (so the inactivity watchdog keeps ticking), and clear_session for boundary cleanup. - gateway/platforms/base.py — base send_clarify with a numbered-text fallback so every adapter (Discord, Slack, WhatsApp, Signal, Matrix, etc.) gets a working clarify out of the box. Plus an active-session bypass: when the agent is blocked on a text-awaiting clarify, the next non-command message routes inline to the runner's intercept instead of being queued + triggering an interrupt. Same shape as the /approve deadlock fix from PR #4926. - gateway/platforms/telegram.py — concrete send_clarify renders one inline button per choice plus '✏️ Other (type answer)'. cl: callback handler resolves numeric choices immediately, flips to text-capture mode for Other, with the same authorization guards as exec/slash approvals. - gateway/run.py — clarify_callback wired at the cached-agent per-turn callback assignment site (only the user-facing agent path; cron and hygiene-compress agents have no human attached). Bridges sync→async via run_coroutine_threadsafe, blocks with the configured timeout, and returns a '[user did not respond within Xm]' sentinel on timeout so the agent adapts rather than pinning the running-agent guard. Text-intercept added to _handle_message before slash-confirm intercept (skipping slash commands). clear_session called in the run's finally to cancel any orphan entries. - hermes_cli/config.py — agent.clarify_timeout default 600s. - website/docs/user-guide/messaging/telegram.md — Interactive Prompts section. 
Tests: - tests/tools/test_clarify_gateway.py (14 tests) — full primitive coverage: button resolve, open-ended auto-await, Other flip, timeout None, unknown-id idempotency, clear_session cancellation, FIFO ordering, register/unregister notify, config default. - tests/gateway/test_telegram_clarify_buttons.py (12 tests) — render paths (multi-choice/open-ended/long-label/HTML-escape/not-connected), callback dispatch (numeric resolve/Other flip/already-resolved/unauthorized/invalid-token), and base-adapter text fallback. Out of scope: bot-to-bot, guest mode, checklists, poll media, live photos. Closes #24191.
This commit is contained in:
parent
76bbb94be4
commit
29d7c244c5
8 changed files with 1347 additions and 0 deletions
|
|
@ -1743,6 +1743,55 @@ class BasePlatformAdapter(ABC):
|
|||
"""
|
||||
return SendResult(success=False, error="Not supported")
|
||||
|
||||
async def send_clarify(
    self,
    chat_id: str,
    question: str,
    choices: Optional[list],
    clarify_id: str,
    session_key: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Send a clarify prompt to the user.

    Two render modes:

    * **Multiple choice** (``choices`` is a non-empty list) — adapters
      that override this should render inline buttons (one per choice
      plus a final "Other" / free-text option).  Button callbacks
      MUST resolve via
      ``tools.clarify_gateway.resolve_gateway_clarify(clarify_id, response)``
      with the chosen string.  Picking the "Other" button calls
      ``mark_awaiting_text(clarify_id)`` so the next message in the
      session is captured as the response.

    * **Open-ended** (``choices`` is None or empty) — render the
      question as a plain text message; the next user message in the
      session is captured by the gateway's text-intercept and
      resolves the clarify automatically (see
      ``GatewayRunner._maybe_intercept_clarify_text``).

    The default implementation falls back to a numbered text list,
    which works on every platform.  Because this fallback has no
    buttons, a typed reply is the only way to answer, so the entry is
    flipped into text-capture mode via ``mark_awaiting_text`` before
    the prompt is sent.  The reply text (a number such as "2", the
    option text, or a free-form answer) is handed to the agent as
    typed.  Adapters with native button UIs (Telegram, Discord)
    SHOULD override this for a richer UX.

    Args:
        chat_id: Destination chat, passed through to ``self.send``.
        question: The question text shown after the "❓" marker.
        choices: Optional list of options; falsy means open-ended.
        clarify_id: Identifier of the pending clarify entry.
        session_key: Session the prompt belongs to (unused by the
            text fallback; kept for overriding adapters).
        metadata: Optional send metadata, passed through to ``self.send``.

    Returns:
        Whatever ``self.send`` returns for the rendered prompt.
    """
    if choices:
        numbered = [
            f" {i}. {choice}" for i, choice in enumerate(choices, start=1)
        ]
        text = "\n".join([
            f"❓ {question}",
            "",
            *numbered,
            "",
            "Reply with the number, the option text, or your own answer.",
        ])
        # Without buttons nothing else would ever switch a multiple-choice
        # entry to text capture, so the typed reply would be treated as a
        # brand-new turn instead of the answer.  Best-effort: a failure here
        # must not prevent the prompt from being delivered.
        try:
            from tools.clarify_gateway import mark_awaiting_text

            mark_awaiting_text(clarify_id)
        except Exception as exc:
            import logging

            logging.getLogger(__name__).debug(
                "mark_awaiting_text failed for clarify %s: %s", clarify_id, exc
            )
    else:
        text = f"❓ {question}"

    return await self.send(
        chat_id=chat_id,
        content=text,
        metadata=metadata,
    )
|
||||
|
||||
async def send_private_notice(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
|
@ -2831,6 +2880,58 @@ class BasePlatformAdapter(ABC):
|
|||
logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
|
||||
return
|
||||
|
||||
# Clarify text-capture bypass: if the agent is blocked on a
|
||||
# clarify_tool call awaiting a free-form text response (open-
|
||||
# ended clarify, or user picked "Other"), the next non-command
|
||||
# message in this session MUST reach the runner so the
|
||||
# clarify-intercept can resolve it and unblock the agent.
|
||||
#
|
||||
# Without this bypass: the message gets queued in
|
||||
# _pending_messages AND triggers an interrupt, killing the
|
||||
# agent run mid-clarify and discarding the user's answer.
|
||||
# Same shape as the /approve deadlock fix (PR #4926) — both
|
||||
# cases are "agent thread blocked on Event.wait, message must
|
||||
# reach the resolver before being treated as a new turn."
|
||||
if not cmd:
|
||||
try:
|
||||
from tools import clarify_gateway as _clarify_mod
|
||||
_has_text_clarify = (
|
||||
_clarify_mod.get_pending_for_session(session_key) is not None
|
||||
)
|
||||
except Exception:
|
||||
_has_text_clarify = False
|
||||
|
||||
if _has_text_clarify:
|
||||
logger.debug(
|
||||
"[%s] Routing message to clarify text-intercept for %s",
|
||||
self.name, session_key,
|
||||
)
|
||||
try:
|
||||
_thread_meta = _thread_metadata_for_source(
|
||||
event.source, _reply_anchor_for_event(event)
|
||||
)
|
||||
response = await self._message_handler(event)
|
||||
_text, _eph_ttl = self._unwrap_ephemeral(response)
|
||||
if _text:
|
||||
_r = await self._send_with_retry(
|
||||
chat_id=event.source.chat_id,
|
||||
content=_text,
|
||||
reply_to=_reply_anchor_for_event(event),
|
||||
metadata=_thread_meta,
|
||||
)
|
||||
if _eph_ttl > 0 and _r.success and _r.message_id:
|
||||
self._schedule_ephemeral_delete(
|
||||
chat_id=event.source.chat_id,
|
||||
message_id=_r.message_id,
|
||||
ttl_seconds=_eph_ttl,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"[%s] Clarify text-intercept dispatch failed: %s",
|
||||
self.name, e, exc_info=True,
|
||||
)
|
||||
return
|
||||
|
||||
if self._busy_session_handler is not None:
|
||||
try:
|
||||
if await self._busy_session_handler(event, session_key):
|
||||
|
|
|
|||
|
|
@ -427,6 +427,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# Slash-confirm button state: confirm_id → session_key (for /reload-mcp
|
||||
# and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
|
||||
self._slash_confirm_state: Dict[str, str] = {}
|
||||
# Clarify button state: clarify_id → session_key (for the clarify tool's
|
||||
# multiple-choice prompts; see GatewayRunner clarify_callback wiring).
|
||||
self._clarify_state: Dict[str, str] = {}
|
||||
# Notification mode for message sends.
|
||||
# "important" — only final responses, approvals, and slash confirmations
|
||||
# trigger notifications; tool progress, streaming, status
|
||||
|
|
@ -2215,6 +2218,80 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_clarify(
    self,
    chat_id: str,
    question: str,
    choices: Optional[list],
    clarify_id: str,
    session_key: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
    """Post a clarify question to Telegram, with inline buttons when choices exist.

    With a non-empty ``choices`` list the message carries one button per
    option (labelled ``"<n>. <option>"``) followed by a trailing
    "✏️ Other (type answer)" button; button presses arrive as
    ``cl:<clarify_id>:<idx>`` / ``cl:<clarify_id>:other`` callbacks.

    With no choices the question is sent as plain text without a keyboard.

    On a successful send the ``clarify_id → session_key`` mapping is stored
    in ``self._clarify_state``.  Any exception is logged as a warning and
    reported as a failed ``SendResult`` rather than raised.
    """
    if not self._bot:
        return SendResult(success=False, error="Not connected")

    try:
        prompt_html = f"❓ {_html.escape(question)}"
        thread_id = self._metadata_thread_id(metadata)

        send_kwargs: Dict[str, Any] = {
            "chat_id": int(chat_id),
            "text": prompt_html,
            "parse_mode": ParseMode.HTML,
            **self._link_preview_kwargs(),
        }

        if choices:
            # callback_data is limited to 64 bytes by Telegram, hence the
            # compact "cl:<id>:<idx>" form; long labels are shortened to
            # 57 chars plus an ellipsis.
            keyboard = []
            for position, option in enumerate(choices, start=1):
                caption = str(option)
                if len(caption) > 60:
                    caption = caption[:57] + "..."
                button = InlineKeyboardButton(
                    f"{position}. {caption}",
                    callback_data=f"cl:{clarify_id}:{position - 1}",
                )
                keyboard.append([button])
            other_button = InlineKeyboardButton(
                "✏️ Other (type answer)",
                callback_data=f"cl:{clarify_id}:other",
            )
            keyboard.append([other_button])
            send_kwargs["reply_markup"] = InlineKeyboardMarkup(keyboard)

        reply_target = self._reply_to_message_id_for_send(None, metadata)
        send_kwargs["reply_to_message_id"] = reply_target
        thread_kwargs = self._thread_kwargs_for_send(
            chat_id,
            thread_id,
            metadata,
            reply_to_message_id=reply_target,
        )
        send_kwargs.update(thread_kwargs)

        sent = await self._send_message_with_thread_fallback(**send_kwargs)
        # Remember which session this prompt belongs to so the callback
        # handler can recognise button presses for it.
        self._clarify_state[clarify_id] = session_key
        return SendResult(success=True, message_id=str(sent.message_id))
    except Exception as e:
        logger.warning("[%s] send_clarify failed: %s", self.name, e)
        return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_model_picker(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
|
@ -2700,6 +2777,111 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
|
||||
return
|
||||
|
||||
# --- Clarify callbacks (cl:clarify_id:idx | cl:clarify_id:other) ---
|
||||
if data.startswith("cl:"):
|
||||
parts = data.split(":", 2)
|
||||
if len(parts) == 3:
|
||||
clarify_id = parts[1]
|
||||
choice_token = parts[2]
|
||||
|
||||
caller_id = str(getattr(query.from_user, "id", ""))
|
||||
if not self._is_callback_user_authorized(
|
||||
caller_id,
|
||||
chat_id=query_chat_id,
|
||||
chat_type=str(query_chat_type) if query_chat_type is not None else None,
|
||||
thread_id=str(query_thread_id) if query_thread_id is not None else None,
|
||||
user_name=query_user_name,
|
||||
):
|
||||
await query.answer(text="⛔ You are not authorized to answer this prompt.")
|
||||
return
|
||||
|
||||
session_key = self._clarify_state.get(clarify_id)
|
||||
if not session_key:
|
||||
await query.answer(text="This prompt has already been resolved.")
|
||||
return
|
||||
|
||||
user_display = getattr(query.from_user, "first_name", "User")
|
||||
|
||||
if choice_token == "other":
|
||||
# Flip into text-capture mode and tell the user to type
|
||||
# their answer. The gateway's text-intercept will pick
|
||||
# up the next message in this session and resolve the
|
||||
# clarify. Do NOT pop _clarify_state yet — we still
|
||||
# need it if the user is slow to respond and the entry
|
||||
# is cleared by something else.
|
||||
try:
|
||||
from tools.clarify_gateway import mark_awaiting_text
|
||||
mark_awaiting_text(clarify_id)
|
||||
except Exception as exc:
|
||||
logger.warning("[%s] mark_awaiting_text failed: %s", self.name, exc)
|
||||
|
||||
await query.answer(text="✏️ Type your answer in the chat.")
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=f"❓ {query.message.text or ''}\n\n<i>Awaiting typed response from {_html.escape(user_display)}…</i>",
|
||||
parse_mode=ParseMode.HTML,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
|
||||
# Numeric choice → resolve immediately with the chosen text
|
||||
try:
|
||||
idx = int(choice_token)
|
||||
except (ValueError, TypeError):
|
||||
await query.answer(text="Invalid choice.")
|
||||
return
|
||||
|
||||
# Look up the choice text from the entry registered in the
|
||||
# clarify primitive. Fall back to the index if the entry
|
||||
# has been cleaned up (race with timeout / session reset).
|
||||
resolved_text: Optional[str] = None
|
||||
try:
|
||||
from tools.clarify_gateway import _entries as _clarify_entries # type: ignore
|
||||
entry = _clarify_entries.get(clarify_id)
|
||||
if entry and entry.choices and 0 <= idx < len(entry.choices):
|
||||
resolved_text = entry.choices[idx]
|
||||
except Exception:
|
||||
resolved_text = None
|
||||
|
||||
if resolved_text is None:
|
||||
# Race: entry vanished. Echo the index as a number so
|
||||
# the agent at least sees an intentional response
|
||||
# rather than nothing.
|
||||
resolved_text = f"choice {idx + 1}"
|
||||
|
||||
# Pop state and resolve
|
||||
self._clarify_state.pop(clarify_id, None)
|
||||
try:
|
||||
from tools.clarify_gateway import resolve_gateway_clarify
|
||||
resolved = resolve_gateway_clarify(clarify_id, resolved_text)
|
||||
except Exception as exc:
|
||||
logger.error("[%s] resolve_gateway_clarify failed: %s", self.name, exc)
|
||||
resolved = False
|
||||
|
||||
await query.answer(text=f"✓ {resolved_text[:60]}")
|
||||
try:
|
||||
await query.edit_message_text(
|
||||
text=f"❓ {_html.escape(query.message.text or '')}\n\n<b>{_html.escape(user_display)}:</b> {_html.escape(resolved_text)}",
|
||||
parse_mode=ParseMode.HTML,
|
||||
reply_markup=None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if resolved:
|
||||
logger.info(
|
||||
"Telegram clarify button resolved (id=%s, choice=%r, user=%s)",
|
||||
clarify_id, resolved_text, user_display,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"Telegram clarify button: resolve_gateway_clarify returned False (id=%s)",
|
||||
clarify_id,
|
||||
)
|
||||
return
|
||||
|
||||
# --- Update prompt callbacks ---
|
||||
if not data.startswith("update_prompt:"):
|
||||
return
|
||||
|
|
|
|||
109
gateway/run.py
109
gateway/run.py
|
|
@ -5828,6 +5828,37 @@ class GatewayRunner:
|
|||
)
|
||||
_update_prompts.pop(_quick_key, None)
|
||||
|
||||
# Intercept messages that are responses to a pending clarify
|
||||
# request that is awaiting free-form text (either an open-ended
|
||||
# clarify with no choices, or one where the user picked the
|
||||
# "Other" button). The first non-empty user message in the
|
||||
# session resolves the clarify and unblocks the agent thread —
|
||||
# we do NOT route it to the agent as a new turn.
|
||||
try:
|
||||
from tools import clarify_gateway as _clarify_mod
|
||||
_pending_clarify = _clarify_mod.get_pending_for_session(_quick_key)
|
||||
except Exception:
|
||||
_pending_clarify = None
|
||||
if _pending_clarify is not None:
|
||||
_raw_clarify_reply = (event.text or "").strip()
|
||||
# Skip slash commands — the user clearly wanted to issue a
|
||||
# command, not answer the clarify. Leave the clarify pending
|
||||
# so the user can retry; if it times out, the agent unblocks
|
||||
# with a "[user did not respond within Xm]" sentinel response.
|
||||
if _raw_clarify_reply and not _raw_clarify_reply.startswith("/"):
|
||||
_resolved = _clarify_mod.resolve_gateway_clarify(
|
||||
_pending_clarify.clarify_id, _raw_clarify_reply,
|
||||
)
|
||||
if _resolved:
|
||||
logger.info(
|
||||
"Gateway intercepted clarify text response (session=%s, id=%s)",
|
||||
_quick_key, _pending_clarify.clarify_id,
|
||||
)
|
||||
# Acknowledge with empty string so adapters that emit
|
||||
# the agent's response don't double-post. The agent
|
||||
# itself will produce the next user-facing message.
|
||||
return ""
|
||||
|
||||
# Intercept messages that are responses to a pending /reload-mcp
|
||||
# (or future) slash-confirm prompt. Recognized confirm replies are
|
||||
# /approve, /always, /cancel (plus short aliases). Anything else
|
||||
|
|
@ -14957,6 +14988,76 @@ class GatewayRunner:
|
|||
if _pdc is not None:
|
||||
_pdc[session_key] = _release_bg_review_messages
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Clarify callback: present a clarify prompt and block on a response.
|
||||
#
|
||||
# Runs on the agent's worker thread (see clarify_tool's synchronous
|
||||
# callback contract). Bridges sync→async by scheduling the
|
||||
# adapter's send_clarify on the gateway event loop, then blocks on
|
||||
# the clarify primitive's threading.Event with a configurable
|
||||
# timeout. Returns the user's response string, or a sentinel
|
||||
# explaining that no response arrived (so the agent can adapt
|
||||
# rather than hang forever).
|
||||
# ------------------------------------------------------------------
|
||||
def _clarify_callback_sync(question: str, choices) -> str:
    """Show a clarify prompt through the gateway adapter and block for the answer.

    Registers a pending clarify entry, schedules the adapter's
    ``send_clarify`` coroutine on the gateway event loop, and then waits
    on the clarify primitive for up to the configured timeout.

    Args:
        question: The question to put to the user.
        choices: Optional iterable of options; falsy means open-ended.

    Returns:
        The user's response string.  ``""`` when no status adapter is
        available, ``"[clarify prompt could not be delivered]"`` when the
        send fails, or ``"[user did not respond within <N>m]"`` (``<N>s``
        for sub-minute timeouts) when no non-empty response arrives.
    """
    from tools import clarify_gateway as _clarify_mod
    import uuid as _uuid

    if not _status_adapter:
        return ""

    clarify_session = session_key or ""
    choice_list = list(choices) if choices else None

    clarify_id = _uuid.uuid4().hex[:10]
    _clarify_mod.register(
        clarify_id=clarify_id,
        session_key=clarify_session,
        question=question,
        choices=choice_list,
    )

    # Pause typing — like approval, we don't want a "thinking..."
    # status to obscure the prompt or block the user from typing
    # an "Other" response on platforms that disable input while
    # typing is active (Slack Assistant API).
    try:
        _status_adapter.pause_typing_for_chat(_status_chat_id)
    except Exception:
        pass

    send_ok = False
    try:
        fut = asyncio.run_coroutine_threadsafe(
            _status_adapter.send_clarify(
                chat_id=_status_chat_id,
                question=question,
                choices=choice_list,
                clarify_id=clarify_id,
                session_key=clarify_session,
                metadata=_status_thread_metadata,
            ),
            _loop_for_step,
        )
        # Bounded wait so a stuck send cannot pin the worker thread.
        result = fut.result(timeout=15)
        send_ok = bool(getattr(result, "success", False))
    except Exception as exc:
        logger.warning("Clarify send failed: %s", exc)
        send_ok = False

    if not send_ok:
        # Couldn't deliver the prompt — clean up and return a
        # sentinel so the agent can fall back to a sensible
        # default rather than hanging.
        _clarify_mod.clear_session(clarify_session)
        return "[clarify prompt could not be delivered]"

    timeout = _clarify_mod.get_clarify_timeout()
    response = _clarify_mod.wait_for_response(clarify_id, timeout=float(timeout))
    if response is None or response == "":
        # Timeout or session-boundary cancellation.  Report sub-minute
        # timeouts in seconds; whole-minute truncation would say "0m".
        if timeout >= 60:
            waited = f"{int(timeout / 60)}m"
        else:
            waited = f"{int(timeout)}s"
        return f"[user did not respond within {waited}]"
    return response
|
||||
|
||||
agent.clarify_callback = _clarify_callback_sync
|
||||
|
||||
# Store agent reference for interrupt support
|
||||
agent_holder[0] = agent
|
||||
# Capture the full tool definitions for transcript logging
|
||||
|
|
@ -15228,6 +15329,14 @@ class GatewayRunner:
|
|||
result = agent.run_conversation(_run_message, conversation_history=agent_history, task_id=session_id)
|
||||
finally:
|
||||
unregister_gateway_notify(_approval_session_key)
|
||||
# Cancel any pending clarify entries so blocked agent
|
||||
# threads don't hang past the end of the run (interrupt,
|
||||
# completion, gateway shutdown). Idempotent.
|
||||
try:
|
||||
from tools.clarify_gateway import clear_session as _clear_clarify_session
|
||||
_clear_clarify_session(_approval_session_key)
|
||||
except Exception:
|
||||
pass
|
||||
reset_current_session_key(_approval_session_token)
|
||||
result_holder[0] = result
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue