hermes-agent/tests/tools/test_clarify_gateway.py
Teknium 29d7c244c5
feat(gateway): wire clarify tool with inline keyboard buttons on Telegram (#24199)
The clarify tool returned 'not available in this execution context' for
every gateway-mode agent because gateway/run.py never passed
clarify_callback into the AIAgent constructor. Schema actively encouraged
calling it; users never saw the question.

Changes:

- tools/clarify_gateway.py — new event-based primitive mirroring
  tools/approval.py: register/wait_for_response/resolve_gateway_clarify
  with per-session FIFO, threading.Event blocking with 1s heartbeat
  slices (so the inactivity watchdog keeps ticking), and
  clear_session for boundary cleanup.

- gateway/platforms/base.py — abstract send_clarify with a numbered-text
  fallback so every adapter (Discord, Slack, WhatsApp, Signal, Matrix,
  etc.) gets a working clarify out of the box. Plus an active-session
  bypass: when the agent is blocked on a text-awaiting clarify, the next
  non-command message routes inline to the runner's intercept instead
  of being queued + triggering an interrupt. Same shape as the /approve
  deadlock fix from PR #4926.

- gateway/platforms/telegram.py — concrete send_clarify renders one
  inline button per choice plus '✏️ Other (type answer)'. cl: callback
  handler resolves numeric choices immediately, flips to text-capture
  mode for Other, with the same authorization guards as exec/slash
  approvals.

- gateway/run.py — clarify_callback wired at the cached-agent per-turn
  callback assignment site (only the user-facing agent path; cron and
  hygiene-compress agents have no human attached). Bridges sync→async
  via run_coroutine_threadsafe, blocks with the configured timeout, and
  returns a '[user did not respond within Xm]' sentinel on timeout so
  the agent adapts rather than pinning the running-agent guard. Text-
  intercept added to _handle_message before slash-confirm intercept
  (skipping slash commands). clear_session called in the run's finally
  to cancel any orphan entries.

- hermes_cli/config.py — agent.clarify_timeout default 600s.

- website/docs/user-guide/messaging/telegram.md — Interactive Prompts
  section.

Tests:

- tests/tools/test_clarify_gateway.py (14 tests) — full primitive
  coverage: button resolve, open-ended auto-await, Other flip, timeout
  None, unknown-id idempotency, clear_session cancellation, FIFO
  ordering, register/unregister notify, config default.

- tests/gateway/test_telegram_clarify_buttons.py (12 tests) — render
  paths (multi-choice/open-ended/long-label/HTML-escape/not-connected),
  callback dispatch (numeric resolve/Other flip/already-resolved/
  unauthorized/invalid-token), and base-adapter text fallback.

Out of scope: bot-to-bot, guest mode, checklists, poll media, live
photos. Closes #24191.
2026-05-12 16:33:33 -07:00

207 lines
7.4 KiB
Python

"""Tests for the gateway-side clarify primitive (tools/clarify_gateway.py).
The clarify tool needs to ask the user a question and block the agent
thread until they respond. These tests cover the module-level state
machine: register, wait, resolve via button, resolve via text-fallback,
"Other"-button text-capture flip, timeout, session boundary cleanup.
"""
from __future__ import annotations
import threading
import time
from concurrent.futures import ThreadPoolExecutor
import pytest
def _clear_clarify_state():
    """Empty the clarify module's shared containers so tests start clean.

    The lock exposed by the module is held for the whole reset.
    """
    from tools import clarify_gateway as cm

    with cm._lock:
        # Same three containers, cleared in the same order.
        for attr_name in ("_entries", "_session_index", "_notify_cbs"):
            getattr(cm, attr_name).clear()
class TestClarifyPrimitive:
    """Core register/wait/resolve mechanics."""

    def setup_method(self):
        """Reset the clarify module's shared state before each test."""
        _clear_clarify_state()

    def test_button_choice_resolves_wait(self):
        """resolve_gateway_clarify unblocks wait_for_response with the chosen string."""
        from tools import clarify_gateway as cm

        cm.register("id1", "sk1", "Pick one", ["A", "B", "C"])

        def resolver():
            time.sleep(0.05)
            cm.resolve_gateway_clarify("id1", "B")

        worker = threading.Thread(target=resolver)
        worker.start()
        try:
            result = cm.wait_for_response("id1", timeout=2.0)
        finally:
            # Join so the resolver thread cannot outlive this test and
            # touch module state after the next test's setup has reset it.
            worker.join(timeout=2.0)
        assert result == "B"

    def test_open_ended_auto_awaits_text(self):
        """Clarify with no choices is in text-capture mode immediately."""
        from tools import clarify_gateway as cm

        entry = cm.register("id2", "sk2", "Free form?", None)
        assert entry.awaiting_text is True
        # get_pending_for_session returns the entry so the gateway
        # text-intercept can find it.
        pending = cm.get_pending_for_session("sk2")
        assert pending is not None
        assert pending.clarify_id == "id2"

    def test_button_choice_does_not_auto_await(self):
        """Multi-choice clarify should NOT be in text-capture mode initially."""
        from tools import clarify_gateway as cm

        entry = cm.register("id3", "sk3", "Pick", ["X", "Y"])
        assert entry.awaiting_text is False
        assert cm.get_pending_for_session("sk3") is None

    def test_other_button_flips_to_text_mode(self):
        """mark_awaiting_text makes get_pending_for_session find the entry."""
        from tools import clarify_gateway as cm

        cm.register("id4", "sk4", "Pick", ["X", "Y"])
        assert cm.get_pending_for_session("sk4") is None
        flipped = cm.mark_awaiting_text("id4")
        assert flipped is True
        pending = cm.get_pending_for_session("sk4")
        assert pending is not None
        assert pending.clarify_id == "id4"

    def test_mark_awaiting_text_unknown_id(self):
        """mark_awaiting_text on a non-existent id returns False."""
        from tools import clarify_gateway as cm

        assert cm.mark_awaiting_text("nope") is False

    def test_timeout_returns_none(self):
        """wait_for_response returns None when no resolve fires within the timeout."""
        from tools import clarify_gateway as cm

        cm.register("id5", "sk5", "Q?", ["A"])
        result = cm.wait_for_response("id5", timeout=0.2)
        assert result is None

    def test_resolve_unknown_id_returns_false(self):
        """resolve_gateway_clarify returns False for an id that was never registered."""
        from tools import clarify_gateway as cm

        assert cm.resolve_gateway_clarify("nope", "anything") is False

    def test_resolve_after_wait_completes_is_noop(self):
        """A late resolve on a finished entry doesn't blow up."""
        from tools import clarify_gateway as cm

        cm.register("id6", "sk6", "Q?", ["A"])
        # Time out, entry gets cleaned up
        cm.wait_for_response("id6", timeout=0.1)
        # Late button click — should not raise
        result = cm.resolve_gateway_clarify("id6", "A")
        assert result is False

    def test_clear_session_cancels_pending_entries(self):
        """clear_session unblocks blocked threads with empty response."""
        from tools import clarify_gateway as cm

        cm.register("id7", "sk7", "Q?", ["A"])

        def waiter():
            return cm.wait_for_response("id7", timeout=10.0)

        with ThreadPoolExecutor(1) as pool:
            fut = pool.submit(waiter)
            # Presumably gives the worker time to start waiting before the
            # session is cleared.
            time.sleep(0.05)
            cancelled = cm.clear_session("sk7")
            assert cancelled == 1
            result = fut.result(timeout=2.0)
            # clear_session sets response="" then the wait returns it
            assert result == ""

    def test_has_pending(self):
        """has_pending is True for a session with a registered entry, else False."""
        from tools import clarify_gateway as cm

        cm.register("id8", "sk8", "Q?", ["A"])
        assert cm.has_pending("sk8") is True
        assert cm.has_pending("nonexistent") is False

    def test_notify_register_unregister_clears_pending(self):
        """unregister_notify cancels any pending clarify so threads unwind."""
        from tools import clarify_gateway as cm

        cm.register("id9", "sk9", "Q?", ["A"])

        def waiter():
            return cm.wait_for_response("id9", timeout=10.0)

        with ThreadPoolExecutor(1) as pool:
            fut = pool.submit(waiter)
            time.sleep(0.05)
            cm.register_notify("sk9", lambda entry: None)
            cm.unregister_notify("sk9")
            # unregister_notify calls clear_session; thread unwinds
            result = fut.result(timeout=2.0)
            assert result == ""

    def test_session_index_isolation(self):
        """Entries from different sessions don't leak across get_pending lookups."""
        from tools import clarify_gateway as cm

        cm.register("idA", "alpha", "Q?", None)  # auto-await text
        cm.register("idB", "beta", "Q?", None)  # auto-await text
        a = cm.get_pending_for_session("alpha")
        b = cm.get_pending_for_session("beta")
        assert a is not None and a.clarify_id == "idA"
        assert b is not None and b.clarify_id == "idB"

    def test_clarify_timeout_config_default(self):
        """get_clarify_timeout returns a positive int.

        The exact value is deliberately not pinned: it may come from the
        user's loaded config rather than the 600s default.
        """
        from tools import clarify_gateway as cm

        timeout = cm.get_clarify_timeout()
        # Default 600s OR whatever is in the user's loaded config.
        # Floor check: must be a positive int, not crashed.
        assert isinstance(timeout, int)
        assert timeout > 0
class TestGatewayTextIntercept:
    """Session lookup used when a text reply answers a pending clarify."""

    def setup_method(self):
        """Begin each test with the clarify module's shared state emptied."""
        _clear_clarify_state()

    def test_get_pending_for_session_returns_oldest_text_awaiting(self):
        """get_pending_for_session picks the oldest entry that is awaiting text.

        Entries not in text-capture mode are passed over; once several
        entries in one session await text, the earliest registered wins.
        """
        from tools import clarify_gateway as cm

        session_key = "sk"

        # Registered first: multi-choice, so not awaiting text yet.
        cm.register("first", session_key, "Q1?", ["A"])
        # Registered second: open-ended, so awaiting text from the start.
        cm.register("second", session_key, "Q2?", None)

        before_flip = cm.get_pending_for_session(session_key)
        # Only the newer entry qualifies at this point.
        assert before_flip is not None
        assert before_flip.clarify_id == "second"

        # Put the older entry into text mode as well; with both awaiting
        # text the lookup should now hand back the older one.
        cm.mark_awaiting_text("first")

        after_flip = cm.get_pending_for_session(session_key)
        assert after_flip is not None
        assert after_flip.clarify_id == "first"