hermes-agent/tools/clarify_tool.py
teknium1 2c3aebcadc fix(clarify): unwrap dict choices at the source so every surface gets clean text
The Discord fix (previous commit) handles dict-shaped clarify choices at the
Discord adapter only. The same dict-repr leak originates upstream at
tools/clarify_tool.py's str(c).strip() normalization — the single
platform-agnostic point both the CLI and every gateway adapter flow through.

When an LLM emits [{"description": "..."}] instead of bare strings, str(c)
produced {'description': '...'} which leaked onto the CLI panel
(cli.py:13048/13081), was returned verbatim as the user's answer
(cli.py:11945), and hit Telegram's numbered list too.

Add _flatten_choice (same label->description->text->title unwrap as the
Discord adapter, name/value excluded, keyless dicts dropped) and apply it at
the normalization line. Fixes CLI + Telegram + all platforms at the root;
the Discord smart-truncation now operates on already-clean text.

Adds johnjacobkenny to AUTHOR_MAP for the salvaged commit.
2026-06-19 06:31:08 -07:00

179 lines
6.7 KiB
Python

#!/usr/bin/env python3
"""
Clarify Tool Module - Interactive Clarifying Questions
Allows the agent to present structured multiple-choice questions or open-ended
prompts to the user. In CLI mode, choices are navigable with arrow keys. On
messaging platforms, choices are rendered as a numbered list.
The actual user-interaction logic lives in the platform layer (cli.py for CLI,
gateway/run.py for messaging). This module defines the schema, validation, and
a thin dispatcher that delegates to a platform-provided callback.
"""
import json
from typing import List, Optional, Callable
# Maximum number of predefined choices the agent can offer. clarify_tool
# truncates longer lists to this many entries, and CLARIFY_SCHEMA advertises
# the same limit as ``maxItems``.
# A 5th "Other (type your answer)" option is always appended by the UI.
MAX_CHOICES = 4
def _flatten_choice(c) -> str:
    """Coerce a single choice into its user-facing display string.

    The schema declares choices as bare strings, but LLMs sometimes emit
    dict-shaped choices like ``[{"description": "..."}]``. A naive ``str(c)``
    turns the whole dict into its Python repr — ``{'description': '...'}`` —
    which then leaks onto every surface that renders the choice AND is
    returned verbatim as the user's answer. Normalising here, at the one
    platform-agnostic entry point, fixes the whole class in one place instead
    of per-adapter.

    Coercion rules, by input type:

    * ``None`` -> ``""``.
    * ``str`` -> the string with surrounding whitespace stripped.
    * ``dict`` -> the first non-blank string found under ``label``,
      ``description``, ``text``, ``title`` (in that order). ``name`` and
      ``value`` are deliberately excluded — they're component-shaped fields
      that could carry raw enum values or short identifiers, not
      human-readable labels. A dict with none of the accepted keys is
      dropped (returns ``""``), since a garbage label is worse than no
      choice at all.
    * ``list`` / ``tuple`` -> each element flattened recursively, empty
      results discarded, the rest joined with a single space.
    * anything else -> ``str(c).strip()``.

    Args:
        c: One raw entry from the ``choices`` argument.

    Returns:
        The display text, or ``""`` when nothing usable could be extracted.
    """
    if c is None:
        return ""
    if isinstance(c, str):
        return c.strip()
    if isinstance(c, dict):
        for key in ("label", "description", "text", "title"):
            v = c.get(key)
            # Only non-blank strings count; a blank or non-string value
            # falls through to the next candidate key.
            if isinstance(v, str) and v.strip():
                return v.strip()
        return ""
    if isinstance(c, (list, tuple)):
        # Skip elements that flatten to "" so the joined label never contains
        # doubled spaces (e.g. ["a", None, "b"] -> "a b", not "a  b").
        parts = (_flatten_choice(x) for x in c)
        return " ".join(p for p in parts if p)
    return str(c).strip()
def clarify_tool(
    question: str,
    choices: Optional[List[str]] = None,
    callback: Optional[Callable] = None,
) -> str:
    """
    Ask the user a question, optionally with multiple-choice options.

    Args:
        question: The question text to present. Must be a non-blank string;
                  surrounding whitespace is stripped.
        choices:  Up to 4 predefined answer choices. When omitted the
                  question is purely open-ended. Each entry is normalised
                  with ``_flatten_choice``; entries that normalise to an
                  empty string are dropped, extra entries beyond
                  ``MAX_CHOICES`` are discarded, and a list that ends up
                  empty is treated as "no choices".
        callback: Platform-provided function that handles the actual UI
                  interaction. Signature: callback(question, choices) -> str.
                  Injected by the agent runner (cli.py / gateway).

    Returns:
        JSON string with the user's response. On success it carries the keys
        ``question``, ``choices_offered`` and ``user_response``. Validation
        failures return whatever ``tool_error`` produces; a missing callback
        or a callback that raises returns a JSON object with an ``error`` key.
    """
    # Tool arguments originate from model-generated JSON, so the question may
    # not be a string at all. Check the type first so a number or object is
    # reported as a normal tool error instead of raising AttributeError on
    # ``.strip()``.
    if not isinstance(question, str) or not question.strip():
        return tool_error("Question text is required.")

    question = question.strip()

    # Validate and trim choices
    if choices is not None:
        if not isinstance(choices, list):
            return tool_error("choices must be a list of strings.")
        # LLMs sometimes emit dict-shaped choices (e.g. [{"description": "..."}])
        # instead of bare strings. _flatten_choice unwraps them to their
        # user-facing text here — the single platform-agnostic entry point —
        # so every surface renders clean text and the resolved answer is
        # never a raw Python dict repr.
        choices = [s for s in (_flatten_choice(c) for c in choices) if s]
        if len(choices) > MAX_CHOICES:
            choices = choices[:MAX_CHOICES]
        if not choices:
            choices = None  # empty list → open-ended

    if callback is None:
        return json.dumps(
            {"error": "Clarify tool is not available in this execution context."},
            ensure_ascii=False,
        )

    # Any failure inside the platform callback is reported back as an error
    # payload rather than propagated to the caller.
    try:
        user_response = callback(question, choices)
    except Exception as exc:
        return json.dumps(
            {"error": f"Failed to get user input: {exc}"},
            ensure_ascii=False,
        )

    return json.dumps({
        "question": question,
        "choices_offered": choices,
        "user_response": str(user_response).strip(),
    }, ensure_ascii=False)
def check_clarify_requirements() -> bool:
    """Report whether the clarify tool can be offered.

    The tool needs nothing external, so the answer is unconditionally
    ``True``.
    """
    return True
# =============================================================================
# OpenAI Function-Calling Schema
# =============================================================================
# Function-calling schema for the "clarify" tool; passed to
# registry.register() as ``schema=`` at the bottom of this module.
#
# NOTE: the prose below hard-codes "up to 4 choices" and "a 5th 'Other'
# option", while ``maxItems`` is driven by MAX_CHOICES. Update the text by
# hand if MAX_CHOICES ever changes.
CLARIFY_SCHEMA = {
    "name": "clarify",
    "description": (
        "Ask the user a question when you need clarification, feedback, or a "
        "decision before proceeding. Supports two modes:\n\n"
        "1. **Multiple choice** — provide up to 4 choices. The user picks one "
        "or types their own answer via a 5th 'Other' option.\n"
        "2. **Open-ended** — omit choices entirely. The user types a free-form "
        "response.\n\n"
        "Use this tool when:\n"
        "- The task is ambiguous and you need the user to choose an approach\n"
        "- You want post-task feedback ('How did that work out?')\n"
        "- You want to offer to save a skill or update memory\n"
        "- A decision has meaningful trade-offs the user should weigh in on\n\n"
        "Do NOT use this tool for simple yes/no confirmation of dangerous "
        "commands (the terminal tool handles that). Prefer making a reasonable "
        "default choice yourself when the decision is low-stakes."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question to present to the user.",
            },
            # Optional parameter: only "question" appears under "required".
            "choices": {
                "type": "array",
                # Declared as plain strings; clarify_tool still tolerates
                # other shapes by normalising each entry with _flatten_choice.
                "items": {"type": "string"},
                # Same limit clarify_tool enforces by truncating the list.
                "maxItems": MAX_CHOICES,
                "description": (
                    "Up to 4 answer choices. Omit this parameter entirely to "
                    "ask an open-ended question. When provided, the UI "
                    "automatically appends an 'Other (type your answer)' option."
                ),
            },
        },
        "required": ["question"],
    },
}
# --- Registry ---
# NOTE(review): this import sits at the bottom of the module instead of with
# the other imports — presumably to avoid a circular import with
# tools.registry; confirm before moving it. clarify_tool can still use
# tool_error because the name is only looked up when the function is called.
from tools.registry import registry, tool_error

# Register the tool as a side effect of importing this module. The handler
# adapts the registry's calling convention (an ``args`` dict plus keyword
# extras) to clarify_tool's signature: a missing "question" becomes "" (which
# clarify_tool rejects), a missing "choices" becomes None (open-ended), and
# ``callback`` is None unless the caller supplies it as a keyword argument.
registry.register(
    name="clarify",
    toolset="clarify",
    schema=CLARIFY_SCHEMA,
    handler=lambda args, **kw: clarify_tool(
        question=args.get("question", ""),
        choices=args.get("choices"),
        callback=kw.get("callback")),
    check_fn=check_clarify_requirements,
    emoji="",
)