hermes-agent/tests/cli/test_cli_approval_ui.py
kshitijk4poor 8e71b5136b fix(cli): paint approval/clarify/sudo/secret modal prompts directly, not via the throttle (#41098)
In classic CLI mode the dangerous-command approval prompt (and the clarify,
sudo, and secret-capture prompts) could fail to render: the user saw
'⏱ Timeout — denying command' after 60s without ever seeing the panel,
making approvals.mode: manual unusable.

Root cause. These prompts run their wait loop on the agent/background thread:
they set modal state that a ConditionalContainer's filter reads, then call
self._invalidate() to repaint so the panel appears. _invalidate() is a
THROTTLED wrapper built for high-frequency background repaints (spinner frames,
streaming) — it (a) returns early while a SIGWINCH resize-recovery is pending,
and (b) otherwise only repaints if 250ms elapsed since the last paint. Under
either condition the modal's entry paint is silently dropped, the
ConditionalContainer never re-evaluates, and the prompt times out unseen.

The throttle never belonged on these paths. Originally the callbacks painted
with a direct self._app.invalidate() and worked; a throttle PR blanket-replaced
every invalidate (including these rare, one-shot, user-blocking modal paints)
with the throttled _invalidate(); a later commit removed an idle 1Hz repaint
that had been masking dropped modal paints, surfacing the bug. Notably the
modal KEY-BINDING handlers (↑/↓/Enter) already paint with a direct
event.app.invalidate(), never the throttle — the background-thread callbacks
were the inconsistent ones.

Fix. Add a small _paint_now() helper that paints directly (guarded for a
missing _app, exception-safe) and route the four modal paths' entry, response,
countdown, and teardown paints through it — matching the key-handler idiom.
This covers approval, clarify, sudo, and the secret-capture teardown
(_submit_secret_response, which previously used the throttled _invalidate() so
its panel could linger after submit). _invalidate() is left untouched and its
docstring now states it is for high-frequency background repaints only;
modal/interactive paints must use _paint_now()/_app.invalidate() directly. This
also fixes the resize-recovery edge case for free (a direct paint never
consults the resize guard) without a throttle-bypass flag that could be
cargo-culted onto hot paths. Countdown refresh cadence tightened 5s->1s so the
timer stays visible while waiting, and a copy-pasted duplicate countdown block
in _clarify_callback is removed.

Tests: TestModalPaintNow drives all three wait-loop callbacks on a background
thread with BOTH gates active (_resize_recovery_pending=True + a recent
_last_invalidate in the throttle window) and asserts the panel paints on entry
AND repaints on teardown; plus a secret-teardown test, a direct
_paint_now-vs-_invalidate gate test, and a no-_app safety test. Each modal test
fails if its paint is reverted to _invalidate(). 17 in-file tests pass; full
tests/cli suite green (900).

Diagnosis credit: the throttle-drop root cause was identified by @sanidhyasin
in #41116; @islam666 independently reached the same direct-invalidate approach
in #41166; original report #41098 by @jodonnel.
2026-06-08 00:46:43 +05:30

541 lines
21 KiB
Python

import queue
import threading
import time
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import cli as cli_module
from cli import HermesCLI
class _FakeBuffer:
def __init__(self, text="", cursor_position=None):
self.text = text
self.cursor_position = len(text) if cursor_position is None else cursor_position
def reset(self, append_to_history=False):
self.text = ""
self.cursor_position = 0
def _make_cli_stub():
cli = HermesCLI.__new__(HermesCLI)
cli._approval_state = None
cli._approval_deadline = 0
cli._approval_lock = threading.Lock()
cli._sudo_state = None
cli._sudo_deadline = 0
cli._modal_input_snapshot = None
cli._invalidate = MagicMock()
cli._app = SimpleNamespace(invalidate=MagicMock(), current_buffer=_FakeBuffer())
return cli
def _make_background_cli_stub():
cli = _make_cli_stub()
cli._background_task_counter = 0
cli._background_tasks = {}
cli._ensure_runtime_credentials = MagicMock(return_value=True)
cli._resolve_turn_agent_config = MagicMock(return_value={
"model": "test-model",
"runtime": {
"api_key": "test-key",
"base_url": "https://example.test/v1",
"provider": "test",
"api_mode": "chat_completions",
},
"request_overrides": None,
})
cli.max_turns = 90
cli.enabled_toolsets = []
cli._session_db = None
cli.reasoning_config = {}
cli.service_tier = None
cli._providers_only = None
cli._providers_ignore = None
cli._providers_order = None
cli._provider_sort = None
cli._provider_require_params = None
cli._provider_data_collection = None
cli._openrouter_min_coding_score = None
cli._fallback_model = None
cli._agent_running = False
cli._spinner_text = ""
cli.bell_on_complete = False
cli.final_response_markdown = "strip"
return cli
class TestCliApprovalUi:
def test_sudo_prompt_restores_existing_draft_after_response(self):
cli = _make_cli_stub()
cli._app.current_buffer = _FakeBuffer("draft command", cursor_position=5)
result = {}
def _run_callback():
result["value"] = cli._sudo_password_callback()
with patch.object(cli_module, "_cprint"):
thread = threading.Thread(target=_run_callback, daemon=True)
thread.start()
deadline = time.time() + 2
while cli._sudo_state is None and time.time() < deadline:
time.sleep(0.01)
assert cli._sudo_state is not None
assert cli._app.current_buffer.text == ""
cli._app.current_buffer.text = "secret"
cli._app.current_buffer.cursor_position = len("secret")
cli._sudo_state["response_queue"].put("secret")
thread.join(timeout=2)
assert result["value"] == "secret"
assert cli._app.current_buffer.text == "draft command"
assert cli._app.current_buffer.cursor_position == 5
def test_approval_callback_includes_view_for_long_commands(self):
cli = _make_cli_stub()
command = "sudo dd if=/tmp/githubcli-keyring.gpg of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress"
result = {}
def _run_callback():
result["value"] = cli._approval_callback(command, "disk copy")
thread = threading.Thread(target=_run_callback, daemon=True)
thread.start()
deadline = time.time() + 2
while cli._approval_state is None and time.time() < deadline:
time.sleep(0.01)
assert cli._approval_state is not None
assert "view" in cli._approval_state["choices"]
cli._approval_state["response_queue"].put("deny")
thread.join(timeout=2)
assert result["value"] == "deny"
def test_handle_approval_selection_view_expands_in_place(self):
cli = _make_cli_stub()
cli._approval_state = {
"command": "sudo dd if=/tmp/in of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress",
"description": "disk copy",
"choices": ["once", "session", "always", "deny", "view"],
"selected": 4,
"response_queue": queue.Queue(),
}
cli._handle_approval_selection()
assert cli._approval_state is not None
assert cli._approval_state["show_full"] is True
assert "view" not in cli._approval_state["choices"]
assert cli._approval_state["selected"] == 3
assert cli._approval_state["response_queue"].empty()
def test_approval_display_places_title_inside_box_not_border(self):
cli = _make_cli_stub()
cli._approval_state = {
"command": "sudo dd if=/tmp/in of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress",
"description": "disk copy",
"choices": ["once", "session", "always", "deny", "view"],
"selected": 0,
"response_queue": queue.Queue(),
}
fragments = cli._get_approval_display_fragments()
rendered = "".join(text for _style, text in fragments)
lines = rendered.splitlines()
assert lines[0].startswith("")
assert "Dangerous Command" not in lines[0]
assert any("Dangerous Command" in line for line in lines[1:3])
assert "Show full command" in rendered
assert "githubcli-archive-keyring.gpg" not in rendered
def test_approval_display_shows_full_command_after_view(self):
cli = _make_cli_stub()
full_command = "sudo dd if=/tmp/in of=/usr/share/keyrings/githubcli-archive-keyring.gpg bs=4M status=progress"
cli._approval_state = {
"command": full_command,
"description": "disk copy",
"choices": ["once", "session", "always", "deny"],
"selected": 0,
"show_full": True,
"response_queue": queue.Queue(),
}
fragments = cli._get_approval_display_fragments()
rendered = "".join(text for _style, text in fragments)
assert "..." not in rendered
assert "githubcli-" in rendered
assert "archive-" in rendered
assert "keyring.gpg" in rendered
assert "status=progress" in rendered
def test_approval_display_preserves_command_and_choices_with_long_description(self):
"""Regression: long tirith descriptions used to push approve/deny off-screen.
The panel must always render the command and every choice, even when
the description would otherwise wrap into 10+ lines. The description
gets truncated with a marker instead.
"""
cli = _make_cli_stub()
long_desc = (
"Security scan — [CRITICAL] Destructive shell command with wildcard expansion: "
"The command performs a recursive deletion of log files which may contain "
"audit information relevant to active incident investigations, running services "
"that rely on log files for state, rotated archives, and other system artifacts. "
"Review whether this is intended before approving. Consider whether a targeted "
"deletion with more specific filters would better match the intent."
)
cli._approval_state = {
"command": "rm -rf /var/log/apache2/*.log",
"description": long_desc,
"choices": ["once", "session", "always", "deny"],
"selected": 0,
"response_queue": queue.Queue(),
}
# Simulate a compact terminal where the old unbounded panel would overflow.
import shutil as _shutil
with patch("cli.shutil.get_terminal_size",
return_value=_shutil.os.terminal_size((100, 20))):
fragments = cli._get_approval_display_fragments()
rendered = "".join(text for _style, text in fragments)
# Command must be fully visible (rm -rf /var/log/apache2/*.log is short).
assert "rm -rf /var/log/apache2/*.log" in rendered
# Every choice must render — this is the core bug: approve/deny were
# getting clipped off the bottom of the panel.
assert "Allow once" in rendered
assert "Allow for this session" in rendered
assert "Add to permanent allowlist" in rendered
assert "Deny" in rendered
# The bottom border must render (i.e. the panel is self-contained).
assert rendered.rstrip().endswith("")
# The description gets truncated — marker should appear.
assert "(description truncated)" in rendered
def test_approval_display_skips_description_on_very_short_terminal(self):
"""On a 12-row terminal, only the command and choices have room.
The description is dropped entirely rather than partially shown, so the
choices never get clipped.
"""
cli = _make_cli_stub()
cli._approval_state = {
"command": "rm -rf /var/log/apache2/*.log",
"description": "recursive delete",
"choices": ["once", "session", "always", "deny"],
"selected": 0,
"response_queue": queue.Queue(),
}
import shutil as _shutil
with patch("cli.shutil.get_terminal_size",
return_value=_shutil.os.terminal_size((100, 12))):
fragments = cli._get_approval_display_fragments()
rendered = "".join(text for _style, text in fragments)
# Command visible.
assert "rm -rf /var/log/apache2/*.log" in rendered
# All four choices visible.
for label in ("Allow once", "Allow for this session",
"Add to permanent allowlist", "Deny"):
assert label in rendered, f"choice {label!r} missing"
def test_approval_display_truncates_giant_command_in_view_mode(self):
"""If the user hits /view on a massive command, choices still render.
The command gets truncated with a marker; the description gets dropped
if there's no remaining row budget.
"""
cli = _make_cli_stub()
# 50 lines of command when wrapped at ~64 chars.
giant_cmd = "bash -c 'echo " + ("x" * 3000) + "'"
cli._approval_state = {
"command": giant_cmd,
"description": "shell command via -c/-lc flag",
"choices": ["once", "session", "always", "deny"],
"selected": 0,
"show_full": True,
"response_queue": queue.Queue(),
}
import shutil as _shutil
with patch("cli.shutil.get_terminal_size",
return_value=_shutil.os.terminal_size((100, 24))):
fragments = cli._get_approval_display_fragments()
rendered = "".join(text for _style, text in fragments)
# All four choices visible even with a huge command.
for label in ("Allow once", "Allow for this session",
"Add to permanent allowlist", "Deny"):
assert label in rendered, f"choice {label!r} missing"
# Command got truncated with a marker.
assert "(command truncated" in rendered
def test_background_task_registers_thread_local_approval_callbacks(self):
"""Background /btw tasks must use the prompt_toolkit approval UI.
The foreground chat path registers dangerous-command callbacks inside
its worker thread because tools.terminal_tool stores them in
threading.local(). /background used to skip that, so dangerous commands
fell back to raw input() in a background thread and timed out under
prompt_toolkit.
"""
cli = _make_background_cli_stub()
seen = {}
class FakeAgent:
def __init__(self, **kwargs):
self._print_fn = None
self.thinking_callback = None
def run_conversation(self, **kwargs):
from tools.terminal_tool import (
_get_approval_callback,
_get_sudo_password_callback,
)
seen["approval"] = _get_approval_callback()
seen["sudo"] = _get_sudo_password_callback()
return {
"final_response": "done",
"messages": [],
"completed": True,
"failed": False,
}
with patch.object(cli_module, "AIAgent", FakeAgent), \
patch.object(cli_module, "_cprint"), \
patch.object(cli_module, "ChatConsole") as chat_console:
chat_console.return_value.print = MagicMock()
cli._handle_background_command("/btw check weather")
deadline = time.time() + 2
while cli._background_tasks and time.time() < deadline:
time.sleep(0.01)
assert seen["approval"].__self__ is cli
assert seen["approval"].__func__ is HermesCLI._approval_callback
assert seen["sudo"].__self__ is cli
assert seen["sudo"].__func__ is HermesCLI._sudo_password_callback
assert not cli._background_tasks
def _make_real_paint_cli_stub():
"""A stub whose modal repaint path runs the REAL _paint_now / _invalidate.
Both gates are set adversarially: _resize_recovery_pending=True and a recent
_last_invalidate inside the throttle window. A throttled _invalidate() would
be dropped under these conditions — _paint_now must paint regardless.
"""
cli = HermesCLI.__new__(HermesCLI)
cli._approval_state = None
cli._approval_deadline = 0
cli._approval_lock = threading.Lock()
cli._sudo_state = None
cli._sudo_deadline = 0
cli._clarify_state = None
cli._clarify_freetext = False
cli._clarify_deadline = 0
cli._modal_input_snapshot = None
# Real methods, not mocks.
cli._paint_now = HermesCLI._paint_now.__get__(cli, HermesCLI)
cli._invalidate = HermesCLI._invalidate.__get__(cli, HermesCLI)
cli._resize_recovery_pending = True # gate 1: resize in flight
cli._last_invalidate = time.monotonic() # gate 2: inside throttle window
cli._app = SimpleNamespace(invalidate=MagicMock(), current_buffer=_FakeBuffer())
return cli
class TestModalPaintNow:
"""Regression for #41098 — modal prompts must paint immediately.
The dangerous-command approval, clarify, and sudo prompts run their wait
loop on a background thread, set modal state a ConditionalContainer reads,
then must repaint so the panel becomes visible. They used the throttled
_invalidate(), whose paint is silently dropped on a 250ms window collision
or while a resize is pending — so the prompt timed out unseen. They now use
_paint_now(), which paints directly like the modal key-binding handlers.
"""
def test_paint_now_bypasses_throttle_and_resize_guard(self):
cli = _make_real_paint_cli_stub()
# A bare _invalidate() is suppressed under both gates...
cli._invalidate()
assert not cli._app.invalidate.called
# ...but _paint_now() always paints.
cli._paint_now()
assert cli._app.invalidate.called
def test_paint_now_no_app_is_safe(self):
cli = HermesCLI.__new__(HermesCLI)
cli._app = None
cli._paint_now() # must not raise
def _drive(self, cli, target, state_attr):
result = {}
def _run():
result["value"] = target()
with patch.object(cli_module, "_cprint"):
thread = threading.Thread(target=_run, daemon=True)
thread.start()
deadline = time.time() + 2
while getattr(cli, state_attr) is None and time.time() < deadline:
time.sleep(0.01)
assert getattr(cli, state_attr) is not None
assert cli._app.invalidate.called, (
f"{state_attr} panel was not painted despite throttle + resize gates"
)
# Reset so we can prove the response-received teardown also repaints
# (the panel must clear at once, not be held by the throttle).
cli._app.invalidate.reset_mock()
getattr(cli, state_attr)["response_queue"].put(
"deny" if state_attr == "_approval_state" else
("a" if state_attr == "_clarify_state" else "pw")
)
thread.join(timeout=2)
# clarify returns immediately on a response (no teardown repaint);
# approval and sudo repaint to tear the panel down.
if state_attr != "_clarify_state":
assert cli._app.invalidate.called, (
f"{state_attr} panel was not repainted on teardown"
)
assert not thread.is_alive()
return result["value"]
def test_approval_prompt_paints_under_both_gates(self):
cli = _make_real_paint_cli_stub()
value = self._drive(
cli, lambda: cli._approval_callback("rm -rf /tmp/scratch", "danger"),
"_approval_state",
)
assert value == "deny"
def test_clarify_prompt_paints_under_both_gates(self):
cli = _make_real_paint_cli_stub()
value = self._drive(
cli, lambda: cli._clarify_callback("Pick one", ["a", "b"]),
"_clarify_state",
)
assert value == "a"
def test_sudo_prompt_paints_under_both_gates(self):
cli = _make_real_paint_cli_stub()
value = self._drive(cli, cli._sudo_password_callback, "_sudo_state")
assert value == "pw"
def test_secret_response_teardown_paints(self):
"""_submit_secret_response tears the secret panel down via _paint_now,
so the panel clears immediately rather than being held by the throttle."""
cli = _make_real_paint_cli_stub()
cli._secret_state = {"response_queue": queue.Queue()}
cli._secret_deadline = 0
cli._submit_secret_response("hunter2")
assert cli._secret_state is None
assert cli._app.invalidate.called
assert cli._secret_state is None # cleared
class TestApprovalCallbackThreadLocalWiring:
"""Regression guard for the thread-local callback freeze (#13617 / #13618).
After 62348cff made _approval_callback / _sudo_password_callback thread-local
(ACP GHSA-qg5c-hvr5-hjgr), the CLI agent thread could no longer see callbacks
registered in the main thread — the dangerous-command prompt silently fell
back to stdin input() and deadlocked against prompt_toolkit. The fix is to
register the callbacks INSIDE the agent worker thread (matching the ACP
pattern). These tests lock in that invariant.
"""
def test_main_thread_registration_is_invisible_to_child_thread(self):
"""Confirms the underlying threading.local semantics that drove the bug.
If this ever starts passing as "visible", the thread-local isolation
is gone and the ACP race GHSA-qg5c-hvr5-hjgr may be back.
"""
from tools.terminal_tool import (
set_approval_callback,
_get_approval_callback,
)
def main_cb(_cmd, _desc):
return "once"
set_approval_callback(main_cb)
try:
seen = {}
def _child():
seen["value"] = _get_approval_callback()
t = threading.Thread(target=_child, daemon=True)
t.start()
t.join(timeout=2)
assert seen["value"] is None
finally:
set_approval_callback(None)
def test_child_thread_registration_is_visible_and_cleared_in_finally(self):
"""The fix pattern: register INSIDE the worker thread, clear in finally.
This is exactly what cli.py's run_agent() closure does. If this test
fails, the CLI approval prompt freeze (#13617) has regressed.
"""
from tools.terminal_tool import (
set_approval_callback,
set_sudo_password_callback,
_get_approval_callback,
_get_sudo_password_callback,
)
def approval_cb(_cmd, _desc):
return "once"
def sudo_cb():
return "hunter2"
seen = {}
def _worker():
# Mimic cli.py's run_agent() thread target.
set_approval_callback(approval_cb)
set_sudo_password_callback(sudo_cb)
try:
seen["approval"] = _get_approval_callback()
seen["sudo"] = _get_sudo_password_callback()
finally:
set_approval_callback(None)
set_sudo_password_callback(None)
seen["approval_after"] = _get_approval_callback()
seen["sudo_after"] = _get_sudo_password_callback()
t = threading.Thread(target=_worker, daemon=True)
t.start()
t.join(timeout=2)
assert seen["approval"] is approval_cb
assert seen["sudo"] is sudo_cb
# Finally block must clear both slots — otherwise a reused thread
# would hold a stale reference to a disposed CLI instance.
assert seen["approval_after"] is None
assert seen["sudo_after"] is None