diff --git a/tests/cli/test_slash_confirm_windows.py b/tests/cli/test_slash_confirm_windows.py index 33420ed0e9d..7ad565465af 100644 --- a/tests/cli/test_slash_confirm_windows.py +++ b/tests/cli/test_slash_confirm_windows.py @@ -1,29 +1,31 @@ -"""Regression tests for issue #30768 and #32383. +"""Regression tests for #30768, #32383, and #33961. -``_prompt_text_input_modal`` uses a queue-based modal that relies on -prompt_toolkit key bindings receiving keyboard events. On Windows the -prompt_toolkit input channel can deadlock when the modal is entered from -the ``process_loop`` daemon thread. The fix falls back to the simpler -``_prompt_text_input`` (stdin-based) prompt on Windows. +``_prompt_text_input_modal`` answers destructive-slash confirmations through a +queue-based modal driven by prompt_toolkit key bindings. When invoked from the +``process_loop`` daemon thread it sets the modal up on the app's event loop via +``call_soon_threadsafe``, so it is safe on every platform — including native +Windows (#33961), where the earlier ``sys.platform == "win32"`` → raw ``input()`` +fallback deadlocked the daemon thread against prompt_toolkit's stdin ownership. These tests verify: -1. Windows detection triggers the stdin fallback -2. Non-Windows daemon threads still use the modal via the app loop -3. macOS/Linux main-thread path still uses the modal (no regression) -4. No-app path still uses the stdin fallback (existing behavior) -5. Empty choices returns None (existing behavior) +1. Daemon-thread confirm uses the modal via the app loop on Linux AND native + Windows (#33961) — never the raw stdin fallback, never a hang. +2. Main-thread confirm with a running app uses the modal. +3. The raw stdin fallback is kept ONLY for the safe cases: no running app, and + (on win32, off-thread) a scheduling failure degrades to a clean cancel. +4. Empty choices returns None. """ -import queue import sys import threading import time from unittest.mock import MagicMock, patch +import pytest def _make_cli(): - """Minimal HermesCLI shell exposing prompt/modal helpers.""" + """Minimal HermesCLI shell exposing the prompt/modal helpers.""" import cli as cli_mod obj = object.__new__(cli_mod.HermesCLI) @@ -37,9 +39,6 @@ def _make_cli(): return obj -# --------------------------------------------------------------------------- -# Sample choices used across tests -# --------------------------------------------------------------------------- _SAMPLE_CHOICES = [ ("once", "Approve Once", "proceed this time only"), ("always", "Always Approve", "proceed and silence this prompt permanently"), @@ -47,164 +46,106 @@ _SAMPLE_CHOICES = [ ] -class TestModalWindowsFallback: - """Windows dead-lock regression tests for _prompt_text_input_modal.""" +def _answer_modal_when_open(cli, response, stop=None): + """Push ``response`` onto the modal's response_queue once it opens. - def test_windows_falls_back_to_stdin(self): - """On Windows, _prompt_text_input_modal should use _prompt_text_input.""" + Gives up after ~2s, or early when ``stop`` is set (the modal will never open, + e.g. a scheduling failure) so degraded-path tests don't wait the full budget. + """ + for _ in range(100): + if stop is not None and stop.is_set(): + return + state = cli._slash_confirm_state + if state and "response_queue" in state: + state["response_queue"].put(response) + return + time.sleep(0.02) + + +def _run_on_daemon(call, cli, *, platform, response, schedule=None): + """Invoke ``call`` on a daemon thread — as the process_loop does — answering + the modal with ``response`` once it opens. + + Returns ``{result, stdin_called, capture, restore}``. ``schedule`` overrides + the ``call_soon_threadsafe`` side effect (default: run the callback inline); + pass a raiser to simulate a scheduling failure. Fails if the worker hangs, + which is the deadlock canary for #33961. + """ + outcome = {"capture": [], "restore": [], "result": None, "stdin_called": False} + done = threading.Event() + + def _worker(): + try: + with patch.object(sys, "platform", platform), \ + patch.object(cli._app.loop, "call_soon_threadsafe", side_effect=schedule or (lambda cb: cb())), \ + patch.object(cli, "_prompt_text_input") as mock_stdin, \ + patch.object(cli, "_invalidate"), \ + patch.object(cli, "_capture_modal_input_snapshot", side_effect=lambda: outcome["capture"].append(1)), \ + patch.object(cli, "_restore_modal_input_snapshot", side_effect=lambda: outcome["restore"].append(1)): + outcome["result"] = call() + outcome["stdin_called"] = mock_stdin.called + finally: + done.set() + + worker = threading.Thread(target=_worker, daemon=True) + answerer = threading.Thread(target=_answer_modal_when_open, args=(cli, response, done), daemon=True) + answerer.start() + worker.start() + worker.join(timeout=2.0) + answerer.join(timeout=2.0) + assert not worker.is_alive(), "daemon thread hung — modal deadlocked" + return outcome + + +class TestModal: + """Behaviour of _prompt_text_input_modal across platforms and threads.""" + + @pytest.mark.parametrize("platform", ["linux", "win32"]) + def test_daemon_thread_uses_modal_via_app_loop(self, platform): + """Off the process_loop daemon thread, the confirm uses the modal via + call_soon_threadsafe on every platform — including native Windows, where + the old win32 early-return deadlocked on raw input() (#33961).""" cli = _make_cli() - - with patch.object(sys, "platform", "win32"), \ - patch.object(cli, "_prompt_text_input", return_value="1") as mock_stdin: - result = cli._prompt_text_input_modal( - title="⚠️ /new — destroys conversation state", + outcome = _run_on_daemon( + lambda: cli._prompt_text_input_modal( + title="⚠️ /reset", detail="This starts a fresh session.", choices=_SAMPLE_CHOICES, - ) + timeout=5, + ), + cli, + platform=platform, + response="once", + ) + assert outcome["stdin_called"] is False, "must use the modal, not raw input()" + assert outcome["result"] == "once" + assert outcome["capture"] == [1] + assert outcome["restore"] == [1] + assert cli._slash_confirm_state is None - # The stdin-based fallback was used, not the modal queue path. - mock_stdin.assert_called_once_with("Choice [1/2/3]: ") - assert result == "1" - - def test_non_main_thread_uses_modal_via_app_loop(self): - """Off the main thread on Linux, keep the modal path via app-loop setup.""" + def test_main_thread_with_app_uses_modal(self): + """On the main thread with a running app, the queue-based modal is used.""" cli = _make_cli() - result_holder = {} - setup_calls = [] - teardown_calls = [] - - def _call_soon_threadsafe(callback): - callback() - - def run_on_daemon(): - with patch.object(sys, "platform", "linux"), \ - patch.object(cli._app.loop, "call_soon_threadsafe", side_effect=_call_soon_threadsafe), \ - patch.object(cli, "_prompt_text_input") as mock_stdin, \ - patch.object(cli, "_capture_modal_input_snapshot", side_effect=lambda: setup_calls.append("capture")), \ - patch.object(cli, "_restore_modal_input_snapshot", side_effect=lambda: teardown_calls.append("restore")): - result_holder["result"] = cli._prompt_text_input_modal( - title="⚠️ /reset", - detail="This starts a fresh session.", - choices=_SAMPLE_CHOICES, - timeout=5, - ) - result_holder["stdin_called"] = mock_stdin.called - - def _submit_after_delay(): - time.sleep(0.2) - state = cli._slash_confirm_state - if state and "response_queue" in state: - state["response_queue"].put("once") - - submitter = threading.Thread(target=_submit_after_delay, daemon=True) - t = threading.Thread(target=run_on_daemon, daemon=True) - submitter.start() - t.start() - t.join(timeout=2.0) - submitter.join(timeout=2.0) - assert not t.is_alive(), "daemon thread hung — modal deadlocked" - assert result_holder["stdin_called"] is False - assert result_holder["result"] == "once" - assert setup_calls == ["capture"] - assert teardown_calls == ["restore"] - - def test_windows_daemon_thread_uses_modal_via_app_loop(self): - """On native Windows, the destructive-confirm must use the modal via the - app loop, not the raw input() fallback that deadlocks the daemon thread. - - Regression test for #33961: bare /reset froze on native Windows because - the win32 early-return routed confirmation to input() on the process_loop - daemon thread, deadlocking against prompt_toolkit's main-thread stdin. - """ - cli = _make_cli() - result_holder = {} - setup_calls = [] - teardown_calls = [] - - def run_on_daemon(): - with patch.object(sys, "platform", "win32"), \ - patch.object(cli._app.loop, "call_soon_threadsafe", side_effect=lambda cb: cb()), \ - patch.object(cli, "_prompt_text_input") as mock_stdin, \ - patch.object(cli, "_capture_modal_input_snapshot", side_effect=lambda: setup_calls.append("capture")), \ - patch.object(cli, "_restore_modal_input_snapshot", side_effect=lambda: teardown_calls.append("restore")): - result_holder["result"] = cli._prompt_text_input_modal( - title="⚠️ /reset", - detail="This starts a fresh session.", - choices=_SAMPLE_CHOICES, - timeout=5, - ) - result_holder["stdin_called"] = mock_stdin.called - - def _submit_after_delay(): - time.sleep(0.2) - state = cli._slash_confirm_state - if state and "response_queue" in state: - state["response_queue"].put("once") - - submitter = threading.Thread(target=_submit_after_delay, daemon=True) - t = threading.Thread(target=run_on_daemon, daemon=True) - submitter.start() - t.start() - t.join(timeout=2.0) - submitter.join(timeout=2.0) - assert not t.is_alive(), "daemon thread hung — modal deadlocked" - assert result_holder["stdin_called"] is False, "win32 must NOT use the input() fallback" - assert result_holder["result"] == "once" - assert setup_calls == ["capture"] - assert teardown_calls == ["restore"] - - def test_main_thread_non_windows_uses_modal(self): - """On macOS/Linux main thread, the queue-based modal is still used.""" - cli = _make_cli() - - # We need to simulate the modal receiving a response. We'll patch - # the response_queue to immediately return a value. with patch.object(sys, "platform", "darwin"), \ patch.object(cli, "_capture_modal_input_snapshot"), \ patch.object(cli, "_restore_modal_input_snapshot"), \ - patch.object(cli, "_invalidate"): - # Start the modal in a way that it will receive a response - # immediately via the queue. - original_queue = queue.Queue - original_time = time.monotonic + patch.object(cli, "_invalidate"), \ + patch.object(cli, "_prompt_text_input") as mock_stdin: + answerer = threading.Thread(target=_answer_modal_when_open, args=(cli, "once"), daemon=True) + answerer.start() + result = cli._prompt_text_input_modal( + title="⚠️ /new", + detail="This starts a fresh session.", + choices=_SAMPLE_CHOICES, + timeout=5, + ) + answerer.join(timeout=2.0) - def _fake_modal_flow(*args, **kwargs): - """Simulate the modal flow: set state, put response, return.""" - # We'll directly test that the modal path is entered by - # checking that _slash_confirm_state was set. - pass - - # Since we can't easily mock the internal queue, let's test - # that the modal path is entered by checking that - # _prompt_text_input was NOT called. - with patch.object(cli, "_prompt_text_input") as mock_stdin: - # Set up a response that will be put into the queue - # after the modal starts waiting. - def _submit_after_delay(): - time.sleep(0.2) - state = cli._slash_confirm_state - if state and "response_queue" in state: - state["response_queue"].put("once") - - submitter = threading.Thread(target=_submit_after_delay, daemon=True) - submitter.start() - - result = cli._prompt_text_input_modal( - title="⚠️ /new", - detail="This starts a fresh session.", - choices=_SAMPLE_CHOICES, - timeout=5, - ) - - submitter.join(timeout=2.0) - - # The stdin fallback should NOT have been called. - mock_stdin.assert_not_called() - # The result should be "once" from the simulated modal response. - assert result == "once" + mock_stdin.assert_not_called() + assert result == "once" def test_no_app_falls_back_to_stdin(self): - """Without a prompt_toolkit app, always use stdin fallback.""" + """Without a running app (oneshot / non-interactive), use the stdin prompt.""" cli = _make_cli() cli._app = None @@ -218,78 +159,102 @@ class TestModalWindowsFallback: mock_stdin.assert_called_once_with("Choice [1/2/3]: ") assert result == "3" - def test_empty_choices_returns_none(self): - """Empty choices list should return None without prompting.""" - cli = _make_cli() - - with patch.object(cli, "_prompt_text_input") as mock_stdin: - result = cli._prompt_text_input_modal( - title="Test", - detail="Test", - choices=[], - ) - - mock_stdin.assert_not_called() - assert result is None - - def test_windows_fallback_does_not_set_modal_state(self): - """Verify Windows fallback doesn't leave _slash_confirm_state set.""" + def test_windows_no_app_falls_back_to_stdin(self): + """win32 without a running app keeps stdin — the only case where the raw + prompt is safe on Windows, since no app owns the console to deadlock.""" cli = _make_cli() + cli._app = None with patch.object(sys, "platform", "win32"), \ - patch.object(cli, "_prompt_text_input", return_value="1"): - cli._prompt_text_input_modal( - title="⚠️ /reset", + patch.object(cli, "_prompt_text_input", return_value="1") as mock_stdin: + result = cli._prompt_text_input_modal( + title="⚠️ /new — destroys conversation state", detail="This starts a fresh session.", choices=_SAMPLE_CHOICES, ) + mock_stdin.assert_called_once_with("Choice [1/2/3]: ") + assert result == "1" + + def test_windows_scheduling_failure_clean_cancels(self): + """win32 off the main thread: if marshaling onto the app loop fails, cancel + cleanly (None) rather than fall to raw input() (which deadlocks on native + Windows) or hang. Asserts the _stdin_fallback guard (#33961).""" + cli = _make_cli() + + def _raise(_cb): + raise RuntimeError("loop closed") + + outcome = _run_on_daemon( + lambda: cli._prompt_text_input_modal( + title="⚠️ /reset", + detail="This starts a fresh session.", + choices=_SAMPLE_CHOICES, + timeout=5, + ), + cli, + platform="win32", + response="once", + schedule=_raise, + ) + assert outcome["stdin_called"] is False, "win32 off-thread must NOT call raw input()" + assert outcome["result"] is None assert cli._slash_confirm_state is None - def test_non_main_thread_modal_clears_state(self): - """Verify daemon-thread modal teardown does not leave state behind.""" + @pytest.mark.parametrize( + "platform, expect_stdin, expect_result", + [("win32", False, None), ("linux", True, "1")], + ) + def test_daemon_thread_no_app_loop_uses_fallback(self, platform, expect_stdin, expect_result): + """Off the daemon thread with no resolvable app loop (``self._app.loop`` + is None / raises), the modal can never be scheduled, so the method short- + circuits at the app_loop-is-None site (cli.py ~7260) — a distinct path + from a call_soon_threadsafe failure. win32 clean-cancels (None) instead of + deadlocking on raw input(); other platforms keep the stdin prompt.""" cli = _make_cli() - errors = [] + cli._app.loop = None # forces app_loop is None, off the main thread - def _call_soon_threadsafe(callback): - callback() + outcome = {"result": None, "stdin_called": False} + done = threading.Event() - def run_on_daemon(): + def _worker(): try: - with patch.object(sys, "platform", "linux"), \ - patch.object(cli._app.loop, "call_soon_threadsafe", side_effect=_call_soon_threadsafe): - def _submit_after_delay(): - time.sleep(0.2) - state = cli._slash_confirm_state - if state and "response_queue" in state: - state["response_queue"].put("cancel") - - submitter = threading.Thread(target=_submit_after_delay, daemon=True) - submitter.start() - cli._prompt_text_input_modal( - title="⚠️ /new", + with patch.object(sys, "platform", platform), \ + patch.object(cli, "_prompt_text_input", return_value="1") as mock_stdin, \ + patch.object(cli, "_invalidate"): + outcome["result"] = cli._prompt_text_input_modal( + title="⚠️ /reset", detail="This starts a fresh session.", choices=_SAMPLE_CHOICES, timeout=5, ) - submitter.join(timeout=2.0) - if cli._slash_confirm_state is not None: - errors.append("_slash_confirm_state should be None") - except Exception as exc: - errors.append(str(exc)) + outcome["stdin_called"] = mock_stdin.called + finally: + done.set() - t = threading.Thread(target=run_on_daemon, daemon=True) - t.start() - t.join(timeout=2.0) - assert not errors, f"unexpected errors: {errors}" + worker = threading.Thread(target=_worker, daemon=True) + worker.start() + worker.join(timeout=2.0) + assert not worker.is_alive(), "daemon thread hung — modal deadlocked" + assert outcome["stdin_called"] is expect_stdin + assert outcome["result"] == expect_result assert cli._slash_confirm_state is None + def test_empty_choices_returns_none(self): + """Empty choices returns None without prompting.""" + cli = _make_cli() + + with patch.object(cli, "_prompt_text_input") as mock_stdin: + result = cli._prompt_text_input_modal(title="Test", detail="Test", choices=[]) + + mock_stdin.assert_not_called() + assert result is None + class TestConfirmDestructiveSlashWindows: - """Integration-level tests for _confirm_destructive_slash on Windows.""" + """End-to-end _confirm_destructive_slash on the native-Windows daemon thread.""" - def test_confirm_destructive_slash_bypasses_modal_on_windows(self): - """_confirm_destructive_slash should work on Windows via stdin fallback.""" + def _make_interactive_cli(self): cli = _make_cli() cli.model = "test-model" cli._agent_running = False @@ -300,37 +265,26 @@ class TestConfirmDestructiveSlashWindows: cli._pending_tool_info = {} cli._tool_start_time = 0.0 cli._last_scrollback_tool = "" + return cli - with patch.object(sys, "platform", "win32"), \ - patch.object(cli, "_prompt_text_input", return_value="1"), \ - patch("cli.load_cli_config", return_value={"approvals": {"destructive_slash_confirm": True}}): - result = cli._confirm_destructive_slash( - "new", - "This starts a fresh session.\nThe current conversation history will be discarded.", + @pytest.mark.parametrize( + "response, expected", + [("once", "once"), ("cancel", None)], + ) + def test_confirm_destructive_slash_uses_modal_on_windows(self, response, expected): + """On native Windows, the bare /new confirm drives the modal (not stdin) + and returns the chosen outcome — the bug #33961 froze this path.""" + cli = self._make_interactive_cli() + with patch("cli.load_cli_config", return_value={"approvals": {"destructive_slash_confirm": True}}): + outcome = _run_on_daemon( + lambda: cli._confirm_destructive_slash( + "new", + "This starts a fresh session.\nThe current conversation history will be discarded.", + ), + cli, + platform="win32", + response=response, ) - assert result == "once" - - def test_confirm_destructive_slash_cancelled_on_windows(self): - """Cancellation via stdin fallback works on Windows.""" - cli = _make_cli() - cli.model = "test-model" - cli._agent_running = False - cli._spinner_text = "" - cli._should_exit = False - cli._command_running = False - cli.session_id = "test-session" - cli._pending_tool_info = {} - cli._tool_start_time = 0.0 - cli._last_scrollback_tool = "" - - with patch.object(sys, "platform", "win32"), \ - patch.object(cli, "_prompt_text_input", return_value="3"), \ - patch("cli.load_cli_config", return_value={"approvals": {"destructive_slash_confirm": True}}): - result = cli._confirm_destructive_slash( - "reset", - "This starts a fresh session.\nThe current conversation history will be discarded.", - ) - - # Choice "3" normalizes to "cancel", which returns None. - assert result is None + assert outcome["stdin_called"] is False + assert outcome["result"] == expected