diff --git a/cli.py b/cli.py index 95834959cd..e2ee9a2676 100755 --- a/cli.py +++ b/cli.py @@ -3617,11 +3617,14 @@ class HermesCLI: def _voice_stop_and_transcribe(self): """Stop recording, transcribe via STT, and queue the transcript as input.""" - # Atomic guard: only one thread can enter stop-and-transcribe + # Atomic guard: only one thread can enter stop-and-transcribe. + # Set _voice_processing immediately so concurrent Ctrl+B presses + # don't race into the START path while recorder.stop() holds its lock. with self._voice_lock: if not self._voice_recording: return self._voice_recording = False + self._voice_processing = True submitted = False wav_path = None @@ -3642,8 +3645,7 @@ class HermesCLI: _cprint(f"{_DIM}No speech detected.{_RST}") return - with self._voice_lock: - self._voice_processing = True + # _voice_processing is already True (set atomically above) if hasattr(self, '_app') and self._app: self._app.invalidate() _cprint(f"{_DIM}Transcribing...{_RST}") @@ -4864,7 +4866,12 @@ class HermesCLI: @kb.add(_voice_key) def handle_voice_record(event): - """Toggle voice recording when voice mode is active.""" + """Toggle voice recording when voice mode is active. + + IMPORTANT: This handler runs in prompt_toolkit's event-loop thread. + Any blocking call here (locks, sd.wait, disk I/O) freezes the + entire UI. All heavy work is dispatched to daemon threads. + """ if not cli_ref._voice_mode: return # Always allow STOPPING a recording (even when agent is running) @@ -4884,21 +4891,38 @@ class HermesCLI: return if cli_ref._clarify_state or cli_ref._sudo_state or cli_ref._approval_state: return - try: - # Interrupt TTS if playing, so user can start talking - if not cli_ref._voice_tts_done.is_set(): - try: - from tools.voice_mode import stop_playback - stop_playback() - cli_ref._voice_tts_done.set() - except Exception: - pass - with cli_ref._voice_lock: - cli_ref._voice_continuous = True - cli_ref._voice_start_recording() - event.app.invalidate() - except Exception as e: - _cprint(f"\n{_DIM}Voice recording failed: {e}{_RST}") + # Guard: don't start while a previous stop/transcribe cycle is + # still running — recorder.stop() holds AudioRecorder._lock and + # start() would block the event-loop thread waiting for it. + if cli_ref._voice_processing: + return + + # Interrupt TTS if playing, so user can start talking. + # stop_playback() is fast (just terminates a subprocess). + if not cli_ref._voice_tts_done.is_set(): + try: + from tools.voice_mode import stop_playback + stop_playback() + cli_ref._voice_tts_done.set() + except Exception: + pass + + with cli_ref._voice_lock: + cli_ref._voice_continuous = True + + # Dispatch to a daemon thread so play_beep(sd.wait), + # AudioRecorder.start(lock acquire), and config I/O + # never block the prompt_toolkit event loop. + def _start_recording(): + try: + cli_ref._voice_start_recording() + if hasattr(cli_ref, '_app') and cli_ref._app: + cli_ref._app.invalidate() + except Exception as e: + _cprint(f"\n{_DIM}Voice recording failed: {e}{_RST}") + + threading.Thread(target=_start_recording, daemon=True).start() + event.app.invalidate() from prompt_toolkit.keys import Keys @kb.add(Keys.BracketedPaste, eager=True) diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py index 32f48e19ce..e7be698d3b 100644 --- a/tests/tools/test_voice_cli_integration.py +++ b/tests/tools/test_voice_cli_integration.py @@ -732,6 +732,96 @@ class TestBrowserToolSignalHandlerRemoved: ) +class TestKeyHandlerNeverBlocks: + """The Ctrl+B key handler runs in prompt_toolkit's event-loop thread. + Any blocking call freezes the entire UI. Verify that: + 1. _voice_start_recording is NOT called directly (must be in daemon thread) + 2. _voice_processing guard prevents starting while stop/transcribe runs + 3. _voice_processing is set atomically with _voice_recording in stop_and_transcribe + """ + + def test_start_recording_not_called_directly_in_handler(self): + """AST check: handle_voice_record must NOT call _voice_start_recording() + directly — it must wrap it in a Thread to avoid blocking the UI.""" + import ast as _ast + + with open("cli.py") as f: + tree = _ast.parse(f.read()) + + for node in _ast.walk(tree): + if isinstance(node, _ast.FunctionDef) and node.name == "handle_voice_record": + # Collect all direct calls to _voice_start_recording in this function. + # They should ONLY appear inside a nested def (the _start_recording wrapper). + for child in _ast.iter_child_nodes(node): + # Direct statements in the handler body (not nested defs) + if isinstance(child, _ast.Expr) and isinstance(child.value, _ast.Call): + call_src = _ast.dump(child.value) + assert "_voice_start_recording" not in call_src, ( + "handle_voice_record calls _voice_start_recording directly " + "— must dispatch to a daemon thread" + ) + break + + def test_processing_guard_in_start_path(self): + """Source check: key handler must check _voice_processing before + starting a new recording.""" + with open("cli.py") as f: + source = f.read() + + lines = source.split("\n") + in_handler = False + in_else = False + found_guard = False + for line in lines: + if "def handle_voice_record" in line: + in_handler = True + elif in_handler and line.strip().startswith("def ") and "_start_recording" not in line: + break + elif in_handler and "else:" in line: + in_else = True + elif in_else and "_voice_processing" in line: + found_guard = True + break + + assert found_guard, ( + "Key handler START path must guard against _voice_processing " + "to prevent blocking on AudioRecorder._lock" + ) + + def test_processing_set_atomically_with_recording_false(self): + """Source check: _voice_stop_and_transcribe must set _voice_processing = True + in the same lock block where it sets _voice_recording = False.""" + with open("cli.py") as f: + source = f.read() + + lines = source.split("\n") + in_method = False + in_first_lock = False + found_recording_false = False + found_processing_true = False + for line in lines: + if "def _voice_stop_and_transcribe" in line: + in_method = True + elif in_method and "with self._voice_lock:" in line and not in_first_lock: + in_first_lock = True + elif in_first_lock: + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + if "_voice_recording = False" in stripped: + found_recording_false = True + if "_voice_processing = True" in stripped: + found_processing_true = True + # End of with block (dedent) + if stripped and not line.startswith(" ") and not line.startswith("\t\t\t"): + break + + assert found_recording_false and found_processing_true, ( + "_voice_stop_and_transcribe must set _voice_processing = True " + "atomically (same lock block) with _voice_recording = False" + ) + + # ============================================================================ # Real behavior tests — CLI voice methods via _make_voice_cli() # ============================================================================