fix(tui): restore voice push-to-talk parity (#20897)

* fix(tui): restore classic CLI voice push-to-talk parity (cherry picked from commit 93b9ae301b) * fix(tui): harden voice push-to-talk stop flow Address review feedback from PR #16189 by stopping the active recorder before background transcription, documenting single-shot voice capture, and covering the TUI gateway flags with regression tests. * fix(tui): preserve silent voice strike tracking Keep single-shot voice recording's no-speech counter alive across starts so the TUI can still emit the three-strikes auto-disable event, and bind the auto-restart state at module scope for type checking. * fix(tui): clean up voice stop failure path Address follow-up review by naming the TUI flow as single-shot push-to-talk and cancelling the recorder when forced stop cannot produce a WAV. * fix(tui): report busy voice capture starts Return explicit start state from the voice wrapper so the TUI gateway does not report recording while forced-stop transcription is still cleaning up. * fix(tui): handle busy voice record responses Apply the gateway busy status immediately in the TUI and route forced-stop voice events to the session that sent the stop request. * fix(tui): clear voice recording on null response Treat a null voice.record RPC result as a failed optimistic start so the REC badge cannot stick after gateway-side errors. * fix(tui): count silent manual voice stops Preserve single-shot voice no-speech strikes through forced stop transcription so empty push-to-talk captures still trigger the three-strikes guard. --------- Co-authored-by: Montbra <montbra@gmail.com>
2026-05-13 03:52:00 +00:00 · 2026-05-06 15:49:59 -07:00 · 2026-05-06 15:49:59 -07:00 · 04cf4788cc
commit 04cf4788cc
parent 5ccab51fa8
7 changed files with 527 additions and 57 deletions
--- a/tests/hermes_cli/test_voice_wrapper.py
+++ b/tests/hermes_cli/test_voice_wrapper.py
@ -309,6 +309,7 @@ class TestContinuousAPI:

        # Isolate from any state left behind by other tests in the session.
        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_stopping", False, raising=False)
        monkeypatch.setattr(voice, "_continuous_recorder", None)

        assert voice.is_continuous_active() is False
@ -343,11 +344,20 @@ class TestContinuousAPI:

        monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())

-        voice.start_continuous(on_transcript=lambda _t: None)
+        started = voice.start_continuous(on_transcript=lambda _t: None)

        # The guard inside start_continuous short-circuits before rec.start()
+        assert started is True
        assert called["n"] == 0

+    def test_start_returns_false_while_stopping(self, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_stopping", True, raising=False)
+
+        assert voice.start_continuous(on_transcript=lambda _t: None) is False
+

 class TestContinuousLoopSimulation:
    """End-to-end simulation of the VAD loop with a fake recorder.
@ -368,6 +378,8 @@ class TestContinuousLoopSimulation:
        monkeypatch.setattr(voice, "_continuous_on_transcript", None)
        monkeypatch.setattr(voice, "_continuous_on_status", None)
        monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
+        monkeypatch.setattr(voice, "_continuous_auto_restart", True, raising=False)
+        monkeypatch.setattr(voice, "_play_beep", lambda *_, **__: None)

        class FakeRecorder:
            _silence_threshold = 200
@ -381,13 +393,20 @@ class TestContinuousLoopSimulation:
                self.cancelled = 0
                # Preset WAV path returned by stop()
                self.next_stop_wav = "/tmp/fake.wav"
+                self.fail_stop = False
+                self.fail_next_start = False

            def start(self, on_silence_stop=None):
+                if self.fail_next_start:
+                    self.fail_next_start = False
+                    raise RuntimeError("boom")
                self.start_calls += 1
                self.last_callback = on_silence_stop
                self.is_recording = True

            def stop(self):
+                if self.fail_stop:
+                    raise RuntimeError("stop failed")
                self.stopped += 1
                self.is_recording = False
                return self.next_stop_wav
@ -433,6 +452,204 @@ class TestContinuousLoopSimulation:

        voice.stop_continuous()

+    def test_auto_restart_false_stops_after_first_transcript(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "single shot"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        statuses = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_status=lambda s: statuses.append(s),
+            auto_restart=False,
+        )
+        fake_recorder.last_callback()
+
+        assert transcripts == ["single shot"]
+        assert fake_recorder.start_calls == 1
+        assert statuses == ["listening", "transcribing", "idle"]
+        assert voice.is_continuous_active() is False
+
+    def test_auto_restart_false_retains_silent_strikes_across_starts(
+        self, fake_recorder, monkeypatch
+    ):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": ""},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        silent_limit_fired = []
+
+        for _ in range(3):
+            voice.start_continuous(
+                on_transcript=lambda _t: None,
+                on_silent_limit=lambda: silent_limit_fired.append(True),
+                auto_restart=False,
+            )
+            fake_recorder.last_callback()
+
+        assert silent_limit_fired == [True]
+        assert voice.is_continuous_active() is False
+        assert fake_recorder.start_calls == 3
+
+    def test_force_transcribe_stop_delivers_current_buffer(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        class ImmediateThread:
+            def __init__(self, target, daemon=False):
+                self.target = target
+
+            def start(self):
+                self.target()
+
+        monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "manual stop"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        statuses = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_status=lambda s: statuses.append(s),
+        )
+        voice.stop_continuous(force_transcribe=True)
+
+        assert fake_recorder.stopped == 1
+        assert transcripts == ["manual stop"]
+        assert statuses == ["listening", "transcribing", "idle"]
+        assert voice.is_continuous_active() is False
+
+    def test_force_transcribe_empty_single_shots_hit_silent_limit(
+        self, fake_recorder, monkeypatch
+    ):
+        import hermes_cli.voice as voice
+
+        class ImmediateThread:
+            def __init__(self, target, daemon=False):
+                self.target = target
+
+            def start(self):
+                self.target()
+
+        monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": ""},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        silent_limit_fired = []
+
+        for _ in range(3):
+            voice.start_continuous(
+                on_transcript=lambda _t: None,
+                on_silent_limit=lambda: silent_limit_fired.append(True),
+                auto_restart=False,
+            )
+            voice.stop_continuous(force_transcribe=True)
+
+        assert silent_limit_fired == [True]
+        assert fake_recorder.stopped == 3
+        assert voice._continuous_no_speech_count == 0
+
+    def test_force_transcribe_valid_single_shot_resets_silent_strikes(
+        self, fake_recorder, monkeypatch
+    ):
+        import hermes_cli.voice as voice
+
+        class ImmediateThread:
+            def __init__(self, target, daemon=False):
+                self.target = target
+
+            def start(self):
+                self.target()
+
+        monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
+        monkeypatch.setattr(voice, "_continuous_no_speech_count", 2)
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "manual stop"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        silent_limit_fired = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_silent_limit=lambda: silent_limit_fired.append(True),
+            auto_restart=False,
+        )
+        voice.stop_continuous(force_transcribe=True)
+
+        assert transcripts == ["manual stop"]
+        assert silent_limit_fired == []
+        assert voice._continuous_no_speech_count == 0
+
+    def test_force_transcribe_stop_failure_cancels_and_clears_stopping(
+        self, fake_recorder, monkeypatch
+    ):
+        import hermes_cli.voice as voice
+
+        class ImmediateThread:
+            def __init__(self, target, daemon=False):
+                self.target = target
+
+            def start(self):
+                self.target()
+
+        monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
+        fake_recorder.fail_stop = True
+
+        statuses = []
+        voice.start_continuous(
+            on_transcript=lambda _t: None,
+            on_status=lambda s: statuses.append(s),
+        )
+        voice.stop_continuous(force_transcribe=True)
+
+        assert fake_recorder.cancelled == 1
+        assert statuses == ["listening", "transcribing", "idle"]
+        assert voice.is_continuous_active() is False
+        assert voice._continuous_stopping is False
+
+    def test_restart_failure_reports_idle(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "hello world"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        statuses = []
+        voice.start_continuous(on_transcript=lambda _t: None, on_status=statuses.append)
+
+        fake_recorder.fail_next_start = True
+        fake_recorder.last_callback()
+
+        assert statuses == ["listening", "transcribing", "idle"]
+        assert voice.is_continuous_active() is False
+
    def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
        import hermes_cli.voice as voice