fix: fix silence detection bugs and add Phase 4 voice mode features

Fix three critical bugs in silence detection: (1) micro-pause tolerance now tracks the duration of the dip (not the time since speech started); (2) a peak-RMS check in stop() prevents discarding recordings that contain real speech; (3) min_speech_duration is reduced from 0.5s to 0.3s for reliable speech confirmation. Phase 4 features: configurable silence parameters, a visual audio-level indicator, a voice system prompt, tool-call audio cues, TTS interrupt, continuous-mode auto-restart, and interruptible playback via Popen tracking.
2026-04-26 01:01:40 +00:00 · 2026-03-03 20:43:22 +03:00 · 2026-03-03 20:43:22 +03:00 · dad865e920
commit dad865e920
parent 32b033c11c
4 changed files with 245 additions and 23 deletions
--- a/tests/tools/test_voice_mode.py
+++ b/tests/tools/test_voice_mode.py
@ -157,6 +157,7 @@ class TestAudioRecorderStop:
        # Simulate captured audio frames (1 second of loud audio above RMS threshold)
        frame = np.full((SAMPLE_RATE, 1), 1000, dtype="int16")
        recorder._frames = [frame]
+        recorder._peak_rms = 1000  # Peak RMS above threshold

        wav_path = recorder.stop()

@ -203,6 +204,7 @@ class TestAudioRecorderStop:
        # 1 second of near-silence (RMS well below threshold)
        frame = np.full((SAMPLE_RATE, 1), 10, dtype="int16")
        recorder._frames = [frame]
+        recorder._peak_rms = 10  # Peak RMS also below threshold

        wav_path = recorder.stop()
        assert wav_path is None
@ -475,8 +477,9 @@ class TestSilenceDetection:
        from tools.voice_mode import AudioRecorder, SAMPLE_RATE

        recorder = AudioRecorder()
-        # Use very short silence duration for testing
+        # Use very short durations for testing
        recorder._silence_duration = 0.05
+        recorder._min_speech_duration = 0.05

        fired = threading.Event()

@ -490,9 +493,11 @@ class TestSilenceDetection:
        if callback is None:
            callback = mock_sd.InputStream.call_args[1]["callback"]

-        # Simulate loud audio (speech) -- RMS well above threshold
+        # Simulate sustained speech (multiple loud chunks to exceed min_speech_duration)
        loud_frame = np.full((1600, 1), 5000, dtype="int16")
        callback(loud_frame, 1600, None, None)
+        time.sleep(0.06)
+        callback(loud_frame, 1600, None, None)
        assert recorder._has_spoken is True

        # Simulate silence
@ -537,6 +542,47 @@ class TestSilenceDetection:

        recorder.cancel()

+    def test_micro_pause_tolerance_during_speech(self, mock_sd):
+        """Brief dips below threshold during speech should NOT reset speech tracking."""
+        np = pytest.importorskip("numpy")
+        import threading
+
+        mock_stream = MagicMock()
+        mock_sd.InputStream.return_value = mock_stream
+
+        from tools.voice_mode import AudioRecorder
+
+        recorder = AudioRecorder()
+        recorder._silence_duration = 0.05
+        recorder._min_speech_duration = 0.15
+        recorder._max_dip_tolerance = 0.1
+
+        fired = threading.Event()
+        recorder.start(on_silence_stop=lambda: fired.set())
+
+        callback = mock_sd.InputStream.call_args.kwargs.get("callback")
+        if callback is None:
+            callback = mock_sd.InputStream.call_args[1]["callback"]
+
+        loud_frame = np.full((1600, 1), 5000, dtype="int16")
+        quiet_frame = np.full((1600, 1), 50, dtype="int16")
+
+        # Speech chunk 1: first loud frame fed to the captured stream callback
+        callback(loud_frame, 1600, None, None)
+        time.sleep(0.05)
+        # Brief micro-pause: the ~0.05s quiet dip is shorter than _max_dip_tolerance (0.1s)
+        callback(quiet_frame, 1600, None, None)
+        time.sleep(0.05)
+        # Speech resumes -- _speech_start should NOT have been reset by the dip
+        callback(loud_frame, 1600, None, None)
+        assert recorder._speech_start > 0, "Speech start should be preserved across brief dips"
+        time.sleep(0.06)
+        # Final loud chunk: ~0.16s of sleeps since chunk 1 now exceeds _min_speech_duration (0.15s)
+        callback(loud_frame, 1600, None, None)
+        assert recorder._has_spoken is True, "Speech should be confirmed after tolerating micro-pause"
+
+        recorder.cancel()
+
    def test_no_callback_means_no_silence_detection(self, mock_sd):
        np = pytest.importorskip("numpy")