mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-26 01:01:40 +00:00
fix: fix silence detection bugs and add Phase 4 voice mode features
Fix 3 critical bugs in silence detection: - Micro-pause tolerance now tracks dip duration (not time since speech start) - Peak RMS check in stop() prevents discarding recordings with real speech - Reduced min_speech_duration from 0.5s to 0.3s for reliable speech confirmation Phase 4 features: configurable silence params, visual audio level indicator, voice system prompt, tool call audio cues, TTS interrupt, continuous mode auto-restart, interruptable playback via Popen tracking.
This commit is contained in:
parent
32b033c11c
commit
dad865e920
4 changed files with 245 additions and 23 deletions
|
|
@ -157,6 +157,7 @@ class TestAudioRecorderStop:
|
|||
# Simulate captured audio frames (1 second of loud audio above RMS threshold)
|
||||
frame = np.full((SAMPLE_RATE, 1), 1000, dtype="int16")
|
||||
recorder._frames = [frame]
|
||||
recorder._peak_rms = 1000 # Peak RMS above threshold
|
||||
|
||||
wav_path = recorder.stop()
|
||||
|
||||
|
|
@ -203,6 +204,7 @@ class TestAudioRecorderStop:
|
|||
# 1 second of near-silence (RMS well below threshold)
|
||||
frame = np.full((SAMPLE_RATE, 1), 10, dtype="int16")
|
||||
recorder._frames = [frame]
|
||||
recorder._peak_rms = 10 # Peak RMS also below threshold
|
||||
|
||||
wav_path = recorder.stop()
|
||||
assert wav_path is None
|
||||
|
|
@ -475,8 +477,9 @@ class TestSilenceDetection:
|
|||
from tools.voice_mode import AudioRecorder, SAMPLE_RATE
|
||||
|
||||
recorder = AudioRecorder()
|
||||
# Use very short silence duration for testing
|
||||
# Use very short durations for testing
|
||||
recorder._silence_duration = 0.05
|
||||
recorder._min_speech_duration = 0.05
|
||||
|
||||
fired = threading.Event()
|
||||
|
||||
|
|
@ -490,9 +493,11 @@ class TestSilenceDetection:
|
|||
if callback is None:
|
||||
callback = mock_sd.InputStream.call_args[1]["callback"]
|
||||
|
||||
# Simulate loud audio (speech) -- RMS well above threshold
|
||||
# Simulate sustained speech (multiple loud chunks to exceed min_speech_duration)
|
||||
loud_frame = np.full((1600, 1), 5000, dtype="int16")
|
||||
callback(loud_frame, 1600, None, None)
|
||||
time.sleep(0.06)
|
||||
callback(loud_frame, 1600, None, None)
|
||||
assert recorder._has_spoken is True
|
||||
|
||||
# Simulate silence
|
||||
|
|
@ -537,6 +542,47 @@ class TestSilenceDetection:
|
|||
|
||||
recorder.cancel()
|
||||
|
||||
def test_micro_pause_tolerance_during_speech(self, mock_sd):
|
||||
"""Brief dips below threshold during speech should NOT reset speech tracking."""
|
||||
np = pytest.importorskip("numpy")
|
||||
import threading
|
||||
|
||||
mock_stream = MagicMock()
|
||||
mock_sd.InputStream.return_value = mock_stream
|
||||
|
||||
from tools.voice_mode import AudioRecorder
|
||||
|
||||
recorder = AudioRecorder()
|
||||
recorder._silence_duration = 0.05
|
||||
recorder._min_speech_duration = 0.15
|
||||
recorder._max_dip_tolerance = 0.1
|
||||
|
||||
fired = threading.Event()
|
||||
recorder.start(on_silence_stop=lambda: fired.set())
|
||||
|
||||
callback = mock_sd.InputStream.call_args.kwargs.get("callback")
|
||||
if callback is None:
|
||||
callback = mock_sd.InputStream.call_args[1]["callback"]
|
||||
|
||||
loud_frame = np.full((1600, 1), 5000, dtype="int16")
|
||||
quiet_frame = np.full((1600, 1), 50, dtype="int16")
|
||||
|
||||
# Speech chunk 1
|
||||
callback(loud_frame, 1600, None, None)
|
||||
time.sleep(0.05)
|
||||
# Brief micro-pause (dip < max_dip_tolerance)
|
||||
callback(quiet_frame, 1600, None, None)
|
||||
time.sleep(0.05)
|
||||
# Speech resumes -- speech_start should NOT have been reset
|
||||
callback(loud_frame, 1600, None, None)
|
||||
assert recorder._speech_start > 0, "Speech start should be preserved across brief dips"
|
||||
time.sleep(0.06)
|
||||
# Another speech chunk to exceed min_speech_duration
|
||||
callback(loud_frame, 1600, None, None)
|
||||
assert recorder._has_spoken is True, "Speech should be confirmed after tolerating micro-pause"
|
||||
|
||||
recorder.cancel()
|
||||
|
||||
def test_no_callback_means_no_silence_detection(self, mock_sd):
|
||||
np = pytest.importorskip("numpy")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue