mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(tui): restore voice push-to-talk parity (#20897)
* fix(tui): restore classic CLI voice push-to-talk parity
(cherry picked from commit 93b9ae301b)
* fix(tui): harden voice push-to-talk stop flow
Address review feedback from PR #16189 by stopping the active recorder before background transcription, documenting single-shot voice capture, and covering the TUI gateway flags with regression tests.
* fix(tui): preserve silent voice strike tracking
Keep single-shot voice recording's no-speech counter alive across starts so the TUI can still emit the three-strikes auto-disable event, and bind the auto-restart state at module scope for type checking.
* fix(tui): clean up voice stop failure path
Address follow-up review by naming the TUI flow as single-shot push-to-talk and cancelling the recorder when forced stop cannot produce a WAV.
* fix(tui): report busy voice capture starts
Return explicit start state from the voice wrapper so the TUI gateway does not report recording while forced-stop transcription is still cleaning up.
* fix(tui): handle busy voice record responses
Apply the gateway busy status immediately in the TUI and route forced-stop voice events to the session that sent the stop request.
* fix(tui): clear voice recording on null response
Treat a null voice.record RPC result as a failed optimistic start so the REC badge cannot stick after gateway-side errors.
* fix(tui): count silent manual voice stops
Preserve single-shot voice no-speech strikes through forced stop transcription so empty push-to-talk captures still trigger the three-strikes guard.
---------
Co-authored-by: Montbra <montbra@gmail.com>
This commit is contained in:
parent
5ccab51fa8
commit
04cf4788cc
7 changed files with 527 additions and 57 deletions
|
|
@ -281,6 +281,8 @@ _recorder_lock = threading.Lock()
|
|||
# ── Continuous (VAD) state ───────────────────────────────────────────
|
||||
_continuous_lock = threading.Lock()
|
||||
_continuous_active = False
|
||||
_continuous_stopping = False
|
||||
_continuous_auto_restart: bool = True
|
||||
_continuous_recorder: Any = None
|
||||
|
||||
# ── TTS-vs-STT feedback guard ────────────────────────────────────────
|
||||
|
|
@ -370,32 +372,43 @@ def start_continuous(
|
|||
on_silent_limit: Optional[Callable[[], None]] = None,
|
||||
silence_threshold: int = 200,
|
||||
silence_duration: float = 3.0,
|
||||
) -> None:
|
||||
auto_restart: bool = True,
|
||||
) -> bool:
|
||||
"""Start a VAD-driven continuous recording loop.
|
||||
|
||||
The loop calls ``on_transcript(text)`` each time speech is detected and
|
||||
transcribed successfully, then auto-restarts. After
|
||||
``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
|
||||
picked up at all) the loop stops itself and calls ``on_silent_limit``
|
||||
so the UI can reflect "voice off". Idempotent — calling while already
|
||||
active is a no-op.
|
||||
transcribed successfully. If ``auto_restart`` is True, it auto-restarts
|
||||
for the next turn and resets the no-speech counter for that loop. If
|
||||
``auto_restart`` is False, the first silence-triggered transcription ends
|
||||
the loop and reports ``"idle"``; no-speech counts are retained across
|
||||
starts so a push-to-talk caller can still enforce the three-strikes guard.
|
||||
After ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
|
||||
picked up at all) the loop stops itself and calls ``on_silent_limit`` so the
|
||||
UI can reflect "voice off". Returns False if a previous stop is still
|
||||
transcribing/cleaning up; otherwise returns True. Idempotent — calling while
|
||||
already active is a successful no-op.
|
||||
|
||||
``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
|
||||
``"idle"`` so the UI can show a live indicator.
|
||||
"""
|
||||
global _continuous_active, _continuous_recorder
|
||||
global _continuous_active, _continuous_recorder, _continuous_auto_restart
|
||||
global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
|
||||
global _continuous_no_speech_count
|
||||
|
||||
with _continuous_lock:
|
||||
if _continuous_active:
|
||||
_debug("start_continuous: already active — no-op")
|
||||
return
|
||||
return True
|
||||
if _continuous_stopping:
|
||||
_debug("start_continuous: stop/transcribe in progress — busy")
|
||||
return False
|
||||
_continuous_active = True
|
||||
_continuous_auto_restart = auto_restart
|
||||
_continuous_on_transcript = on_transcript
|
||||
_continuous_on_status = on_status
|
||||
_continuous_on_silent_limit = on_silent_limit
|
||||
_continuous_no_speech_count = 0
|
||||
if auto_restart:
|
||||
_continuous_no_speech_count = 0
|
||||
|
||||
if _continuous_recorder is None:
|
||||
_continuous_recorder = create_audio_recorder()
|
||||
|
|
@ -428,15 +441,18 @@ def start_continuous(
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
return True
|
||||
|
||||
def stop_continuous() -> None:
|
||||
|
||||
def stop_continuous(force_transcribe: bool = False) -> None:
|
||||
"""Stop the active continuous loop and release the microphone.
|
||||
|
||||
Idempotent — calling while not active is a no-op. Any in-flight
|
||||
transcription completes but its result is discarded (the callback
|
||||
checks ``_continuous_active`` before firing).
|
||||
Idempotent — calling while not active is a no-op. If ``force_transcribe`` is
|
||||
True, the recorder stops synchronously, then transcription/cleanup runs on a
|
||||
background thread before reporting ``"idle"``. Otherwise the buffer is
|
||||
discarded.
|
||||
"""
|
||||
global _continuous_active, _continuous_on_transcript
|
||||
global _continuous_active, _continuous_on_transcript, _continuous_stopping
|
||||
global _continuous_on_status, _continuous_on_silent_limit
|
||||
global _continuous_recorder, _continuous_no_speech_count
|
||||
|
||||
|
|
@ -446,18 +462,98 @@ def stop_continuous() -> None:
|
|||
_continuous_active = False
|
||||
rec = _continuous_recorder
|
||||
on_status = _continuous_on_status
|
||||
on_transcript = _continuous_on_transcript
|
||||
on_silent_limit = _continuous_on_silent_limit
|
||||
auto_restart = _continuous_auto_restart
|
||||
track_no_speech = force_transcribe and not auto_restart
|
||||
_continuous_stopping = rec is not None
|
||||
_continuous_on_transcript = None
|
||||
_continuous_on_status = None
|
||||
_continuous_on_silent_limit = None
|
||||
_continuous_no_speech_count = 0
|
||||
if not track_no_speech:
|
||||
_continuous_no_speech_count = 0
|
||||
|
||||
if rec is not None:
|
||||
try:
|
||||
# cancel() (not stop()) discards buffered frames — the loop
|
||||
# is over, we don't want to transcribe a half-captured turn.
|
||||
rec.cancel()
|
||||
except Exception as e:
|
||||
logger.warning("failed to cancel recorder: %s", e)
|
||||
if force_transcribe and on_transcript:
|
||||
if on_status:
|
||||
try:
|
||||
on_status("transcribing")
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
wav_path = rec.stop()
|
||||
except Exception as e:
|
||||
logger.warning("failed to stop recorder: %s", e)
|
||||
try:
|
||||
rec.cancel()
|
||||
except Exception as cancel_error:
|
||||
logger.warning("failed to cancel recorder: %s", cancel_error)
|
||||
wav_path = None
|
||||
|
||||
def _transcribe_and_cleanup():
|
||||
global _continuous_no_speech_count, _continuous_stopping
|
||||
transcript: Optional[str] = None
|
||||
should_halt = False
|
||||
|
||||
try:
|
||||
if wav_path:
|
||||
try:
|
||||
result = transcribe_recording(wav_path)
|
||||
if result.get("success"):
|
||||
text = (result.get("transcript") or "").strip()
|
||||
if text and not is_whisper_hallucination(text):
|
||||
transcript = text
|
||||
finally:
|
||||
if os.path.isfile(wav_path):
|
||||
os.unlink(wav_path)
|
||||
except Exception as e:
|
||||
logger.warning("failed to stop/transcribe recorder: %s", e)
|
||||
finally:
|
||||
if transcript:
|
||||
try:
|
||||
on_transcript(transcript)
|
||||
except Exception as e:
|
||||
logger.warning("on_transcript callback raised: %s", e)
|
||||
|
||||
if track_no_speech:
|
||||
with _continuous_lock:
|
||||
if transcript:
|
||||
_continuous_no_speech_count = 0
|
||||
else:
|
||||
_continuous_no_speech_count += 1
|
||||
should_halt = (
|
||||
_continuous_no_speech_count
|
||||
>= _CONTINUOUS_NO_SPEECH_LIMIT
|
||||
)
|
||||
if should_halt:
|
||||
_continuous_no_speech_count = 0
|
||||
if should_halt and on_silent_limit:
|
||||
try:
|
||||
on_silent_limit()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_play_beep(frequency=660, count=2)
|
||||
with _continuous_lock:
|
||||
_continuous_stopping = False
|
||||
if on_status:
|
||||
try:
|
||||
on_status("idle")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
threading.Thread(target=_transcribe_and_cleanup, daemon=True).start()
|
||||
return
|
||||
else:
|
||||
try:
|
||||
# cancel() (not stop()) discards buffered frames — the loop
|
||||
# is over, we don't want to transcribe a half-captured turn.
|
||||
rec.cancel()
|
||||
except Exception as e:
|
||||
logger.warning("failed to cancel recorder: %s", e)
|
||||
|
||||
with _continuous_lock:
|
||||
_continuous_stopping = False
|
||||
|
||||
# Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
|
||||
# silence-auto-stop path plays).
|
||||
|
|
@ -603,23 +699,39 @@ def _continuous_on_silence() -> None:
|
|||
_debug("_continuous_on_silence: stopped while waiting for TTS")
|
||||
return
|
||||
|
||||
# Restart for the next turn.
|
||||
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
|
||||
_play_beep(frequency=880, count=1)
|
||||
try:
|
||||
rec.start(on_silence_stop=_continuous_on_silence)
|
||||
except Exception as e:
|
||||
logger.error("failed to restart continuous recording: %s", e)
|
||||
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
|
||||
if _continuous_auto_restart:
|
||||
# Restart for the next turn.
|
||||
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
|
||||
_play_beep(frequency=880, count=1)
|
||||
try:
|
||||
rec.start(on_silence_stop=_continuous_on_silence)
|
||||
except Exception as e:
|
||||
logger.error("failed to restart continuous recording: %s", e)
|
||||
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
|
||||
with _continuous_lock:
|
||||
_continuous_active = False
|
||||
if on_status:
|
||||
try:
|
||||
on_status("idle")
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("listening")
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
# Do not auto-restart. Clean up state and notify idle.
|
||||
_debug("_continuous_on_silence: auto_restart=False, stopping loop")
|
||||
with _continuous_lock:
|
||||
_continuous_active = False
|
||||
return
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("listening")
|
||||
except Exception:
|
||||
pass
|
||||
if on_status:
|
||||
try:
|
||||
on_status("idle")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# ── TTS API ──────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -309,6 +309,7 @@ class TestContinuousAPI:
|
|||
|
||||
# Isolate from any state left behind by other tests in the session.
|
||||
monkeypatch.setattr(voice, "_continuous_active", False)
|
||||
monkeypatch.setattr(voice, "_continuous_stopping", False, raising=False)
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", None)
|
||||
|
||||
assert voice.is_continuous_active() is False
|
||||
|
|
@ -343,11 +344,20 @@ class TestContinuousAPI:
|
|||
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())
|
||||
|
||||
voice.start_continuous(on_transcript=lambda _t: None)
|
||||
started = voice.start_continuous(on_transcript=lambda _t: None)
|
||||
|
||||
# The guard inside start_continuous short-circuits before rec.start()
|
||||
assert started is True
|
||||
assert called["n"] == 0
|
||||
|
||||
def test_start_returns_false_while_stopping(self, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(voice, "_continuous_active", False)
|
||||
monkeypatch.setattr(voice, "_continuous_stopping", True, raising=False)
|
||||
|
||||
assert voice.start_continuous(on_transcript=lambda _t: None) is False
|
||||
|
||||
|
||||
class TestContinuousLoopSimulation:
|
||||
"""End-to-end simulation of the VAD loop with a fake recorder.
|
||||
|
|
@ -368,6 +378,8 @@ class TestContinuousLoopSimulation:
|
|||
monkeypatch.setattr(voice, "_continuous_on_transcript", None)
|
||||
monkeypatch.setattr(voice, "_continuous_on_status", None)
|
||||
monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
|
||||
monkeypatch.setattr(voice, "_continuous_auto_restart", True, raising=False)
|
||||
monkeypatch.setattr(voice, "_play_beep", lambda *_, **__: None)
|
||||
|
||||
class FakeRecorder:
|
||||
_silence_threshold = 200
|
||||
|
|
@ -381,13 +393,20 @@ class TestContinuousLoopSimulation:
|
|||
self.cancelled = 0
|
||||
# Preset WAV path returned by stop()
|
||||
self.next_stop_wav = "/tmp/fake.wav"
|
||||
self.fail_stop = False
|
||||
self.fail_next_start = False
|
||||
|
||||
def start(self, on_silence_stop=None):
|
||||
if self.fail_next_start:
|
||||
self.fail_next_start = False
|
||||
raise RuntimeError("boom")
|
||||
self.start_calls += 1
|
||||
self.last_callback = on_silence_stop
|
||||
self.is_recording = True
|
||||
|
||||
def stop(self):
|
||||
if self.fail_stop:
|
||||
raise RuntimeError("stop failed")
|
||||
self.stopped += 1
|
||||
self.is_recording = False
|
||||
return self.next_stop_wav
|
||||
|
|
@ -433,6 +452,204 @@ class TestContinuousLoopSimulation:
|
|||
|
||||
voice.stop_continuous()
|
||||
|
||||
def test_auto_restart_false_stops_after_first_transcript(self, fake_recorder, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": "single shot"},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
statuses = []
|
||||
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda t: transcripts.append(t),
|
||||
on_status=lambda s: statuses.append(s),
|
||||
auto_restart=False,
|
||||
)
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert transcripts == ["single shot"]
|
||||
assert fake_recorder.start_calls == 1
|
||||
assert statuses == ["listening", "transcribing", "idle"]
|
||||
assert voice.is_continuous_active() is False
|
||||
|
||||
def test_auto_restart_false_retains_silent_strikes_across_starts(
|
||||
self, fake_recorder, monkeypatch
|
||||
):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": ""},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
silent_limit_fired = []
|
||||
|
||||
for _ in range(3):
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda _t: None,
|
||||
on_silent_limit=lambda: silent_limit_fired.append(True),
|
||||
auto_restart=False,
|
||||
)
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert silent_limit_fired == [True]
|
||||
assert voice.is_continuous_active() is False
|
||||
assert fake_recorder.start_calls == 3
|
||||
|
||||
def test_force_transcribe_stop_delivers_current_buffer(self, fake_recorder, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
class ImmediateThread:
|
||||
def __init__(self, target, daemon=False):
|
||||
self.target = target
|
||||
|
||||
def start(self):
|
||||
self.target()
|
||||
|
||||
monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": "manual stop"},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
statuses = []
|
||||
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda t: transcripts.append(t),
|
||||
on_status=lambda s: statuses.append(s),
|
||||
)
|
||||
voice.stop_continuous(force_transcribe=True)
|
||||
|
||||
assert fake_recorder.stopped == 1
|
||||
assert transcripts == ["manual stop"]
|
||||
assert statuses == ["listening", "transcribing", "idle"]
|
||||
assert voice.is_continuous_active() is False
|
||||
|
||||
def test_force_transcribe_empty_single_shots_hit_silent_limit(
|
||||
self, fake_recorder, monkeypatch
|
||||
):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
class ImmediateThread:
|
||||
def __init__(self, target, daemon=False):
|
||||
self.target = target
|
||||
|
||||
def start(self):
|
||||
self.target()
|
||||
|
||||
monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": ""},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
silent_limit_fired = []
|
||||
|
||||
for _ in range(3):
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda _t: None,
|
||||
on_silent_limit=lambda: silent_limit_fired.append(True),
|
||||
auto_restart=False,
|
||||
)
|
||||
voice.stop_continuous(force_transcribe=True)
|
||||
|
||||
assert silent_limit_fired == [True]
|
||||
assert fake_recorder.stopped == 3
|
||||
assert voice._continuous_no_speech_count == 0
|
||||
|
||||
def test_force_transcribe_valid_single_shot_resets_silent_strikes(
|
||||
self, fake_recorder, monkeypatch
|
||||
):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
class ImmediateThread:
|
||||
def __init__(self, target, daemon=False):
|
||||
self.target = target
|
||||
|
||||
def start(self):
|
||||
self.target()
|
||||
|
||||
monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
|
||||
monkeypatch.setattr(voice, "_continuous_no_speech_count", 2)
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": "manual stop"},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
silent_limit_fired = []
|
||||
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda t: transcripts.append(t),
|
||||
on_silent_limit=lambda: silent_limit_fired.append(True),
|
||||
auto_restart=False,
|
||||
)
|
||||
voice.stop_continuous(force_transcribe=True)
|
||||
|
||||
assert transcripts == ["manual stop"]
|
||||
assert silent_limit_fired == []
|
||||
assert voice._continuous_no_speech_count == 0
|
||||
|
||||
def test_force_transcribe_stop_failure_cancels_and_clears_stopping(
|
||||
self, fake_recorder, monkeypatch
|
||||
):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
class ImmediateThread:
|
||||
def __init__(self, target, daemon=False):
|
||||
self.target = target
|
||||
|
||||
def start(self):
|
||||
self.target()
|
||||
|
||||
monkeypatch.setattr(voice.threading, "Thread", ImmediateThread)
|
||||
fake_recorder.fail_stop = True
|
||||
|
||||
statuses = []
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda _t: None,
|
||||
on_status=lambda s: statuses.append(s),
|
||||
)
|
||||
voice.stop_continuous(force_transcribe=True)
|
||||
|
||||
assert fake_recorder.cancelled == 1
|
||||
assert statuses == ["listening", "transcribing", "idle"]
|
||||
assert voice.is_continuous_active() is False
|
||||
assert voice._continuous_stopping is False
|
||||
|
||||
def test_restart_failure_reports_idle(self, fake_recorder, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": "hello world"},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
statuses = []
|
||||
voice.start_continuous(on_transcript=lambda _t: None, on_status=statuses.append)
|
||||
|
||||
fake_recorder.fail_next_start = True
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert statuses == ["listening", "transcribing", "idle"]
|
||||
assert voice.is_continuous_active() is False
|
||||
|
||||
def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
|
|
|
|||
|
|
@ -204,6 +204,7 @@ def test_voice_record_start_handles_non_dict_voice_cfg(monkeypatch):
|
|||
assert resp["result"]["status"] == "recording"
|
||||
assert captured["silence_threshold"] == 200
|
||||
assert captured["silence_duration"] == 3.0
|
||||
assert captured["auto_restart"] is False
|
||||
|
||||
# Round-12 Copilot review regression on #19835: ``bool`` is a subclass
|
||||
# of ``int``, so the naive ``isinstance(threshold, (int, float))``
|
||||
|
|
@ -232,6 +233,80 @@ def test_voice_record_start_handles_non_dict_voice_cfg(monkeypatch):
|
|||
assert (
|
||||
captured["silence_duration"] == 3.0
|
||||
), f"bool silence_duration leaked through for {bad_bool_cfg!r}"
|
||||
assert captured["auto_restart"] is False
|
||||
|
||||
|
||||
def test_voice_record_stop_forces_transcription(monkeypatch):
|
||||
captured: dict = {}
|
||||
|
||||
def fake_stop_continuous(**kwargs):
|
||||
captured.update(kwargs)
|
||||
|
||||
monkeypatch.setitem(
|
||||
sys.modules,
|
||||
"hermes_cli.voice",
|
||||
types.SimpleNamespace(
|
||||
start_continuous=lambda **_kwargs: None,
|
||||
stop_continuous=fake_stop_continuous,
|
||||
),
|
||||
)
|
||||
|
||||
resp = server.dispatch(
|
||||
{
|
||||
"id": "voice-record-stop",
|
||||
"method": "voice.record",
|
||||
"params": {"action": "stop"},
|
||||
}
|
||||
)
|
||||
|
||||
assert resp["result"]["status"] == "stopped"
|
||||
assert captured["force_transcribe"] is True
|
||||
|
||||
|
||||
def test_voice_record_stop_updates_event_session_id(monkeypatch):
|
||||
monkeypatch.setitem(
|
||||
sys.modules,
|
||||
"hermes_cli.voice",
|
||||
types.SimpleNamespace(
|
||||
start_continuous=lambda **_kwargs: True,
|
||||
stop_continuous=lambda **_kwargs: None,
|
||||
),
|
||||
)
|
||||
monkeypatch.setattr(server, "_voice_event_sid", "old-session")
|
||||
|
||||
resp = server.dispatch(
|
||||
{
|
||||
"id": "voice-record-stop-session",
|
||||
"method": "voice.record",
|
||||
"params": {"action": "stop", "session_id": "new-session"},
|
||||
}
|
||||
)
|
||||
|
||||
assert resp["result"]["status"] == "stopped"
|
||||
assert server._voice_event_sid == "new-session"
|
||||
|
||||
|
||||
def test_voice_record_start_reports_busy_when_stop_is_in_progress(monkeypatch):
|
||||
monkeypatch.setitem(
|
||||
sys.modules,
|
||||
"hermes_cli.voice",
|
||||
types.SimpleNamespace(
|
||||
start_continuous=lambda **_kwargs: False,
|
||||
stop_continuous=lambda **_kwargs: None,
|
||||
),
|
||||
)
|
||||
monkeypatch.setenv("HERMES_VOICE", "1")
|
||||
monkeypatch.setattr(server, "_load_cfg", lambda: {"voice": {}})
|
||||
|
||||
resp = server.dispatch(
|
||||
{
|
||||
"id": "voice-record-busy",
|
||||
"method": "voice.record",
|
||||
"params": {"action": "start"},
|
||||
}
|
||||
)
|
||||
|
||||
assert resp["result"]["status"] == "busy"
|
||||
|
||||
|
||||
def test_voice_toggle_tts_branch_also_carries_record_key(monkeypatch):
|
||||
|
|
|
|||
|
|
@ -5619,14 +5619,13 @@ def _(rid, params: dict) -> dict:
|
|||
|
||||
@method("voice.record")
|
||||
def _(rid, params: dict) -> dict:
|
||||
"""VAD-driven continuous record loop, CLI-parity.
|
||||
"""VAD-bounded push-to-talk capture, CLI-parity.
|
||||
|
||||
``start`` turns on a VAD loop that emits ``voice.transcript`` events
|
||||
for each detected utterance and auto-restarts for the next turn.
|
||||
``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while-
|
||||
recording branch clearing ``_voice_continuous``). Three consecutive
|
||||
silent cycles stop the loop automatically and emit a
|
||||
``voice.transcript`` with ``no_speech_limit=True``.
|
||||
``start`` begins one VAD-bounded capture and emits ``voice.transcript``
|
||||
after silence stops the recorder. ``stop`` forces transcription of the
|
||||
active buffer, matching classic CLI push-to-talk. The voice wrapper retains
|
||||
no-speech counts across single-shot starts, so three consecutive silent
|
||||
captures emit ``voice.transcript`` with ``no_speech_limit=True``.
|
||||
"""
|
||||
action = params.get("action", "start")
|
||||
|
||||
|
|
@ -5665,7 +5664,7 @@ def _(rid, params: dict) -> dict:
|
|||
if isinstance(duration, (int, float)) and not isinstance(duration, bool)
|
||||
else 3.0
|
||||
)
|
||||
start_continuous(
|
||||
started = start_continuous(
|
||||
on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
|
||||
on_status=lambda s: _voice_emit("voice.status", {"state": s}),
|
||||
on_silent_limit=lambda: _voice_emit(
|
||||
|
|
@ -5673,13 +5672,19 @@ def _(rid, params: dict) -> dict:
|
|||
),
|
||||
silence_threshold=safe_threshold,
|
||||
silence_duration=safe_duration,
|
||||
auto_restart=False,
|
||||
)
|
||||
if started is False:
|
||||
return _ok(rid, {"status": "busy"})
|
||||
return _ok(rid, {"status": "recording"})
|
||||
|
||||
# action == "stop"
|
||||
with _voice_sid_lock:
|
||||
_voice_event_sid = params.get("session_id") or _voice_event_sid
|
||||
|
||||
from hermes_cli.voice import stop_continuous
|
||||
|
||||
stop_continuous()
|
||||
stop_continuous(force_transcribe=True)
|
||||
return _ok(rid, {"status": "stopped"})
|
||||
except ImportError:
|
||||
return _err(
|
||||
|
|
|
|||
37
ui-tui/src/__tests__/useInputHandlers.test.ts
Normal file
37
ui-tui/src/__tests__/useInputHandlers.test.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import { describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { applyVoiceRecordResponse } from '../app/useInputHandlers.js'
|
||||
|
||||
describe('applyVoiceRecordResponse', () => {
|
||||
it('reverts optimistic REC state when the gateway reports voice busy', () => {
|
||||
const setProcessing = vi.fn()
|
||||
const setRecording = vi.fn()
|
||||
const sys = vi.fn()
|
||||
|
||||
applyVoiceRecordResponse({ status: 'busy' }, true, { setProcessing, setRecording }, sys)
|
||||
|
||||
expect(setRecording).toHaveBeenCalledWith(false)
|
||||
expect(setProcessing).toHaveBeenCalledWith(true)
|
||||
expect(sys).toHaveBeenCalledWith('voice: still transcribing; try again shortly')
|
||||
})
|
||||
|
||||
it('keeps optimistic REC state for successful recording starts', () => {
|
||||
const setProcessing = vi.fn()
|
||||
const setRecording = vi.fn()
|
||||
|
||||
applyVoiceRecordResponse({ status: 'recording' }, true, { setProcessing, setRecording }, vi.fn())
|
||||
|
||||
expect(setRecording).not.toHaveBeenCalled()
|
||||
expect(setProcessing).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('reverts optimistic REC state when the gateway returns null', () => {
|
||||
const setProcessing = vi.fn()
|
||||
const setRecording = vi.fn()
|
||||
|
||||
applyVoiceRecordResponse(null, true, { setProcessing, setRecording }, vi.fn())
|
||||
|
||||
expect(setRecording).toHaveBeenCalledWith(false)
|
||||
expect(setProcessing).toHaveBeenCalledWith(false)
|
||||
})
|
||||
})
|
||||
|
|
@ -23,6 +23,26 @@ import { getUiState } from './uiStore.js'
|
|||
|
||||
const isCtrl = (key: { ctrl: boolean }, ch: string, target: string) => key.ctrl && ch.toLowerCase() === target
|
||||
|
||||
export function applyVoiceRecordResponse(
|
||||
response: null | VoiceRecordResponse,
|
||||
starting: boolean,
|
||||
voice: Pick<InputHandlerContext['voice'], 'setProcessing' | 'setRecording'>,
|
||||
sys: (text: string) => void
|
||||
) {
|
||||
if (!starting || response?.status === 'recording') {
|
||||
return
|
||||
}
|
||||
|
||||
voice.setRecording(false)
|
||||
|
||||
if (response?.status === 'busy') {
|
||||
voice.setProcessing(true)
|
||||
sys('voice: still transcribing; try again shortly')
|
||||
} else {
|
||||
voice.setProcessing(false)
|
||||
}
|
||||
}
|
||||
|
||||
export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
||||
const { actions, composer, gateway, terminal, voice, wheelStep } = ctx
|
||||
const { actions: cActions, refs: cRefs, state: cState } = composer
|
||||
|
|
@ -157,11 +177,12 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
|||
}
|
||||
}
|
||||
|
||||
// CLI parity: Ctrl+B toggles the VAD-driven continuous recording loop
|
||||
// CLI parity: Ctrl+B toggles a VAD-bounded push-to-talk capture
|
||||
// (NOT the voice-mode umbrella bit). The mode is enabled via /voice on;
|
||||
// Ctrl+B while the mode is off sys-nudges the user. While the mode is
|
||||
// on, the first press starts a continuous loop (gateway → start_continuous,
|
||||
// VAD auto-stop → transcribe → auto-restart), a subsequent press stops it.
|
||||
// on, the first press starts a single VAD-bounded capture
|
||||
// (gateway -> start_continuous(auto_restart=false), VAD auto-stop ->
|
||||
// transcribe -> idle), a subsequent press stops and transcribes it.
|
||||
// The gateway publishes voice.status + voice.transcript events that
|
||||
// createGatewayEventHandler turns into UI badges and composer injection.
|
||||
const voiceRecordToggle = () => {
|
||||
|
|
@ -182,14 +203,17 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
|||
voice.setProcessing(false)
|
||||
}
|
||||
|
||||
gateway.rpc<VoiceRecordResponse>('voice.record', { action }).catch((e: Error) => {
|
||||
// Revert optimistic UI on failure.
|
||||
if (starting) {
|
||||
voice.setRecording(false)
|
||||
}
|
||||
gateway
|
||||
.rpc<VoiceRecordResponse>('voice.record', { action, session_id: getUiState().sid })
|
||||
.then(r => applyVoiceRecordResponse(r, starting, voice, actions.sys))
|
||||
.catch((e: Error) => {
|
||||
// Revert optimistic UI on failure.
|
||||
if (starting) {
|
||||
voice.setRecording(false)
|
||||
}
|
||||
|
||||
actions.sys(`voice error: ${e.message}`)
|
||||
})
|
||||
actions.sys(`voice error: ${e.message}`)
|
||||
})
|
||||
}
|
||||
|
||||
useInput((ch, key) => {
|
||||
|
|
|
|||
|
|
@ -295,7 +295,7 @@ export interface VoiceToggleResponse {
|
|||
}
|
||||
|
||||
export interface VoiceRecordResponse {
|
||||
status?: string
|
||||
status?: 'busy' | 'recording' | 'stopped'
|
||||
text?: string
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue