diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py index 0a355ce4fa..9520b8a598 100644 --- a/hermes_cli/voice.py +++ b/hermes_cli/voice.py @@ -184,20 +184,21 @@ def start_continuous( on_silent_limit: Optional[Callable[[], None]] = None, silence_threshold: int = 200, silence_duration: float = 3.0, + auto_restart: bool = True, ) -> None: """Start a VAD-driven continuous recording loop. The loop calls ``on_transcript(text)`` each time speech is detected and - transcribed successfully, then auto-restarts. After - ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech - picked up at all) the loop stops itself and calls ``on_silent_limit`` - so the UI can reflect "voice off". Idempotent — calling while already - active is a no-op. + transcribed successfully. If ``auto_restart`` is True, it auto-restarts + for the next turn. After ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive + silent cycles (no speech picked up at all) the loop stops itself and calls + ``on_silent_limit`` so the UI can reflect "voice off". Idempotent — calling + while already active is a no-op. ``on_status`` is called with ``"listening"`` / ``"transcribing"`` / ``"idle"`` so the UI can show a live indicator. """ - global _continuous_active, _continuous_recorder + global _continuous_active, _continuous_recorder, _continuous_auto_restart global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit global _continuous_no_speech_count @@ -206,6 +207,7 @@ def start_continuous( _debug("start_continuous: already active — no-op") return _continuous_active = True + _continuous_auto_restart = auto_restart _continuous_on_transcript = on_transcript _continuous_on_status = on_status _continuous_on_silent_limit = on_silent_limit @@ -243,12 +245,12 @@ def start_continuous( pass -def stop_continuous() -> None: +def stop_continuous(force_transcribe: bool = False) -> None: """Stop the active continuous loop and release the microphone. - Idempotent — calling while not active is a no-op. Any in-flight - transcription completes but its result is discarded (the callback - checks ``_continuous_active`` before firing). + Idempotent — calling while not active is a no-op. If force_transcribe + is True, the current buffer is transcribed before stopping. Otherwise + the buffer is discarded. """ global _continuous_active, _continuous_on_transcript global _continuous_on_status, _continuous_on_silent_limit @@ -260,18 +262,51 @@ def stop_continuous() -> None: _continuous_active = False rec = _continuous_recorder on_status = _continuous_on_status + on_transcript = _continuous_on_transcript _continuous_on_transcript = None _continuous_on_status = None _continuous_on_silent_limit = None _continuous_no_speech_count = 0 if rec is not None: - try: - # cancel() (not stop()) discards buffered frames — the loop - # is over, we don't want to transcribe a half-captured turn. - rec.cancel() - except Exception as e: - logger.warning("failed to cancel recorder: %s", e) + if force_transcribe and on_transcript: + def _transcribe_and_cleanup(): + if on_status: + try: + on_status("transcribing") + except Exception: + pass + try: + wav_path = rec.stop() + if wav_path: + try: + result = transcribe_recording(wav_path) + if result.get("success"): + text = (result.get("transcript") or "").strip() + if text and not is_whisper_hallucination(text): + on_transcript(text) + finally: + if os.path.isfile(wav_path): + os.unlink(wav_path) + except Exception as e: + logger.warning("failed to stop/transcribe recorder: %s", e) + finally: + _play_beep(frequency=660, count=2) + if on_status: + try: + on_status("idle") + except Exception: + pass + + threading.Thread(target=_transcribe_and_cleanup, daemon=True).start() + return + else: + try: + # cancel() (not stop()) discards buffered frames — the loop + # is over, we don't want to transcribe a half-captured turn. + rec.cancel() + except Exception as e: + logger.warning("failed to cancel recorder: %s", e) # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the # silence-auto-stop path plays). @@ -417,23 +452,34 @@ def _continuous_on_silence() -> None: _debug("_continuous_on_silence: stopped while waiting for TTS") return - # Restart for the next turn. - _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})") - _play_beep(frequency=880, count=1) - try: - rec.start(on_silence_stop=_continuous_on_silence) - except Exception as e: - logger.error("failed to restart continuous recording: %s", e) - _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}") + if _continuous_auto_restart: + # Restart for the next turn. + _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})") + _play_beep(frequency=880, count=1) + try: + rec.start(on_silence_stop=_continuous_on_silence) + except Exception as e: + logger.error("failed to restart continuous recording: %s", e) + _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}") + with _continuous_lock: + _continuous_active = False + return + + if on_status: + try: + on_status("listening") + except Exception: + pass + else: + # Do not auto-restart. Clean up state and notify idle. + _debug("_continuous_on_silence: auto_restart=False, stopping loop") with _continuous_lock: _continuous_active = False - return - - if on_status: - try: - on_status("listening") - except Exception: - pass + if on_status: + try: + on_status("idle") + except Exception: + pass # ── TTS API ────────────────────────────────────────────────────────── diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 30531aab28..5f6ea75685 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -4012,6 +4012,8 @@ def _(rid, params: dict) -> dict: from hermes_cli.voice import start_continuous voice_cfg = _load_cfg().get("voice", {}) + if not isinstance(voice_cfg, dict): + voice_cfg = {} start_continuous( on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}), on_status=lambda s: _voice_emit("voice.status", {"state": s}), @@ -4020,13 +4022,14 @@ def _(rid, params: dict) -> dict: ), silence_threshold=voice_cfg.get("silence_threshold", 200), silence_duration=voice_cfg.get("silence_duration", 3.0), + auto_restart=False, ) return _ok(rid, {"status": "recording"}) # action == "stop" from hermes_cli.voice import stop_continuous - stop_continuous() + stop_continuous(force_transcribe=True) return _ok(rid, {"status": "stopped"}) except ImportError: return _err( diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts index d2b8bf2717..01dbe58f42 100644 --- a/ui-tui/src/app/useInputHandlers.ts +++ b/ui-tui/src/app/useInputHandlers.ts @@ -155,7 +155,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult { voice.setProcessing(false) } - gateway.rpc('voice.record', { action }).catch((e: Error) => { + gateway.rpc('voice.record', { action, session_id: getUiState().sid }).catch((e: Error) => { // Revert optimistic UI on failure. if (starting) { voice.setRecording(false)