fix(tui): restore classic CLI voice push-to-talk parity

This commit is contained in:
Montbra 2026-04-26 16:05:59 -03:00
parent 0e2a53eab2
commit 93b9ae301b
3 changed files with 82 additions and 33 deletions

View file

@@ -184,20 +184,21 @@ def start_continuous(
on_silent_limit: Optional[Callable[[], None]] = None, on_silent_limit: Optional[Callable[[], None]] = None,
silence_threshold: int = 200, silence_threshold: int = 200,
silence_duration: float = 3.0, silence_duration: float = 3.0,
auto_restart: bool = True,
) -> None: ) -> None:
"""Start a VAD-driven continuous recording loop. """Start a VAD-driven continuous recording loop.
The loop calls ``on_transcript(text)`` each time speech is detected and The loop calls ``on_transcript(text)`` each time speech is detected and
transcribed successfully, then auto-restarts. After transcribed successfully. If ``auto_restart`` is True, it auto-restarts
``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech for the next turn. After ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive
picked up at all) the loop stops itself and calls ``on_silent_limit`` silent cycles (no speech picked up at all) the loop stops itself and calls
so the UI can reflect "voice off". Idempotent calling while already ``on_silent_limit`` so the UI can reflect "voice off". Idempotent calling
active is a no-op. while already active is a no-op.
``on_status`` is called with ``"listening"`` / ``"transcribing"`` / ``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
``"idle"`` so the UI can show a live indicator. ``"idle"`` so the UI can show a live indicator.
""" """
global _continuous_active, _continuous_recorder global _continuous_active, _continuous_recorder, _continuous_auto_restart
global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
global _continuous_no_speech_count global _continuous_no_speech_count
@@ -206,6 +207,7 @@ def start_continuous(
_debug("start_continuous: already active — no-op") _debug("start_continuous: already active — no-op")
return return
_continuous_active = True _continuous_active = True
_continuous_auto_restart = auto_restart
_continuous_on_transcript = on_transcript _continuous_on_transcript = on_transcript
_continuous_on_status = on_status _continuous_on_status = on_status
_continuous_on_silent_limit = on_silent_limit _continuous_on_silent_limit = on_silent_limit
@@ -243,12 +245,12 @@ def start_continuous(
pass pass
def stop_continuous() -> None: def stop_continuous(force_transcribe: bool = False) -> None:
"""Stop the active continuous loop and release the microphone. """Stop the active continuous loop and release the microphone.
Idempotent calling while not active is a no-op. Any in-flight Idempotent calling while not active is a no-op. If force_transcribe
transcription completes but its result is discarded (the callback is True, the current buffer is transcribed before stopping. Otherwise
checks ``_continuous_active`` before firing). the buffer is discarded.
""" """
global _continuous_active, _continuous_on_transcript global _continuous_active, _continuous_on_transcript
global _continuous_on_status, _continuous_on_silent_limit global _continuous_on_status, _continuous_on_silent_limit
@@ -260,18 +262,51 @@ def stop_continuous() -> None:
_continuous_active = False _continuous_active = False
rec = _continuous_recorder rec = _continuous_recorder
on_status = _continuous_on_status on_status = _continuous_on_status
on_transcript = _continuous_on_transcript
_continuous_on_transcript = None _continuous_on_transcript = None
_continuous_on_status = None _continuous_on_status = None
_continuous_on_silent_limit = None _continuous_on_silent_limit = None
_continuous_no_speech_count = 0 _continuous_no_speech_count = 0
if rec is not None: if rec is not None:
try: if force_transcribe and on_transcript:
# cancel() (not stop()) discards buffered frames — the loop def _transcribe_and_cleanup():
# is over, we don't want to transcribe a half-captured turn. if on_status:
rec.cancel() try:
except Exception as e: on_status("transcribing")
logger.warning("failed to cancel recorder: %s", e) except Exception:
pass
try:
wav_path = rec.stop()
if wav_path:
try:
result = transcribe_recording(wav_path)
if result.get("success"):
text = (result.get("transcript") or "").strip()
if text and not is_whisper_hallucination(text):
on_transcript(text)
finally:
if os.path.isfile(wav_path):
os.unlink(wav_path)
except Exception as e:
logger.warning("failed to stop/transcribe recorder: %s", e)
finally:
_play_beep(frequency=660, count=2)
if on_status:
try:
on_status("idle")
except Exception:
pass
threading.Thread(target=_transcribe_and_cleanup, daemon=True).start()
return
else:
try:
# cancel() (not stop()) discards buffered frames — the loop
# is over, we don't want to transcribe a half-captured turn.
rec.cancel()
except Exception as e:
logger.warning("failed to cancel recorder: %s", e)
# Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
# silence-auto-stop path plays). # silence-auto-stop path plays).
@@ -417,23 +452,34 @@ def _continuous_on_silence() -> None:
_debug("_continuous_on_silence: stopped while waiting for TTS") _debug("_continuous_on_silence: stopped while waiting for TTS")
return return
# Restart for the next turn. if _continuous_auto_restart:
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})") # Restart for the next turn.
_play_beep(frequency=880, count=1) _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
try: _play_beep(frequency=880, count=1)
rec.start(on_silence_stop=_continuous_on_silence) try:
except Exception as e: rec.start(on_silence_stop=_continuous_on_silence)
logger.error("failed to restart continuous recording: %s", e) except Exception as e:
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}") logger.error("failed to restart continuous recording: %s", e)
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
with _continuous_lock:
_continuous_active = False
return
if on_status:
try:
on_status("listening")
except Exception:
pass
else:
# Do not auto-restart. Clean up state and notify idle.
_debug("_continuous_on_silence: auto_restart=False, stopping loop")
with _continuous_lock: with _continuous_lock:
_continuous_active = False _continuous_active = False
return if on_status:
try:
if on_status: on_status("idle")
try: except Exception:
on_status("listening") pass
except Exception:
pass
# ── TTS API ────────────────────────────────────────────────────────── # ── TTS API ──────────────────────────────────────────────────────────

View file

@@ -4012,6 +4012,8 @@ def _(rid, params: dict) -> dict:
from hermes_cli.voice import start_continuous from hermes_cli.voice import start_continuous
voice_cfg = _load_cfg().get("voice", {}) voice_cfg = _load_cfg().get("voice", {})
if not isinstance(voice_cfg, dict):
voice_cfg = {}
start_continuous( start_continuous(
on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}), on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
on_status=lambda s: _voice_emit("voice.status", {"state": s}), on_status=lambda s: _voice_emit("voice.status", {"state": s}),
@@ -4020,13 +4022,14 @@ def _(rid, params: dict) -> dict:
), ),
silence_threshold=voice_cfg.get("silence_threshold", 200), silence_threshold=voice_cfg.get("silence_threshold", 200),
silence_duration=voice_cfg.get("silence_duration", 3.0), silence_duration=voice_cfg.get("silence_duration", 3.0),
auto_restart=False,
) )
return _ok(rid, {"status": "recording"}) return _ok(rid, {"status": "recording"})
# action == "stop" # action == "stop"
from hermes_cli.voice import stop_continuous from hermes_cli.voice import stop_continuous
stop_continuous() stop_continuous(force_transcribe=True)
return _ok(rid, {"status": "stopped"}) return _ok(rid, {"status": "stopped"})
except ImportError: except ImportError:
return _err( return _err(

View file

@@ -155,7 +155,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
voice.setProcessing(false) voice.setProcessing(false)
} }
gateway.rpc<VoiceRecordResponse>('voice.record', { action }).catch((e: Error) => { gateway.rpc<VoiceRecordResponse>('voice.record', { action, session_id: getUiState().sid }).catch((e: Error) => {
// Revert optimistic UI on failure. // Revert optimistic UI on failure.
if (starting) { if (starting) {
voice.setRecording(false) voice.setRecording(false)