mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-13 03:52:00 +00:00
fix(tui): restore classic CLI voice push-to-talk parity
This commit is contained in:
parent
0e2a53eab2
commit
93b9ae301b
3 changed files with 82 additions and 33 deletions
|
|
@ -184,20 +184,21 @@ def start_continuous(
|
||||||
on_silent_limit: Optional[Callable[[], None]] = None,
|
on_silent_limit: Optional[Callable[[], None]] = None,
|
||||||
silence_threshold: int = 200,
|
silence_threshold: int = 200,
|
||||||
silence_duration: float = 3.0,
|
silence_duration: float = 3.0,
|
||||||
|
auto_restart: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Start a VAD-driven continuous recording loop.
|
"""Start a VAD-driven continuous recording loop.
|
||||||
|
|
||||||
The loop calls ``on_transcript(text)`` each time speech is detected and
|
The loop calls ``on_transcript(text)`` each time speech is detected and
|
||||||
transcribed successfully, then auto-restarts. After
|
transcribed successfully. If ``auto_restart`` is True, it auto-restarts
|
||||||
``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
|
for the next turn. After ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive
|
||||||
picked up at all) the loop stops itself and calls ``on_silent_limit``
|
silent cycles (no speech picked up at all) the loop stops itself and calls
|
||||||
so the UI can reflect "voice off". Idempotent — calling while already
|
``on_silent_limit`` so the UI can reflect "voice off". Idempotent — calling
|
||||||
active is a no-op.
|
while already active is a no-op.
|
||||||
|
|
||||||
``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
|
``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
|
||||||
``"idle"`` so the UI can show a live indicator.
|
``"idle"`` so the UI can show a live indicator.
|
||||||
"""
|
"""
|
||||||
global _continuous_active, _continuous_recorder
|
global _continuous_active, _continuous_recorder, _continuous_auto_restart
|
||||||
global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
|
global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
|
||||||
global _continuous_no_speech_count
|
global _continuous_no_speech_count
|
||||||
|
|
||||||
|
|
@ -206,6 +207,7 @@ def start_continuous(
|
||||||
_debug("start_continuous: already active — no-op")
|
_debug("start_continuous: already active — no-op")
|
||||||
return
|
return
|
||||||
_continuous_active = True
|
_continuous_active = True
|
||||||
|
_continuous_auto_restart = auto_restart
|
||||||
_continuous_on_transcript = on_transcript
|
_continuous_on_transcript = on_transcript
|
||||||
_continuous_on_status = on_status
|
_continuous_on_status = on_status
|
||||||
_continuous_on_silent_limit = on_silent_limit
|
_continuous_on_silent_limit = on_silent_limit
|
||||||
|
|
@ -243,12 +245,12 @@ def start_continuous(
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def stop_continuous() -> None:
|
def stop_continuous(force_transcribe: bool = False) -> None:
|
||||||
"""Stop the active continuous loop and release the microphone.
|
"""Stop the active continuous loop and release the microphone.
|
||||||
|
|
||||||
Idempotent — calling while not active is a no-op. Any in-flight
|
Idempotent — calling while not active is a no-op. If force_transcribe
|
||||||
transcription completes but its result is discarded (the callback
|
is True, the current buffer is transcribed before stopping. Otherwise
|
||||||
checks ``_continuous_active`` before firing).
|
the buffer is discarded.
|
||||||
"""
|
"""
|
||||||
global _continuous_active, _continuous_on_transcript
|
global _continuous_active, _continuous_on_transcript
|
||||||
global _continuous_on_status, _continuous_on_silent_limit
|
global _continuous_on_status, _continuous_on_silent_limit
|
||||||
|
|
@ -260,18 +262,51 @@ def stop_continuous() -> None:
|
||||||
_continuous_active = False
|
_continuous_active = False
|
||||||
rec = _continuous_recorder
|
rec = _continuous_recorder
|
||||||
on_status = _continuous_on_status
|
on_status = _continuous_on_status
|
||||||
|
on_transcript = _continuous_on_transcript
|
||||||
_continuous_on_transcript = None
|
_continuous_on_transcript = None
|
||||||
_continuous_on_status = None
|
_continuous_on_status = None
|
||||||
_continuous_on_silent_limit = None
|
_continuous_on_silent_limit = None
|
||||||
_continuous_no_speech_count = 0
|
_continuous_no_speech_count = 0
|
||||||
|
|
||||||
if rec is not None:
|
if rec is not None:
|
||||||
try:
|
if force_transcribe and on_transcript:
|
||||||
# cancel() (not stop()) discards buffered frames — the loop
|
def _transcribe_and_cleanup():
|
||||||
# is over, we don't want to transcribe a half-captured turn.
|
if on_status:
|
||||||
rec.cancel()
|
try:
|
||||||
except Exception as e:
|
on_status("transcribing")
|
||||||
logger.warning("failed to cancel recorder: %s", e)
|
except Exception:
|
||||||
|
pass
|
||||||
|
try:
|
||||||
|
wav_path = rec.stop()
|
||||||
|
if wav_path:
|
||||||
|
try:
|
||||||
|
result = transcribe_recording(wav_path)
|
||||||
|
if result.get("success"):
|
||||||
|
text = (result.get("transcript") or "").strip()
|
||||||
|
if text and not is_whisper_hallucination(text):
|
||||||
|
on_transcript(text)
|
||||||
|
finally:
|
||||||
|
if os.path.isfile(wav_path):
|
||||||
|
os.unlink(wav_path)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("failed to stop/transcribe recorder: %s", e)
|
||||||
|
finally:
|
||||||
|
_play_beep(frequency=660, count=2)
|
||||||
|
if on_status:
|
||||||
|
try:
|
||||||
|
on_status("idle")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
threading.Thread(target=_transcribe_and_cleanup, daemon=True).start()
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
# cancel() (not stop()) discards buffered frames — the loop
|
||||||
|
# is over, we don't want to transcribe a half-captured turn.
|
||||||
|
rec.cancel()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("failed to cancel recorder: %s", e)
|
||||||
|
|
||||||
# Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
|
# Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
|
||||||
# silence-auto-stop path plays).
|
# silence-auto-stop path plays).
|
||||||
|
|
@ -417,23 +452,34 @@ def _continuous_on_silence() -> None:
|
||||||
_debug("_continuous_on_silence: stopped while waiting for TTS")
|
_debug("_continuous_on_silence: stopped while waiting for TTS")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Restart for the next turn.
|
if _continuous_auto_restart:
|
||||||
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
|
# Restart for the next turn.
|
||||||
_play_beep(frequency=880, count=1)
|
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
|
||||||
try:
|
_play_beep(frequency=880, count=1)
|
||||||
rec.start(on_silence_stop=_continuous_on_silence)
|
try:
|
||||||
except Exception as e:
|
rec.start(on_silence_stop=_continuous_on_silence)
|
||||||
logger.error("failed to restart continuous recording: %s", e)
|
except Exception as e:
|
||||||
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
|
logger.error("failed to restart continuous recording: %s", e)
|
||||||
|
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
|
||||||
|
with _continuous_lock:
|
||||||
|
_continuous_active = False
|
||||||
|
return
|
||||||
|
|
||||||
|
if on_status:
|
||||||
|
try:
|
||||||
|
on_status("listening")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# Do not auto-restart. Clean up state and notify idle.
|
||||||
|
_debug("_continuous_on_silence: auto_restart=False, stopping loop")
|
||||||
with _continuous_lock:
|
with _continuous_lock:
|
||||||
_continuous_active = False
|
_continuous_active = False
|
||||||
return
|
if on_status:
|
||||||
|
try:
|
||||||
if on_status:
|
on_status("idle")
|
||||||
try:
|
except Exception:
|
||||||
on_status("listening")
|
pass
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# ── TTS API ──────────────────────────────────────────────────────────
|
# ── TTS API ──────────────────────────────────────────────────────────
|
||||||
|
|
|
||||||
|
|
@ -4012,6 +4012,8 @@ def _(rid, params: dict) -> dict:
|
||||||
from hermes_cli.voice import start_continuous
|
from hermes_cli.voice import start_continuous
|
||||||
|
|
||||||
voice_cfg = _load_cfg().get("voice", {})
|
voice_cfg = _load_cfg().get("voice", {})
|
||||||
|
if not isinstance(voice_cfg, dict):
|
||||||
|
voice_cfg = {}
|
||||||
start_continuous(
|
start_continuous(
|
||||||
on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
|
on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
|
||||||
on_status=lambda s: _voice_emit("voice.status", {"state": s}),
|
on_status=lambda s: _voice_emit("voice.status", {"state": s}),
|
||||||
|
|
@ -4020,13 +4022,14 @@ def _(rid, params: dict) -> dict:
|
||||||
),
|
),
|
||||||
silence_threshold=voice_cfg.get("silence_threshold", 200),
|
silence_threshold=voice_cfg.get("silence_threshold", 200),
|
||||||
silence_duration=voice_cfg.get("silence_duration", 3.0),
|
silence_duration=voice_cfg.get("silence_duration", 3.0),
|
||||||
|
auto_restart=False,
|
||||||
)
|
)
|
||||||
return _ok(rid, {"status": "recording"})
|
return _ok(rid, {"status": "recording"})
|
||||||
|
|
||||||
# action == "stop"
|
# action == "stop"
|
||||||
from hermes_cli.voice import stop_continuous
|
from hermes_cli.voice import stop_continuous
|
||||||
|
|
||||||
stop_continuous()
|
stop_continuous(force_transcribe=True)
|
||||||
return _ok(rid, {"status": "stopped"})
|
return _ok(rid, {"status": "stopped"})
|
||||||
except ImportError:
|
except ImportError:
|
||||||
return _err(
|
return _err(
|
||||||
|
|
|
||||||
|
|
@ -155,7 +155,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
||||||
voice.setProcessing(false)
|
voice.setProcessing(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
gateway.rpc<VoiceRecordResponse>('voice.record', { action }).catch((e: Error) => {
|
gateway.rpc<VoiceRecordResponse>('voice.record', { action, session_id: getUiState().sid }).catch((e: Error) => {
|
||||||
// Revert optimistic UI on failure.
|
// Revert optimistic UI on failure.
|
||||||
if (starting) {
|
if (starting) {
|
||||||
voice.setRecording(false)
|
voice.setRecording(false)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue