mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-13 03:52:00 +00:00
fix(tui): restore voice push-to-talk parity (#20897)
* fix(tui): restore classic CLI voice push-to-talk parity
(cherry picked from commit 93b9ae301b)
* fix(tui): harden voice push-to-talk stop flow
Address review feedback from PR #16189 by stopping the active recorder before background transcription, documenting single-shot voice capture, and covering the TUI gateway flags with regression tests.
* fix(tui): preserve silent voice strike tracking
Keep single-shot voice recording's no-speech counter alive across starts so the TUI can still emit the three-strikes auto-disable event, and bind the auto-restart state at module scope for type checking.
* fix(tui): clean up voice stop failure path
Address follow-up review by naming the TUI flow as single-shot push-to-talk and cancelling the recorder when forced stop cannot produce a WAV.
* fix(tui): report busy voice capture starts
Return explicit start state from the voice wrapper so the TUI gateway does not report recording while forced-stop transcription is still cleaning up.
* fix(tui): handle busy voice record responses
Apply the gateway busy status immediately in the TUI and route forced-stop voice events to the session that sent the stop request.
* fix(tui): clear voice recording on null response
Treat a null voice.record RPC result as a failed optimistic start so the REC badge cannot stick after gateway-side errors.
* fix(tui): count silent manual voice stops
Preserve single-shot voice no-speech strikes through forced stop transcription so empty push-to-talk captures still trigger the three-strikes guard.
---------
Co-authored-by: Montbra <montbra@gmail.com>
This commit is contained in:
parent
5ccab51fa8
commit
04cf4788cc
7 changed files with 527 additions and 57 deletions
37
ui-tui/src/__tests__/useInputHandlers.test.ts
Normal file
37
ui-tui/src/__tests__/useInputHandlers.test.ts
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
import { describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { applyVoiceRecordResponse } from '../app/useInputHandlers.js'
|
||||
|
||||
// Regression coverage for applyVoiceRecordResponse: each case simulates a start
// request (starting = true) and checks how the optimistic REC state is reconciled
// with the gateway reply.
describe('applyVoiceRecordResponse', () => {
  it('reverts optimistic REC state when the gateway reports voice busy', () => {
    const setProcessing = vi.fn()
    const setRecording = vi.fn()
    const sys = vi.fn()

    applyVoiceRecordResponse({ status: 'busy' }, true, { setProcessing, setRecording }, sys)

    expect(setRecording).toHaveBeenCalledWith(false)
    expect(setProcessing).toHaveBeenCalledWith(true)
    expect(sys).toHaveBeenCalledWith('voice: still transcribing; try again shortly')
  })

  it('keeps optimistic REC state for successful recording starts', () => {
    const setProcessing = vi.fn()
    const setRecording = vi.fn()
    const sys = vi.fn()

    applyVoiceRecordResponse({ status: 'recording' }, true, { setProcessing, setRecording }, sys)

    expect(setRecording).not.toHaveBeenCalled()
    expect(setProcessing).not.toHaveBeenCalled()
    // A confirmed start must stay silent.
    expect(sys).not.toHaveBeenCalled()
  })

  it('reverts optimistic REC state when the gateway returns null', () => {
    const setProcessing = vi.fn()
    const setRecording = vi.fn()
    const sys = vi.fn()

    applyVoiceRecordResponse(null, true, { setProcessing, setRecording }, sys)

    expect(setRecording).toHaveBeenCalledWith(false)
    expect(setProcessing).toHaveBeenCalledWith(false)
    // Only the busy reply produces a system message.
    expect(sys).not.toHaveBeenCalled()
  })
})
|
||||
|
|
@ -23,6 +23,26 @@ import { getUiState } from './uiStore.js'
|
|||
|
||||
const isCtrl = (key: { ctrl: boolean }, ch: string, target: string) => key.ctrl && ch.toLowerCase() === target
|
||||
|
||||
/**
 * Reconciles the optimistic voice UI state with the reply to a `voice.record` request.
 *
 * Nothing happens unless the request was a start (`starting`) that the gateway
 * did not confirm with `status: 'recording'`. In every other start outcome the
 * recording flag is cleared. The processing flag is set to `true` only for a
 * `busy` reply, which also sends a notice through `sys`; any other reply
 * (including `null`) sets it to `false`.
 *
 * @param response - Reply from the gateway, or `null` when no result was returned.
 * @param starting - Whether the request asked to start recording.
 * @param voice - Setters for the recording and processing flags.
 * @param sys - Receives the user-facing notice emitted on a `busy` reply.
 */
export function applyVoiceRecordResponse(
  response: null | VoiceRecordResponse,
  starting: boolean,
  voice: Pick<InputHandlerContext['voice'], 'setProcessing' | 'setRecording'>,
  sys: (text: string) => void
): void {
  // Stop requests and confirmed starts leave the optimistic state untouched.
  if (!starting || response?.status === 'recording') {
    return
  }

  // The start did not take effect, so the recording flag must not stay set.
  voice.setRecording(false)

  if (response?.status === 'busy') {
    voice.setProcessing(true)
    sys('voice: still transcribing; try again shortly')
  } else {
    voice.setProcessing(false)
  }
}
|
||||
|
||||
export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
||||
const { actions, composer, gateway, terminal, voice, wheelStep } = ctx
|
||||
const { actions: cActions, refs: cRefs, state: cState } = composer
|
||||
|
|
@ -157,11 +177,12 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
|||
}
|
||||
}
|
||||
|
||||
// CLI parity: Ctrl+B toggles the VAD-driven continuous recording loop
|
||||
// CLI parity: Ctrl+B toggles a VAD-bounded push-to-talk capture
|
||||
// (NOT the voice-mode umbrella bit). The mode is enabled via /voice on;
|
||||
// Ctrl+B while the mode is off sys-nudges the user. While the mode is
|
||||
// on, the first press starts a continuous loop (gateway → start_continuous,
|
||||
// VAD auto-stop → transcribe → auto-restart), a subsequent press stops it.
|
||||
// on, the first press starts a single VAD-bounded capture
|
||||
// (gateway -> start_continuous(auto_restart=false), VAD auto-stop ->
|
||||
// transcribe -> idle), a subsequent press stops and transcribes it.
|
||||
// The gateway publishes voice.status + voice.transcript events that
|
||||
// createGatewayEventHandler turns into UI badges and composer injection.
|
||||
const voiceRecordToggle = () => {
|
||||
|
|
@ -182,14 +203,17 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
|||
voice.setProcessing(false)
|
||||
}
|
||||
|
||||
gateway.rpc<VoiceRecordResponse>('voice.record', { action }).catch((e: Error) => {
|
||||
// Revert optimistic UI on failure.
|
||||
if (starting) {
|
||||
voice.setRecording(false)
|
||||
}
|
||||
gateway
|
||||
.rpc<VoiceRecordResponse>('voice.record', { action, session_id: getUiState().sid })
|
||||
.then(r => applyVoiceRecordResponse(r, starting, voice, actions.sys))
|
||||
.catch((e: Error) => {
|
||||
// Revert optimistic UI on failure.
|
||||
if (starting) {
|
||||
voice.setRecording(false)
|
||||
}
|
||||
|
||||
actions.sys(`voice error: ${e.message}`)
|
||||
})
|
||||
actions.sys(`voice error: ${e.message}`)
|
||||
})
|
||||
}
|
||||
|
||||
useInput((ch, key) => {
|
||||
|
|
|
|||
|
|
@ -295,7 +295,7 @@ export interface VoiceToggleResponse {
|
|||
}
|
||||
|
||||
/** Reply shape used for the `voice.record` gateway RPC. */
export interface VoiceRecordResponse {
  // Narrowed from a plain string so callers can compare against the known states.
  status?: 'busy' | 'recording' | 'stopped'
  // NOTE(review): presumably the transcribed text, when one is returned — confirm against the gateway.
  text?: string
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue