fix(tui): restore voice push-to-talk parity (#20897)

* fix(tui): restore classic CLI voice push-to-talk parity

(cherry picked from commit 93b9ae301b)

* fix(tui): harden voice push-to-talk stop flow

Address review feedback from PR #16189 by stopping the active recorder before background transcription, documenting single-shot voice capture, and covering the TUI gateway flags with regression tests.

* fix(tui): preserve silent voice strike tracking

Keep single-shot voice recording's no-speech counter alive across starts so the TUI can still emit the three-strikes auto-disable event, and bind the auto-restart state at module scope for type checking.

* fix(tui): clean up voice stop failure path

Address follow-up review by naming the TUI flow as single-shot push-to-talk and cancelling the recorder when forced stop cannot produce a WAV.

* fix(tui): report busy voice capture starts

Return explicit start state from the voice wrapper so the TUI gateway does not report recording while forced-stop transcription is still cleaning up.

* fix(tui): handle busy voice record responses

Apply the gateway busy status immediately in the TUI and route forced-stop voice events to the session that sent the stop request.

* fix(tui): clear voice recording on null response

Treat a null voice.record RPC result as a failed optimistic start so the REC badge cannot stick after gateway-side errors.

* fix(tui): count silent manual voice stops

Preserve single-shot voice no-speech strikes through forced stop transcription so empty push-to-talk captures still trigger the three-strikes guard.

---------

Co-authored-by: Montbra <montbra@gmail.com>
This commit is contained in:
brooklyn! 2026-05-06 15:49:59 -07:00 committed by GitHub
parent 5ccab51fa8
commit 04cf4788cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 527 additions and 57 deletions

View file

@ -0,0 +1,37 @@
import { describe, expect, it, vi } from 'vitest'
import { applyVoiceRecordResponse } from '../app/useInputHandlers.js'
describe('applyVoiceRecordResponse', () => {
it('reverts optimistic REC state when the gateway reports voice busy', () => {
const setProcessing = vi.fn()
const setRecording = vi.fn()
const sys = vi.fn()
applyVoiceRecordResponse({ status: 'busy' }, true, { setProcessing, setRecording }, sys)
expect(setRecording).toHaveBeenCalledWith(false)
expect(setProcessing).toHaveBeenCalledWith(true)
expect(sys).toHaveBeenCalledWith('voice: still transcribing; try again shortly')
})
it('keeps optimistic REC state for successful recording starts', () => {
const setProcessing = vi.fn()
const setRecording = vi.fn()
applyVoiceRecordResponse({ status: 'recording' }, true, { setProcessing, setRecording }, vi.fn())
expect(setRecording).not.toHaveBeenCalled()
expect(setProcessing).not.toHaveBeenCalled()
})
it('reverts optimistic REC state when the gateway returns null', () => {
const setProcessing = vi.fn()
const setRecording = vi.fn()
applyVoiceRecordResponse(null, true, { setProcessing, setRecording }, vi.fn())
expect(setRecording).toHaveBeenCalledWith(false)
expect(setProcessing).toHaveBeenCalledWith(false)
})
})

View file

@ -23,6 +23,26 @@ import { getUiState } from './uiStore.js'
const isCtrl = (key: { ctrl: boolean }, ch: string, target: string) => key.ctrl && ch.toLowerCase() === target
export function applyVoiceRecordResponse(
response: null | VoiceRecordResponse,
starting: boolean,
voice: Pick<InputHandlerContext['voice'], 'setProcessing' | 'setRecording'>,
sys: (text: string) => void
) {
if (!starting || response?.status === 'recording') {
return
}
voice.setRecording(false)
if (response?.status === 'busy') {
voice.setProcessing(true)
sys('voice: still transcribing; try again shortly')
} else {
voice.setProcessing(false)
}
}
export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
const { actions, composer, gateway, terminal, voice, wheelStep } = ctx
const { actions: cActions, refs: cRefs, state: cState } = composer
@ -157,11 +177,12 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
}
}
// CLI parity: Ctrl+B toggles the VAD-driven continuous recording loop
// CLI parity: Ctrl+B toggles a VAD-bounded push-to-talk capture
// (NOT the voice-mode umbrella bit). The mode is enabled via /voice on;
// Ctrl+B while the mode is off sys-nudges the user. While the mode is
// on, the first press starts a continuous loop (gateway → start_continuous,
// VAD auto-stop → transcribe → auto-restart), a subsequent press stops it.
// on, the first press starts a single VAD-bounded capture
// (gateway -> start_continuous(auto_restart=false), VAD auto-stop ->
// transcribe -> idle), a subsequent press stops and transcribes it.
// The gateway publishes voice.status + voice.transcript events that
// createGatewayEventHandler turns into UI badges and composer injection.
const voiceRecordToggle = () => {
@ -182,14 +203,17 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
voice.setProcessing(false)
}
gateway.rpc<VoiceRecordResponse>('voice.record', { action }).catch((e: Error) => {
// Revert optimistic UI on failure.
if (starting) {
voice.setRecording(false)
}
gateway
.rpc<VoiceRecordResponse>('voice.record', { action, session_id: getUiState().sid })
.then(r => applyVoiceRecordResponse(r, starting, voice, actions.sys))
.catch((e: Error) => {
// Revert optimistic UI on failure.
if (starting) {
voice.setRecording(false)
}
actions.sys(`voice error: ${e.message}`)
})
actions.sys(`voice error: ${e.message}`)
})
}
useInput((ch, key) => {

View file

@ -295,7 +295,7 @@ export interface VoiceToggleResponse {
}
export interface VoiceRecordResponse {
status?: string
status?: 'busy' | 'recording' | 'stopped'
text?: string
}