mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
Merge pull request #52187 from NousResearch/bb/desktop-voice
fix(desktop): wire Ctrl+B voice, declutter voice settings, stop endless TTS hang
This commit is contained in:
commit
70650e82a3
9 changed files with 141 additions and 5 deletions
|
|
@ -34,6 +34,7 @@ interface InsertRefsDetail {
|
|||
const FOCUS_EVENT = 'hermes:composer-focus'
|
||||
const INSERT_EVENT = 'hermes:composer-insert'
|
||||
const INSERT_REFS_EVENT = 'hermes:composer-insert-refs'
|
||||
const VOICE_TOGGLE_EVENT = 'hermes:composer-voice-toggle'
|
||||
|
||||
let activeTarget: ComposerTarget = 'main'
|
||||
|
||||
|
|
@ -105,6 +106,13 @@ export const requestComposerInsertRefs = (
|
|||
export const onComposerInsertRefsRequest = (handler: (detail: InsertRefsDetail) => void) =>
|
||||
subscribe<InsertRefsDetail>(INSERT_REFS_EVENT, handler)
|
||||
|
||||
/**
 * Ask the active composer to toggle its voice conversation. This is how the
 * `composer.voice` hotkey (Ctrl+B) reaches into the composer that owns the
 * voice state.
 */
export const requestVoiceToggle = () => {
  // The payload only records when the request was made.
  const detail = { at: Date.now() }

  return dispatch<{ at: number }>(VOICE_TOGGLE_EVENT, detail)
}
|
||||
|
||||
/**
 * Register `handler` for voice-toggle requests. The event payload is dropped,
 * so `handler` is always called with no arguments.
 *
 * @returns whatever `subscribe` returns — presumably an unsubscribe function,
 *   since callers hand it back from `useEffect`; confirm against `subscribe`.
 */
export const onComposerVoiceToggleRequest = (handler: () => void) => {
  const forward = () => handler()

  return subscribe<{ at: number }>(VOICE_TOGGLE_EVENT, forward)
}
|
||||
|
||||
/**
|
||||
* Focus a composer input across React commit + browser focus restore.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -79,7 +79,8 @@ import {
|
|||
markActiveComposer,
|
||||
onComposerFocusRequest,
|
||||
onComposerInsertRefsRequest,
|
||||
onComposerInsertRequest
|
||||
onComposerInsertRequest,
|
||||
onComposerVoiceToggleRequest
|
||||
} from './focus'
|
||||
import { HelpHint } from './help-hint'
|
||||
import { useAtCompletions } from './hooks/use-at-completions'
|
||||
|
|
@ -1844,6 +1845,24 @@ export function ChatBar({
|
|||
pendingResponse
|
||||
})
|
||||
|
||||
// Handler for the `composer.voice` hotkey (Ctrl+B): flips the voice
// conversation on or off. We deliberately allow starting while STT is
// unconfigured so the conversation can surface its own "configure
// speech-to-text" notice rather than silently no-opping.
const toggleVoiceConversation = useCallback(() => {
  // A disabled composer ignores the hotkey entirely.
  if (disabled) {
    return
  }

  const nextActive = !voiceConversationActive

  setVoiceConversationActive(nextActive)

  // Turning the conversation off also ends it (fire-and-forget).
  if (!nextActive) {
    void conversation.end()
  }
}, [conversation, disabled, voiceConversationActive])
|
||||
|
||||
useEffect(() => onComposerVoiceToggleRequest(toggleVoiceConversation), [toggleVoiceConversation])
|
||||
|
||||
const contextMenu = (
|
||||
<ContextMenu
|
||||
onInsertText={insertText}
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import {
|
|||
import { openNewSessionInNewWindow } from '@/store/windows'
|
||||
import { useTheme } from '@/themes/context'
|
||||
|
||||
import { requestComposerFocus } from '../chat/composer/focus'
|
||||
import { requestComposerFocus, requestVoiceToggle } from '../chat/composer/focus'
|
||||
import { SIDEBAR_COLLAPSE_MEDIA_QUERY } from '../layout-constants'
|
||||
import {
|
||||
AGENTS_ROUTE,
|
||||
|
|
@ -114,6 +114,7 @@ export function useKeybinds(deps: KeybindRuntimeDeps): void {
|
|||
|
||||
'composer.focus': () => requestComposerFocus('main'),
|
||||
'composer.modelPicker': () => setModelPickerOpen(true),
|
||||
'composer.voice': requestVoiceToggle,
|
||||
|
||||
'nav.commandPalette': toggleCommandPalette,
|
||||
'nav.commandCenter': deps.toggleCommandCenter,
|
||||
|
|
|
|||
|
|
@ -26,6 +26,26 @@ import { ModelSettings } from './model-settings'
|
|||
import { EmptyState, ListRow, LoadingState, SettingsContent } from './primitives'
|
||||
import { ProviderConfigPanel } from './provider-config-panel'
|
||||
|
||||
/**
 * Decide whether a config field should be rendered on the Voice settings page.
 *
 * Only the sub-fields of the *selected* TTS/STT provider are surfaced —
 * otherwise every provider's options render at once (a wall of ~30 fields).
 *
 * - Keys that are not of the form `tts.<provider>.…` / `stt.<provider>.…`
 *   (e.g. `tts.provider`, `stt.enabled`, `voice.*`) are always visible.
 * - STT provider sub-fields are hidden entirely while `stt.enabled` is falsy.
 * - Otherwise a sub-field is visible only when its `<provider>` segment equals
 *   the configured `<domain>.provider` value.
 *
 * @param key - Dotted config key of the field being considered.
 * @param config - Current config record, read via `getNested`.
 * @returns `true` when the field should be shown.
 */
export function voiceFieldVisible(key: string, config: HermesConfigRecord): boolean {
  const parts = /^(tts|stt)\.([^.]+)\./.exec(key)

  // Not a provider sub-field: nothing to filter.
  if (parts === null) {
    return true
  }

  const domain = parts[1]
  const provider = parts[2]

  const sttSwitchedOff = domain === 'stt' && !getNested(config, 'stt.enabled')

  if (sttSwitchedOff) {
    return false
  }

  // A missing provider setting becomes '' and therefore matches no sub-field.
  const selectedProvider = String(getNested(config, `${domain}.provider`) ?? '')

  return selectedProvider === provider
}
|
||||
|
||||
function ConfigField({
|
||||
schemaKey,
|
||||
schema,
|
||||
|
|
@ -356,6 +376,9 @@ export function ConfigSettings({
|
|||
return <LoadingState label={c.loading} />
|
||||
}
|
||||
|
||||
const visibleFields =
|
||||
activeSectionId === 'voice' ? fields.filter(([key]) => voiceFieldVisible(key, config)) : fields
|
||||
|
||||
return (
|
||||
<SettingsContent>
|
||||
{activeSectionId === 'model' && (
|
||||
|
|
@ -363,11 +386,11 @@ export function ConfigSettings({
|
|||
<ModelSettings onMainModelChanged={onMainModelChanged} />
|
||||
</div>
|
||||
)}
|
||||
{fields.length === 0 ? (
|
||||
{visibleFields.length === 0 ? (
|
||||
<EmptyState description={c.emptyDesc} title={c.emptyTitle} />
|
||||
) : (
|
||||
<div className="grid gap-1">
|
||||
{fields.map(([key, field]) => (
|
||||
{visibleFields.map(([key, field]) => (
|
||||
<div className="scroll-mt-6 rounded-lg" id={`setting-field-${key}`} key={key}>
|
||||
<ConfigField
|
||||
descriptionExtra={
|
||||
|
|
|
|||
48
apps/desktop/src/app/settings/voice-field-visible.test.ts
Normal file
48
apps/desktop/src/app/settings/voice-field-visible.test.ts
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import type { HermesConfigRecord } from '@/types/hermes'
|
||||
|
||||
import { voiceFieldVisible } from './config-settings'
|
||||
|
||||
/**
 * Build a config fixture: TTS set to `edge`, STT enabled with the `local`
 * provider. Top-level keys in `over` replace the matching defaults wholesale
 * (shallow spread, no deep merge).
 */
const cfg = (over: Record<string, unknown> = {}): HermesConfigRecord => {
  const defaults = {
    tts: { provider: 'edge', edge: {}, openai: {} },
    stt: { enabled: true, provider: 'local', local: {}, groq: {} }
  }

  return { ...defaults, ...over } as unknown as HermesConfigRecord
}
|
||||
|
||||
describe('voiceFieldVisible', () => {
  it('always shows top-level + non-provider keys', () => {
    const config = cfg()
    const alwaysVisible = ['tts.provider', 'stt.enabled', 'stt.provider', 'voice.auto_tts', 'voice.record_key']

    alwaysVisible.forEach(key => {
      expect(voiceFieldVisible(key, config)).toBe(true)
    })
  })

  it('shows only the selected TTS provider sub-fields', () => {
    // Default fixture selects the `edge` TTS provider.
    const config = cfg()

    expect(voiceFieldVisible('tts.edge.voice', config)).toBe(true)
    expect(voiceFieldVisible('tts.openai.voice', config)).toBe(false)
    expect(voiceFieldVisible('tts.elevenlabs.voice_id', config)).toBe(false)
  })

  it('shows only the selected STT provider sub-fields', () => {
    // Default fixture selects the `local` STT provider with STT enabled.
    const config = cfg()

    expect(voiceFieldVisible('stt.local.model', config)).toBe(true)
    expect(voiceFieldVisible('stt.groq.model', config)).toBe(false)
  })

  it('hides every STT provider sub-field when STT is disabled', () => {
    const sttOff = cfg({ stt: { enabled: false, provider: 'local', local: {} } })

    expect(voiceFieldVisible('stt.local.model', sttOff)).toBe(false)

    // The enable/provider toggles themselves must remain visible.
    expect(voiceFieldVisible('stt.enabled', sttOff)).toBe(true)
    expect(voiceFieldVisible('stt.provider', sttOff)).toBe(true)
  })

  it('tracks a provider switch', () => {
    const switchedToOpenai = cfg({ tts: { provider: 'openai', openai: {} } })

    expect(voiceFieldVisible('tts.openai.voice', switchedToOpenai)).toBe(true)
    expect(voiceFieldVisible('tts.edge.voice', switchedToOpenai)).toBe(false)
  })
})
|
||||
|
|
@ -211,6 +211,7 @@ export const en: Translations = {
|
|||
'session.togglePin': 'Pin / unpin current session',
|
||||
'composer.focus': 'Focus composer',
|
||||
'composer.modelPicker': 'Open model picker',
|
||||
'composer.voice': 'Start / stop voice conversation',
|
||||
'view.toggleSidebar': 'Toggle sessions sidebar',
|
||||
'view.toggleRightSidebar': 'Toggle file browser',
|
||||
'view.showFiles': 'Show file browser',
|
||||
|
|
|
|||
|
|
@ -206,6 +206,7 @@ export const zh: Translations = {
|
|||
'session.togglePin': '固定/取消固定当前会话',
|
||||
'composer.focus': '聚焦输入框',
|
||||
'composer.modelPicker': '打开模型选择器',
|
||||
'composer.voice': '开始 / 停止语音对话',
|
||||
'view.toggleSidebar': '切换会话侧边栏',
|
||||
'view.toggleRightSidebar': '切换文件浏览器',
|
||||
'view.showFiles': '显示文件浏览器',
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@
|
|||
// like navigate / theme); labels come from i18n (`t.keybinds.actions[id]`). To
|
||||
// add a hotkey, add a row here and a handler there — nothing else.
|
||||
|
||||
import { IS_MAC } from './combo'
|
||||
|
||||
export type KeybindCategory = 'composer' | 'profiles' | 'session' | 'navigation' | 'view'
|
||||
|
||||
// The self-referential opener — bound + dispatched like any action, but shown in
|
||||
|
|
@ -55,6 +57,12 @@ export const KEYBIND_ACTIONS: readonly KeybindActionMeta[] = [
|
|||
// ── Composer ─────────────────────────────────────────────────────────────
|
||||
{ id: 'composer.focus', category: 'composer', defaults: [] },
|
||||
{ id: 'composer.modelPicker', category: 'composer', defaults: [] },
|
||||
// Voice conversation toggle. Matches the documented `voice.record_key`
|
||||
// (Ctrl+B). On macOS that's literally ⌃B — distinct from the ⌘B sidebar
|
||||
// toggle. Off macOS `ctrl` folds to `mod`, which IS the ⌘B/Ctrl+B sidebar
|
||||
// chord, so ship it unbound there (rebindable in the panel) rather than
|
||||
// stealing the long-standing sidebar binding.
|
||||
{ id: 'composer.voice', category: 'composer', defaults: IS_MAC ? ['ctrl+b'] : [] },
|
||||
|
||||
// ── Profiles ─────────────────────────────────────────────────────────────
|
||||
{ id: 'profile.default', category: 'profiles', defaults: ['mod+d'] },
|
||||
|
|
|
|||
|
|
@ -8,6 +8,12 @@ import {
|
|||
|
||||
import { sanitizeTextForSpeech } from './speech-text'
|
||||
|
||||
// Free Edge TTS occasionally hands back audio that never fires `playing`/`ended`
|
||||
// nor `error` — leaving voice mode stuck "speaking" forever. Reject if playback
|
||||
// fails to start or stalls mid-stream for this long (rearmed on each progress
|
||||
// tick, so legitimately long speech is never cut off).
|
||||
const PLAYBACK_STALL_MS = 15_000
|
||||
|
||||
let currentAudio: HTMLAudioElement | null = null
|
||||
let currentStop: (() => void) | null = null
|
||||
let sequence = 0
|
||||
|
|
@ -78,12 +84,31 @@ export async function playSpeechText(text: string, options: VoicePlaybackOptions
|
|||
setVoicePlaybackState(currentState('speaking', options, audio))
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
let stall: number | null = null
|
||||
|
||||
const cleanup = () => {
|
||||
if (stall !== null) {
|
||||
window.clearTimeout(stall)
|
||||
stall = null
|
||||
}
|
||||
|
||||
audio.removeEventListener('ended', onEnded)
|
||||
audio.removeEventListener('error', onError)
|
||||
audio.removeEventListener('timeupdate', armStall)
|
||||
currentStop = null
|
||||
}
|
||||
|
||||
const armStall = () => {
|
||||
if (stall !== null) {
|
||||
window.clearTimeout(stall)
|
||||
}
|
||||
|
||||
stall = window.setTimeout(() => {
|
||||
cleanup()
|
||||
reject(new Error('Playback stalled'))
|
||||
}, PLAYBACK_STALL_MS)
|
||||
}
|
||||
|
||||
const onEnded = () => {
|
||||
cleanup()
|
||||
resolve()
|
||||
|
|
@ -101,7 +126,9 @@ export async function playSpeechText(text: string, options: VoicePlaybackOptions
|
|||
|
||||
audio.addEventListener('ended', onEnded, { once: true })
|
||||
audio.addEventListener('error', onError, { once: true })
|
||||
void audio.play().catch(reject)
|
||||
audio.addEventListener('timeupdate', armStall)
|
||||
armStall()
|
||||
void audio.play().catch(onError)
|
||||
})
|
||||
|
||||
if (!isCurrent()) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue