feat: lots of speech stuff

This commit is contained in:
Brooklyn Nicholson 2026-05-01 19:28:02 -05:00
parent 9f3d393a4d
commit d5d7b5c6dc
41 changed files with 1405 additions and 361 deletions

View file

@ -384,9 +384,9 @@ IMAGE_TOOLS_DEBUG=false
# Default STT provider is "local" (faster-whisper) — runs on your machine, no API key needed.
# Install with: pip install faster-whisper
# Model downloads automatically on first use (~150 MB for "base").
# To use cloud providers instead, set GROQ_API_KEY or VOICE_TOOLS_OPENAI_KEY above.
# Provider priority: local > groq > openai
# Configure in config.yaml: stt.provider: local | groq | openai
# To use cloud providers instead, set GROQ_API_KEY, VOICE_TOOLS_OPENAI_KEY, or ELEVENLABS_API_KEY above.
# Provider priority: local > groq > openai > mistral > xai > elevenlabs
# Configure in config.yaml: stt.provider: local | groq | openai | mistral | xai | elevenlabs
# =============================================================================
# STT ADVANCED OVERRIDES (optional)
@ -394,10 +394,12 @@ IMAGE_TOOLS_DEBUG=false
# Override default STT models per provider (normally set via stt.model in config.yaml)
# STT_GROQ_MODEL=whisper-large-v3-turbo
# STT_OPENAI_MODEL=whisper-1
# STT_ELEVENLABS_MODEL=scribe_v2
# Override STT provider endpoints (for proxies or self-hosted instances)
# GROQ_BASE_URL=https://api.groq.com/openai/v1
# STT_OPENAI_BASE_URL=https://api.openai.com/v1
# ELEVENLABS_STT_BASE_URL=https://api.elevenlabs.io/v1
# =============================================================================
# MICROSOFT TEAMS INTEGRATION

View file

@ -10,6 +10,7 @@
"dependencies": {
"@assistant-ui/react": "^0.12.28",
"@assistant-ui/react-streamdown": "^0.1.11",
"@audiowave/react": "^0.6.2",
"@chenglou/pretext": "^0.0.6",
"@nanostores/react": "^1.1.0",
"@radix-ui/react-slot": "^1.2.4",
@ -305,6 +306,25 @@
}
}
},
"node_modules/@audiowave/core": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/@audiowave/core/-/core-0.3.1.tgz",
"integrity": "sha512-KtC2MTWKp6Orkedty3I8IklVBVQ2IFaFWDJ1cz+UsACpX2x1gINwZGTRZT7bw/dx8KazNSMuVK5lm1jL67KQkQ==",
"license": "MIT"
},
"node_modules/@audiowave/react": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/@audiowave/react/-/react-0.6.2.tgz",
"integrity": "sha512-hajG2Iv3mVxived9wXad8L0ZQF+HmYnB3IrfOkIdkTv4RxOJDXwFWMAd0zb7ZU1Qz0IEYZXCbASFWyuxEQ7PAw==",
"license": "MIT",
"dependencies": {
"@audiowave/core": "0.3.1"
},
"peerDependencies": {
"react": ">=16.8.0",
"react-dom": ">=16.8.0"
}
},
"node_modules/@babel/code-frame": {
"version": "7.29.0",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",

View file

@ -23,6 +23,7 @@
"dependencies": {
"@assistant-ui/react": "^0.12.28",
"@assistant-ui/react-streamdown": "^0.1.11",
"@audiowave/react": "^0.6.2",
"@chenglou/pretext": "^0.0.6",
"@nanostores/react": "^1.1.0",
"@radix-ui/react-slot": "^1.2.4",

View file

@ -1,4 +1,3 @@
import type { Unstable_TriggerItem } from '@assistant-ui/core'
import type { Unstable_IconComponent } from '@assistant-ui/react'
import { FileText, FolderOpen, ImageIcon, Link, type LucideIcon } from 'lucide-react'
import type { CSSProperties } from 'react'
@ -37,7 +36,7 @@ export const DIRECTIVE_ICONS: Record<string, Unstable_IconComponent> = {
}
export const DIRECTIVE_POPOVER_CLASS =
'absolute bottom-24 left-1/2 z-50 w-[min(calc(100vw-1.5rem),28rem)] max-h-[min(28rem,calc(100vh-8rem))] -translate-x-1/2 overflow-y-auto overscroll-contain rounded-2xl border border-border/70 bg-popover p-1.5 text-popover-foreground shadow-2xl'
'absolute bottom-24 left-1/2 z-50 w-[min(calc(100vw-1.5rem),26rem)] max-h-[min(24rem,calc(100vh-8rem))] -translate-x-1/2 overflow-y-auto overscroll-contain rounded-2xl border border-border/60 bg-popover/95 p-1.5 text-popover-foreground shadow-2xl backdrop-blur-md ring-1 ring-black/5'
export const PROMPT_SNIPPETS = [
{
@ -64,37 +63,6 @@ export const ASK_PLACEHOLDERS = [
'Duck mode: gentle debugging, together.'
]
export const REF_ITEMS: Unstable_TriggerItem[] = [
{
id: 'file:',
type: 'file',
label: 'File',
description: 'Attach a file path',
metadata: { icon: 'file' }
},
{
id: 'folder:',
type: 'folder',
label: 'Folder',
description: 'Attach a folder path',
metadata: { icon: 'folder' }
},
{
id: 'url:',
type: 'url',
label: 'URL',
description: 'Attach a web page',
metadata: { icon: 'url' }
},
{
id: 'image:',
type: 'image',
label: 'Image',
description: 'Attach an image path',
metadata: { icon: 'image' }
}
]
export const EDGE_NEWLINES_RE = /^[\t ]*(?:\r\n|\r|\n)+|(?:\r\n|\r|\n)+[\t ]*$/g
export const DEFAULT_MAX_RECORDING_SECONDS = 120

View file

@ -15,11 +15,10 @@ import {
import { cn } from '@/lib/utils'
import { GHOST_ICON_BTN, PROMPT_SNIPPETS } from './constants'
import type { ChatBarState, ContextSuggestion } from './types'
import type { ChatBarState } from './types'
export function ContextMenu({
state,
onAddContextRef,
onInsertText,
onOpenUrlDialog,
onPasteClipboardImage,
@ -28,7 +27,6 @@ export function ContextMenu({
onPickImages
}: {
state: ChatBarState
onAddContextRef?: (refText: string, label?: string, detail?: string) => void
onInsertText: (text: string) => void
onOpenUrlDialog: () => void
onPasteClipboardImage?: () => void
@ -36,11 +34,6 @@ export function ContextMenu({
onPickFolders?: () => void
onPickImages?: () => void
}) {
const choose = (item: ContextSuggestion) =>
onAddContextRef ? onAddContextRef(item.text, item.display, item.meta) : onInsertText(item.text)
const suggestions = state.tools.suggestions?.slice(0, 8) ?? []
return (
<DropdownMenu>
<DropdownMenuTrigger asChild>
@ -56,48 +49,28 @@ export function ContextMenu({
<Plus size={18} />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent align="start" className="w-64" side="top" sideOffset={10}>
<DropdownMenuLabel className="text-xs text-muted-foreground">Add context</DropdownMenuLabel>
<DropdownMenuContent align="start" className="w-60" side="top" sideOffset={10}>
<DropdownMenuLabel className="text-[0.7rem] font-medium uppercase tracking-wide text-muted-foreground/85">
Attach
</DropdownMenuLabel>
<ContextMenuItem disabled={!onPickFiles} icon={FileText} onSelect={onPickFiles}>
Files
Files
</ContextMenuItem>
<ContextMenuItem disabled={!onPickFolders} icon={FolderOpen} onSelect={onPickFolders}>
Folders
Folder
</ContextMenuItem>
<ContextMenuItem disabled={!onPickImages} icon={ImageIcon} onSelect={onPickImages}>
Images
Images
</ContextMenuItem>
<ContextMenuItem disabled={!onPasteClipboardImage} icon={Clipboard} onSelect={onPasteClipboardImage}>
Image from clipboard
Paste image
</ContextMenuItem>
<ContextMenuItem icon={Link} onSelect={onOpenUrlDialog}>
URL
URL
</ContextMenuItem>
<DropdownMenuSeparator />
<DropdownMenuSub>
<DropdownMenuSubTrigger>
<FileText />
<span>Suggested files</span>
</DropdownMenuSubTrigger>
<DropdownMenuSubContent className="w-72">
{suggestions.length === 0 ? (
<DropdownMenuItem disabled>
<span className="text-muted-foreground">No suggestions</span>
</DropdownMenuItem>
) : (
suggestions.map(item => (
<DropdownMenuItem key={item.text} onSelect={() => choose(item)}>
<FileText />
<span className="min-w-0 flex-1 truncate">{item.display}</span>
{item.meta && <span className="max-w-28 truncate text-xs text-muted-foreground">{item.meta}</span>}
</DropdownMenuItem>
))
)}
</DropdownMenuSubContent>
</DropdownMenuSub>
<DropdownMenuSub>
<DropdownMenuSubTrigger>
<MessageSquareText />
@ -111,6 +84,13 @@ export function ContextMenu({
))}
</DropdownMenuSubContent>
</DropdownMenuSub>
<DropdownMenuSeparator />
<div className="px-2 py-1 text-[0.7rem] text-muted-foreground/80">
Tip: type <kbd className="rounded bg-muted/70 px-1 py-px font-mono text-[0.65rem]">@</kbd> to reference files
inline.
</div>
</DropdownMenuContent>
</DropdownMenu>
)

View file

@ -15,6 +15,7 @@ interface ConversationProps {
status: ConversationStatus
onEnd: () => void
onStart: () => void
onStopTurn: () => void
onToggleMute: () => void
}
@ -80,6 +81,7 @@ function ConversationPill({
level,
muted,
onEnd,
onStopTurn,
onToggleMute,
status
}: ConversationProps & { disabled: boolean }) {
@ -104,10 +106,10 @@ function ConversationPill({
aria-pressed={muted}
className={cn(GHOST_ICON_BTN, 'p-0', muted && 'bg-muted text-muted-foreground')}
disabled={disabled}
onClick={() => {
triggerHaptic('selection')
onToggleMute()
}}
onClick={() => {
triggerHaptic('selection')
onToggleMute()
}}
size="icon"
title={muted ? 'Unmute microphone' : 'Mute microphone'}
type="button"
@ -115,6 +117,23 @@ function ConversationPill({
>
{muted ? <MicOff size={16} /> : <Mic size={16} />}
</Button>
{listening && (
<Button
aria-label="Stop listening and send"
className="h-8 shrink-0 gap-1.5 rounded-full px-2.5 text-xs text-muted-foreground hover:bg-accent hover:text-foreground"
disabled={disabled}
onClick={() => {
triggerHaptic('submit')
onStopTurn()
}}
title="Stop listening and send"
type="button"
variant="ghost"
>
<Square className="fill-current" size={11} />
<span>Stop</span>
</Button>
)}
<Button
aria-label="End voice conversation"
className="h-8 gap-1.5 rounded-full bg-primary px-3 text-xs font-medium text-primary-foreground hover:bg-primary/90"

View file

@ -5,9 +5,9 @@ import {
type Unstable_MentionCategory,
type Unstable_MentionDirective
} from '@assistant-ui/react'
import { ChevronDown } from 'lucide-react'
import { FileText } from 'lucide-react'
import { DIRECTIVE_POPOVER_CLASS, REF_ITEMS } from './constants'
import { DIRECTIVE_POPOVER_CLASS } from './constants'
import type { ContextSuggestion } from './types'
export function DirectivePopover({
@ -24,80 +24,73 @@ export function DirectivePopover({
return (
<ComposerPrimitive.Unstable_TriggerPopover adapter={adapter} char="@" className={DIRECTIVE_POPOVER_CLASS}>
<ComposerPrimitive.Unstable_TriggerPopover.Directive {...directive} />
<ComposerPrimitive.Unstable_TriggerPopoverCategories>
{categories => (
<div className="grid gap-1">
{categories.map(c => (
<ComposerPrimitive.Unstable_TriggerPopoverCategoryItem
categoryId={c.id}
className="flex w-full items-center justify-between rounded-xl px-3 py-2 text-left text-sm hover:bg-accent data-highlighted:bg-accent"
key={c.id}
>
<span>{c.label}</span>
<ChevronDown className="-rotate-90 size-3.5 text-muted-foreground" />
</ComposerPrimitive.Unstable_TriggerPopoverCategoryItem>
))}
</div>
)}
</ComposerPrimitive.Unstable_TriggerPopoverCategories>
<ComposerPrimitive.Unstable_TriggerPopoverItems>
{items => (
<div className="grid gap-1">
<ComposerPrimitive.Unstable_TriggerPopoverBack className="mb-1 text-xs text-muted-foreground hover:text-foreground">
Back
</ComposerPrimitive.Unstable_TriggerPopoverBack>
{items.map((item, index) => {
const Icon = directiveIcon(item, iconMap, Fallback)
<div className="grid gap-0.5">
<div className="px-2 pb-1 pt-0.5 text-[0.7rem] font-medium uppercase tracking-wide text-muted-foreground/80">
Reference a file
</div>
{items.length === 0 ? (
<div className="px-3 py-3 text-sm text-muted-foreground">
<p>No file suggestions yet.</p>
<p className="mt-1 text-xs text-muted-foreground/80">
Keep typing to filter, or click <span className="font-medium text-foreground/80">+</span> to attach
files, folders, or a URL.
</p>
</div>
) : (
items.map((item, index) => {
const Icon = directiveIcon(item, iconMap, Fallback)
return (
<ComposerPrimitive.Unstable_TriggerPopoverItem
className="flex w-full items-center gap-2 rounded-xl px-3 py-2 text-left text-sm hover:bg-accent data-highlighted:bg-accent"
index={index}
item={item}
key={`${item.type}:${item.id}`}
>
<Icon className="size-4 shrink-0 text-muted-foreground" />
<span className="grid min-w-0 flex-1 gap-0.5">
<span className="truncate font-medium">{item.label}</span>
{item.description && (
<span className="truncate text-xs text-muted-foreground">{item.description}</span>
)}
</span>
</ComposerPrimitive.Unstable_TriggerPopoverItem>
)
})}
return (
<ComposerPrimitive.Unstable_TriggerPopoverItem
className="flex w-full items-center gap-2 rounded-xl px-2.5 py-1.5 text-left text-sm transition-colors hover:bg-accent/70 data-highlighted:bg-accent"
index={index}
item={item}
key={`${item.type}:${item.id}`}
>
<Icon className="size-4 shrink-0 text-muted-foreground/80" />
<span className="grid min-w-0 flex-1 gap-0.5">
<span className="truncate font-medium text-foreground">{item.label}</span>
{item.description && (
<span className="truncate text-[0.72rem] text-muted-foreground/85">{item.description}</span>
)}
</span>
</ComposerPrimitive.Unstable_TriggerPopoverItem>
)
})
)}
</div>
)}
</ComposerPrimitive.Unstable_TriggerPopoverItems>
</ComposerPrimitive.Unstable_TriggerPopover>
)
}
export function buildMentionCategories(suggestions: ContextSuggestion[] | undefined): Unstable_MentionCategory[] {
const items = (suggestions ?? [])
.map(s => {
const match = s.text.match(/^@(file|folder|url|image):(.+)$/)
const items: Unstable_TriggerItem[] = []
if (!match) {
return null
}
for (const s of suggestions ?? []) {
const match = s.text.match(/^@(file|folder|url|image):(.+)$/)
const [, type, id] = match
if (!match) {
continue
}
return {
id,
type,
label: s.display || id,
description: s.meta,
metadata: { icon: type }
}
const [, type, id] = match
items.push({
id,
type,
label: s.display || id,
description: s.meta,
metadata: { icon: type }
})
.filter((item): item is NonNullable<typeof item> => Boolean(item))
}
return [
{ id: 'refs', label: 'Hermes refs', items: REF_ITEMS },
...(items.length ? [{ id: 'context', label: 'Suggested files', items }] : [])
]
return [{ id: 'context', label: 'References', items }]
}
function directiveIcon(
item: Unstable_TriggerItem,
iconMap: Record<string, Unstable_IconComponent>,
@ -106,5 +99,5 @@ function directiveIcon(
const meta = item.metadata as Record<string, unknown> | undefined
const key = typeof meta?.icon === 'string' ? meta.icon : item.type
return iconMap[key] ?? iconMap[item.type] ?? fallback
return iconMap[key] ?? iconMap[item.type] ?? fallback ?? FileText
}

View file

@ -1,6 +1,6 @@
import { useCallback, useEffect, useRef, useState } from 'react'
import { speakText } from '@/hermes'
import { playSpeechText, stopVoicePlayback } from '@/lib/voice-playback'
import { notify, notifyError } from '@/store/notifications'
import {
@ -14,13 +14,19 @@ import { useMicRecorder } from './use-mic-recorder'
export type ConversationStatus = 'idle' | 'listening' | 'transcribing' | 'thinking' | 'speaking'
/**
 * Snapshot of the assistant response that the voice conversation loop may
 * read aloud. Produced by the `pendingResponse` option and polled by the hook.
 */
interface PendingVoiceResponse {
// Identifier of the response; the hook resets its speech buffer when this changes.
id: string
// Whether the response is still flagged as pending (presumably still streaming —
// confirm against the message store). Leftover text is only force-flushed once this is false.
pending: boolean
// Response text available so far; the hook only buffers the part beyond the
// length it has already consumed.
text: string
}
interface VoiceConversationOptions {
busy: boolean
enabled: boolean
onFatalError?: () => void
onSubmit: (text: string) => void
onSubmit: (text: string) => Promise<void> | void
onTranscribeAudio?: (audio: Blob) => Promise<string>
pendingResponseText: () => string | null
pendingResponse: () => PendingVoiceResponse | null
consumePendingResponse: () => void
}
@ -30,16 +36,19 @@ export function useVoiceConversation({
onFatalError,
onSubmit,
onTranscribeAudio,
pendingResponseText,
pendingResponse,
consumePendingResponse
}: VoiceConversationOptions) {
const { handle, level } = useMicRecorder()
const [status, setStatus] = useState<ConversationStatus>('idle')
const [muted, setMuted] = useState(false)
const audioRef = useRef<HTMLAudioElement | null>(null)
const turnTimeoutRef = useRef<number | null>(null)
const pendingStartRef = useRef(false)
const lastSpokenRef = useRef<string | null>(null)
const turnClosingRef = useRef(false)
const awaitingSpokenResponseRef = useRef(false)
const responseIdRef = useRef<string | null>(null)
const spokenSourceLengthRef = useRef(0)
const speechBufferRef = useRef('')
const enabledRef = useRef(enabled)
const mutedRef = useRef(muted)
const busyRef = useRef(busy)
@ -69,36 +78,74 @@ export function useVoiceConversation({
}
}
const stopAudio = useCallback(() => {
const audio = audioRef.current
/**
 * Clears all incremental-narration bookkeeping: the buffered text waiting to
 * be spoken, the count of source characters already consumed, and the id of
 * the response being tracked.
 */
const resetSpeechBuffer = (): void => {
  speechBufferRef.current = ''
  spokenSourceLengthRef.current = 0
  responseIdRef.current = null
}
if (audio) {
audio.pause()
audio.src = ''
audioRef.current = null
}
}, [])
const handleTurn = useCallback(async () => {
clearTurnTimeout()
setStatus('transcribing')
const result = await handle.stop()
if (!result || !result.heardSpeech || !onTranscribeAudio) {
if (enabledRef.current && !mutedRef.current && !busyRef.current && statusRef.current !== 'speaking') {
pendingStartRef.current = true
}
setStatus('idle')
const appendSpeechText = (text: string) => {
const cleaned = text
if (!cleaned) {
return
}
try {
const transcript = (await onTranscribeAudio(result.audio)).trim()
speechBufferRef.current = `${speechBufferRef.current} ${cleaned}`.trim()
}
if (!transcript) {
if (enabledRef.current) {
/**
 * Removes and returns the next speakable piece of `speechBufferRef`, or
 * `null` when nothing should be spoken yet.
 *
 * Selection order:
 * 1. The shortest leading run that ends on a sentence terminator and is at
 *    least 8 characters long. A short opener such as "Sure." is merged with
 *    the sentence that follows instead of stalling all speech until the
 *    response completes. With `force`, the first sentence is taken whatever
 *    its length.
 * 2. If the buffer exceeds 220 characters without a usable sentence end, the
 *    text up to the last `, ` / `; ` / `: ` found at or before index 180,
 *    provided that boundary lies beyond index 80.
 * 3. With `force`, whatever remains in the buffer.
 *
 * ASCII terminators (`.`, `!`, `?`) only count when followed by whitespace or
 * the end of the buffer. Ideographic/full-width terminators (`。`, `！`, `？`)
 * count on their own, because CJK text does not put spaces between sentences.
 *
 * @param force Flush mode: never hold text back waiting for a better boundary.
 * @returns The chunk to speak, or `null` if the buffer is empty or should keep
 *   accumulating.
 */
const takeSpeechChunk = (force = false): string | null => {
  // Collapse whitespace runs so the length thresholds below measure real text.
  const buffer = speechBufferRef.current.replace(/\s+/g, ' ').trim()

  if (!buffer) {
    speechBufferRef.current = ''

    return null
  }

  // `.{7,}?` plus the one-character terminator enforces the 8-character
  // minimum while still matching the earliest qualifying sentence end.
  // Full-width terminators are written as escapes so they survive re-encoding.
  const sentenceEnd = force
    ? /^(.+?(?:[.!?](?=\s|$)|[\u3002\uFF01\uFF1F]))/
    : /^(.{7,}?(?:[.!?](?=\s|$)|[\u3002\uFF01\uFF1F]))/
  const sentence = buffer.match(sentenceEnd)

  if (sentence?.[1]) {
    speechBufferRef.current = buffer.slice(sentence[1].length).trim()

    return sentence[1]
  }

  if (!force && buffer.length > 220) {
    // Long run with no sentence end: break after the last clause separator
    // at or before index 180, keeping the punctuation mark with the chunk.
    const softBoundary = Math.max(
      buffer.lastIndexOf(', ', 180),
      buffer.lastIndexOf('; ', 180),
      buffer.lastIndexOf(': ', 180)
    )

    if (softBoundary > 80) {
      const chunk = buffer.slice(0, softBoundary + 1).trim()
      speechBufferRef.current = buffer.slice(softBoundary + 1).trim()

      return chunk
    }
  }

  if (!force) {
    return null
  }

  speechBufferRef.current = ''

  return buffer
}
const handleTurn = useCallback(async (forceTranscribe = false) => {
if (turnClosingRef.current) {
return
}
turnClosingRef.current = true
clearTurnTimeout()
setStatus('transcribing')
try {
const result = await handle.stop()
if (!result || (!result.heardSpeech && !forceTranscribe) || !onTranscribeAudio) {
if (enabledRef.current && !mutedRef.current && !busyRef.current && statusRef.current !== 'speaking') {
pendingStartRef.current = true
}
@ -107,16 +154,34 @@ export function useVoiceConversation({
return
}
onSubmit(transcript)
setStatus('thinking')
} catch (error) {
notifyError(error, 'Voice transcription failed')
try {
const transcript = (await onTranscribeAudio(result.audio)).trim()
if (enabledRef.current && !mutedRef.current && !busyRef.current) {
pendingStartRef.current = true
if (!transcript) {
if (enabledRef.current) {
pendingStartRef.current = true
}
setStatus('idle')
return
}
awaitingSpokenResponseRef.current = true
resetSpeechBuffer()
await onSubmit(transcript)
setStatus('thinking')
} catch (error) {
notifyError(error, 'Voice transcription failed')
if (enabledRef.current && !mutedRef.current && !busyRef.current) {
pendingStartRef.current = true
}
setStatus('idle')
}
setStatus('idle')
} finally {
turnClosingRef.current = false
}
}, [handle, onSubmit, onTranscribeAudio])
@ -158,24 +223,13 @@ export function useVoiceConversation({
const speak = useCallback(
async (text: string) => {
stopAudio()
setStatus('speaking')
try {
const response = await speakText(text)
const audio = new Audio(response.data_url)
audioRef.current = audio
await new Promise<void>((resolve, reject) => {
audio.addEventListener('ended', () => resolve(), { once: true })
audio.addEventListener('error', () => reject(new Error('Playback failed')), { once: true })
void audio.play().catch(reject)
})
await playSpeechText(text, { source: 'voice-conversation' })
} catch (error) {
notifyError(error, 'Voice playback failed')
} finally {
audioRef.current = null
if (enabledRef.current) {
pendingStartRef.current = true
setStatus('idle')
@ -184,7 +238,7 @@ export function useVoiceConversation({
}
}
},
[stopAudio]
[]
)
const start = useCallback(async () => {
@ -200,20 +254,31 @@ export function useVoiceConversation({
}
setMuted(false)
lastSpokenRef.current = null
awaitingSpokenResponseRef.current = false
resetSpeechBuffer()
consumePendingResponse()
pendingStartRef.current = true
}, [onFatalError, onTranscribeAudio])
await startListening()
}, [consumePendingResponse, onFatalError, onTranscribeAudio, startListening])
const end = useCallback(async () => {
pendingStartRef.current = false
clearTurnTimeout()
stopAudio()
stopVoicePlayback()
handle.cancel()
lastSpokenRef.current = null
turnClosingRef.current = false
awaitingSpokenResponseRef.current = false
resetSpeechBuffer()
consumePendingResponse()
setMuted(false)
setStatus('idle')
}, [consumePendingResponse, handle, stopAudio])
}, [consumePendingResponse, handle])
/**
 * Manually closes the current listening turn. Does nothing unless the loop is
 * in the 'listening' state; otherwise calls `handleTurn(true)` without
 * awaiting it.
 */
const stopTurn = useCallback(() => {
  const isListening = statusRef.current === 'listening'

  if (!isListening) {
    return
  }

  void handleTurn(true)
}, [handleTurn])
const toggleMute = useCallback(() => {
setMuted(value => {
@ -231,22 +296,77 @@ export function useVoiceConversation({
})
}, [handle])
// Drive the loop: speak any new assistant response, otherwise start listening
// when the agent is idle and we're between turns.
// While the conversation is enabled, an unmodified, non-repeating Space press
// ends the current listening turn. The listener is registered on `window` in
// the capture phase and removed again on cleanup.
useEffect(() => {
  if (!enabled) {
    return
  }

  const handleSpaceKey = (keyEvent: KeyboardEvent) => {
    const isPlainSpacePress =
      keyEvent.code === 'Space' &&
      !keyEvent.repeat &&
      !keyEvent.metaKey &&
      !keyEvent.ctrlKey &&
      !keyEvent.altKey

    if (!isPlainSpacePress || statusRef.current !== 'listening') {
      return
    }

    // Swallow the key press so it is not also handled as regular input.
    keyEvent.preventDefault()
    stopTurn()
  }

  const listenerOptions = { capture: true }

  window.addEventListener('keydown', handleSpaceKey, listenerOptions)

  return () => {
    window.removeEventListener('keydown', handleSpaceKey, listenerOptions)
  }
}, [enabled, stopTurn])
// Drive the loop: after a voice-submitted turn, speak stable chunks as the
// assistant stream grows. Otherwise start listening when idle between turns.
useEffect(() => {
if (!enabled || muted) {
return
}
const text = pendingResponseText()
const trimmed = text?.trim() ?? ''
if (awaitingSpokenResponseRef.current && status !== 'speaking') {
const response = pendingResponse()
if (trimmed && trimmed !== lastSpokenRef.current && status !== 'speaking') {
lastSpokenRef.current = trimmed
consumePendingResponse()
void speak(trimmed)
if (response) {
if (response.id !== responseIdRef.current) {
resetSpeechBuffer()
responseIdRef.current = response.id
}
return
if (response.text.length > spokenSourceLengthRef.current) {
appendSpeechText(response.text.slice(spokenSourceLengthRef.current))
spokenSourceLengthRef.current = response.text.length
}
const chunk = takeSpeechChunk(!response.pending && !busy)
if (chunk) {
void speak(chunk)
return
}
if (!response.pending && !busy) {
awaitingSpokenResponseRef.current = false
consumePendingResponse()
resetSpeechBuffer()
pendingStartRef.current = true
setStatus('idle')
return
}
}
if (!busy && status === 'thinking') {
awaitingSpokenResponseRef.current = false
resetSpeechBuffer()
pendingStartRef.current = true
setStatus('idle')
return
}
}
if (busy || status !== 'idle') {
@ -256,7 +376,7 @@ export function useVoiceConversation({
if (pendingStartRef.current) {
void startListening()
}
}, [busy, consumePendingResponse, enabled, muted, pendingResponseText, speak, startListening, status])
}, [busy, consumePendingResponse, enabled, muted, pendingResponse, speak, startListening, status])
useEffect(() => {
if (enabled && !wasEnabledRef.current) {
@ -270,5 +390,5 @@ export function useVoiceConversation({
wasEnabledRef.current = enabled
}, [enabled, end, start])
return { end, level, muted, start, status, toggleMute }
return { end, level, muted, start, status, stopTurn, toggleMute }
}

View file

@ -32,7 +32,7 @@ import { useVoiceConversation } from './hooks/use-voice-conversation'
import { useVoiceRecorder } from './hooks/use-voice-recorder'
import type { ChatBarProps } from './types'
import { UrlDialog } from './url-dialog'
import { VoiceActivity } from './voice-activity'
import { VoiceActivity, VoicePlaybackActivity } from './voice-activity'
function trimPastedEdgeNewlines(text: string): string {
return text.replace(EDGE_NEWLINES_RE, '')
@ -45,7 +45,6 @@ export function ChatBar({
maxRecordingSeconds = DEFAULT_MAX_RECORDING_SECONDS,
state,
onCancel,
onAddContextRef,
onAddUrl,
onPasteClipboardImage,
onPickFiles,
@ -203,7 +202,7 @@ export function ChatBar({
onCancel()
} else if (draft.trim() || attachments.length > 0) {
triggerHaptic('submit')
onSubmit(draft)
void onSubmit(draft)
aui.composer().setText('')
}
@ -235,9 +234,9 @@ export function ChatBar({
onTranscribeAudio
})
const pendingResponseText = () => {
const pendingResponse = () => {
const messages = $messages.get()
const last = messages.findLast(m => m.role === 'assistant' && !m.pending && !m.hidden)
const last = messages.findLast(m => m.role === 'assistant' && !m.hidden)
if (!last || last.id === lastSpokenIdRef.current) {
return null
@ -249,9 +248,11 @@ export function ChatBar({
return null
}
lastSpokenIdRef.current = last.id
return text
return {
id: last.id,
pending: Boolean(last.pending),
text
}
}
const consumePendingResponse = () => {
@ -263,13 +264,13 @@ export function ChatBar({
}
}
const submitVoiceTurn = (text: string) => {
const submitVoiceTurn = async (text: string) => {
if (busy) {
return
}
triggerHaptic('submit')
onSubmit(text)
await onSubmit(text)
aui.composer().setText('')
draftRef.current = ''
}
@ -281,12 +282,11 @@ export function ChatBar({
onFatalError: () => setVoiceConversationActive(false),
onSubmit: submitVoiceTurn,
onTranscribeAudio,
pendingResponseText
pendingResponse
})
const contextMenu = (
<ContextMenu
onAddContextRef={onAddContextRef}
onInsertText={insertText}
onOpenUrlDialog={() => {
triggerHaptic('open')
@ -313,6 +313,7 @@ export function ChatBar({
void conversation.end()
},
onStart: () => setVoiceConversationActive(true),
onStopTurn: conversation.stopTurn,
onToggleMute: conversation.toggleMute,
status: conversation.status
}}
@ -343,14 +344,12 @@ export function ChatBar({
return (
<>
<ComposerPrimitive.Unstable_TriggerPopoverRoot>
{mentionCategories.length > 0 && (
<DirectivePopover
adapter={mention.adapter}
directive={mention.directive}
fallbackIcon={mention.fallbackIcon ?? FileText}
iconMap={mention.iconMap ?? DIRECTIVE_ICONS}
/>
)}
<DirectivePopover
adapter={mention.adapter}
directive={mention.directive}
fallbackIcon={mention.fallbackIcon ?? FileText}
iconMap={mention.iconMap ?? DIRECTIVE_ICONS}
/>
<ComposerPrimitive.Root
className={cn(SHELL, 'group/composer pb-8 pt-2')}
onSubmit={e => {
@ -407,6 +406,7 @@ export function ChatBar({
style={{ ...COMPOSER_BACKDROP_STYLE, borderRadius: `${glassTweaks.liquid.cornerRadius}px` }}
>
<VoiceActivity state={voiceActivityState} />
<VoicePlaybackActivity />
{attachments.length > 0 && <AttachmentList attachments={attachments} onRemove={onRemoveAttachment} />}
{stacked ? (
<>

View file

@ -36,7 +36,7 @@ export interface ChatBarProps {
onPickFolders?: () => void
onPickImages?: () => void
onRemoveAttachment?: (id: string) => void
onSubmit: (value: string) => void
onSubmit: (value: string) => Promise<void> | void
onTranscribeAudio?: (audio: Blob) => Promise<string>
}

View file

@ -1,9 +1,12 @@
import { Globe } from 'lucide-react'
import type * as React from 'react'
import { Button } from '@/components/ui/button'
import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from '@/components/ui/dialog'
import { Input } from '@/components/ui/input'
const URL_HINT = /^https?:\/\//i
export function UrlDialog({
inputRef,
onChange,
@ -19,14 +22,23 @@ export function UrlDialog({
open: boolean
value: string
}) {
const trimmed = value.trim()
const looksLikeUrl = trimmed.length > 0 && URL_HINT.test(trimmed)
return (
<Dialog onOpenChange={onOpenChange} open={open}>
<DialogContent className="max-w-md">
<DialogHeader>
<DialogTitle>Add URL Context</DialogTitle>
<DialogDescription>
Hermes will fetch this URL via the existing @url context resolver when you send the prompt.
</DialogDescription>
<DialogContent className="max-w-md gap-5">
<DialogHeader className="flex-row items-center gap-3 sm:items-center">
<span
aria-hidden
className="grid size-9 shrink-0 place-items-center rounded-xl bg-[color-mix(in_srgb,var(--dt-primary)_14%,transparent)] text-primary ring-1 ring-inset ring-primary/15"
>
<Globe className="size-4" />
</span>
<div className="grid gap-0.5 text-left">
<DialogTitle>Attach a URL</DialogTitle>
<DialogDescription>Hermes will fetch the page and include it as context for this turn.</DialogDescription>
</div>
</DialogHeader>
<form
className="grid gap-4"
@ -35,18 +47,29 @@ export function UrlDialog({
onSubmit()
}}
>
<Input
onChange={e => onChange(e.target.value)}
placeholder="https://example.com"
ref={inputRef}
value={value}
/>
<div className="grid gap-1.5">
<Input
autoComplete="off"
autoCorrect="off"
inputMode="url"
onChange={e => onChange(e.target.value)}
placeholder="https://example.com/post"
ref={inputRef}
spellCheck={false}
value={value}
/>
{trimmed.length > 0 && !looksLikeUrl && (
<p className="text-xs text-muted-foreground/85">
Include the full URL, e.g. <span className="font-mono">https://…</span>
</p>
)}
</div>
<DialogFooter>
<Button onClick={() => onOpenChange(false)} type="button" variant="ghost">
Cancel
</Button>
<Button disabled={!value.trim()} type="submit">
Add URL
<Button disabled={!looksLikeUrl} type="submit">
Attach
</Button>
</DialogFooter>
</form>

View file

@ -1,6 +1,10 @@
import { Loader2, Mic } from 'lucide-react'
import { useStore } from '@nanostores/react'
import { Loader2, Mic, Volume2, VolumeX } from 'lucide-react'
import { Button } from '@/components/ui/button'
import { cn } from '@/lib/utils'
import { stopVoicePlayback } from '@/lib/voice-playback'
import { $voicePlayback } from '@/store/voice-playback'
import type { VoiceActivityState } from './types'
@ -36,6 +40,25 @@ function VoiceLevelBars({ level, active }: { active: boolean; level: number }) {
)
}
/**
 * Decorative row of seven animated bars shown during playback. Hidden from
 * assistive technology via `aria-hidden`. The motion itself presumably comes
 * from the `voice-wave-bar` CSS class defined elsewhere; this component only
 * supplies per-bar timing.
 */
function PlaybackBars() {
  // One animation duration (ms) per bar, so the bars do not move in lockstep.
  const durationsMs = [820, 940, 760, 880, 700, 980, 790]

  const renderedBars = durationsMs.map((durationMs, position) => {
    // Negative delays start each bar part-way through its cycle.
    const timing = {
      animationDelay: `${position * -110}ms`,
      animationDuration: `${durationMs}ms`
    }

    return <span className="voice-wave-bar h-full w-0.5 rounded-full bg-current" key={position} style={timing} />
  })

  return (
    <div aria-hidden="true" className="flex h-4 items-center gap-0.75">
      {renderedBars}
    </div>
  )
}
export function VoiceActivity({
state
}: {
@ -75,3 +98,50 @@ export function VoiceActivity({
</div>
)
}
/**
 * Status strip shown while speech audio is being prepared or played.
 *
 * Subscribes to the `$voicePlayback` store and renders nothing when its status
 * is 'idle'. Otherwise it shows a spinner ('preparing') or a speaker icon with
 * animated bars, a title that depends on the playback source, and a Stop
 * button wired to `stopVoicePlayback`.
 */
export function VoicePlaybackActivity() {
  const playback = useStore($voicePlayback)

  if (playback.status === 'idle') {
    return null
  }

  const preparing = playback.status === 'preparing'

  let title = 'Preparing audio'

  if (!preparing) {
    title = playback.source === 'voice-conversation' ? 'Speaking response' : 'Reading aloud'
  }

  const statusIcon = preparing ? <Loader2 className="animate-spin" size={12} /> : <Volume2 size={12} />

  const containerClass = cn(
    'flex h-8 items-center gap-2 rounded-xl border border-primary/20 bg-primary/10 px-2.5 text-xs text-primary',
    'shadow-[inset_0_1px_0_rgba(255,255,255,0.35)] backdrop-blur-sm'
  )

  return (
    <div aria-live="polite" className={containerClass} role="status">
      <div className="flex size-5 shrink-0 items-center justify-center rounded-full bg-primary/15 text-primary">
        {statusIcon}
      </div>
      <div className="flex min-w-0 flex-1 items-center gap-2">
        <span className="truncate font-medium text-foreground/85">{title}</span>
        {!preparing && <PlaybackBars />}
      </div>
      <Button
        className="h-6 shrink-0 gap-1 rounded-full px-2 text-[0.6875rem]"
        onClick={stopVoicePlayback}
        size="sm"
        type="button"
        variant="ghost"
      >
        <VolumeX size={12} />
        Stop
      </Button>
    </div>
  )
}

View file

@ -1,5 +1,6 @@
import { useCallback } from 'react'
import { formatRefValue } from '@/components/assistant-ui/directive-text'
import { attachmentId, contextPath, pathLabel } from '@/lib/chat-runtime'
import {
addComposerAttachment,
@ -57,7 +58,7 @@ export function useComposerActions({ activeSessionId, currentCwd, requestGateway
kind,
label: pathLabel(path),
detail: rel,
refText: `@${kind}:${rel}`,
refText: `@${kind}:${formatRefValue(rel)}`,
path
})
}

View file

@ -8,13 +8,14 @@ import { useStore } from '@nanostores/react'
import { useQuery } from '@tanstack/react-query'
import { ChevronDown } from 'lucide-react'
import type * as React from 'react'
import { Suspense, useMemo } from 'react'
import { Suspense, useMemo, useRef } from 'react'
import { useLocation } from 'react-router-dom'
import { Thread } from '@/components/assistant-ui/thread'
import { NotificationStack } from '@/components/notifications'
import { Button } from '@/components/ui/button'
import { getGlobalModelOptions, type HermesGateway } from '@/hermes'
import type { ChatMessage } from '@/lib/chat-messages'
import { quickModelOptions, sessionTitle, toRuntimeMessage } from '@/lib/chat-runtime'
import { cn } from '@/lib/utils'
import { $pinnedSessionIds } from '@/store/layout'
@ -57,7 +58,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
onPickFolders: () => void
onPickImages: () => void
onRemoveAttachment: (id: string) => void
onSubmit: (text: string) => void
onSubmit: (text: string) => Promise<void> | void
onChangeCwd: (cwd: string) => void
onBrowseCwd: () => void
onOpenModelPicker: () => void
@ -118,6 +119,7 @@ export function ChatView({
const pinnedSessionIds = useStore($pinnedSessionIds)
const selectedSessionId = useStore($selectedStoredSessionId)
const sessions = useStore($sessions)
const runtimeMessageCacheRef = useRef(new WeakMap<ChatMessage, ThreadMessage>())
const activeStoredSession = sessions.find(session => session.id === selectedSessionId) || null
const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
const selectedIsPinned = selectedSessionId ? pinnedSessionIds.includes(selectedSessionId) : false
@ -128,6 +130,7 @@ export function ChatView({
const loadingSession = isRoutedSessionView && messages.length === 0
const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse)
const showChatBar = !loadingSession
const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
const title = activeStoredSession ? sessionTitle(activeStoredSession) : ''
const modelOptionsQuery = useQuery<ModelOptionsResponse>({
@ -190,7 +193,14 @@ export function ChatView({
parentId = branchParentByGroup.get(message.branchGroupId) ?? null
}
items.push({ message: toRuntimeMessage(message), parentId })
const cachedMessage = runtimeMessageCacheRef.current.get(message)
const runtimeMessage = cachedMessage ?? toRuntimeMessage(message)
if (!cachedMessage) {
runtimeMessageCacheRef.current.set(message, runtimeMessage)
}
items.push({ message: runtimeMessage, parentId })
if (!message.hidden) {
visibleParentId = message.id
@ -248,6 +258,7 @@ export function ChatView({
intro={showIntro ? { personality: introPersonality, seed: introSeed } : undefined}
loading={threadLoading}
onBranchInNewChat={onBranchInNewChat}
sessionKey={threadKey}
/>
{showChatBar && (
<Suspense fallback={<ChatBarFallback />}>

View file

@ -14,6 +14,7 @@ import {
listSessions,
setGlobalModel
} from '../hermes'
import { formatRefValue } from '../components/assistant-ui/directive-text'
import { toChatMessages } from '../lib/chat-messages'
import { BUILTIN_PERSONALITIES, normalizePersonalityValue, personalityNamesFromConfig } from '../lib/chat-runtime'
import { $pinnedSessionIds, pinSession, unpinSession } from '../store/layout'
@ -571,7 +572,7 @@ export function DesktopController() {
gateway={gatewayRef.current}
maxVoiceRecordingSeconds={voiceMaxRecordingSeconds}
onAddContextRef={addContextRefAttachment}
onAddUrl={url => addContextRefAttachment(`@url:${url}`, url)}
onAddUrl={url => addContextRefAttachment(`@url:${formatRefValue(url)}`, url)}
onBranchInNewChat={messageId => void branchInNewChat(messageId)}
onBrowseCwd={() => void browseSessionCwd()}
onCancel={() => void cancelRun()}
@ -589,7 +590,7 @@ export function DesktopController() {
onReload={reloadFromMessage}
onRemoveAttachment={id => void removeAttachment(id)}
onSelectPersonality={name => void selectPersonality(name)}
onSubmit={text => void submitText(text)}
onSubmit={submitText}
onThreadMessagesChange={handleThreadMessagesChange}
onToggleSelectedPin={toggleSelectedPin}
onTranscribeAudio={transcribeVoiceAudio}

View file

@ -1,6 +1,5 @@
import type { QueryClient } from '@tanstack/react-query'
import { type MutableRefObject, useCallback } from 'react'
import { flushSync } from 'react-dom'
import {
appendReasoningPart,
@ -60,7 +59,6 @@ export function useMessageStream({
transform: (parts: ChatMessagePart[], message: ChatMessage) => ChatMessagePart[],
seed: () => ChatMessagePart[],
opts: {
sync?: boolean
pending?: (message: ChatMessage) => boolean
} = {}
) => {
@ -112,7 +110,7 @@ export function useMessageStream({
})
}
opts.sync ? flushSync(apply) : apply()
apply()
},
[updateSessionState]
)
@ -126,8 +124,7 @@ export function useMessageStream({
mutateStream(
sessionId,
parts => appendTextPart(parts, delta),
() => [textPart(delta)],
{ sync: true }
() => [textPart(delta)]
)
},
[mutateStream]
@ -152,8 +149,7 @@ export function useMessageStream({
return appendReasoningPart(parts, delta)
},
() => [reasoningPart(delta)],
{ sync: true }
() => [reasoningPart(delta)]
)
},
[mutateStream]
@ -299,6 +295,7 @@ export function useMessageStream({
const apply = explicitSid ? isActiveEvent : !activeSessionIdRef.current
const modelChanged = typeof payload?.model === 'string'
const providerChanged = typeof payload?.provider === 'string'
const runningChanged = typeof payload?.running === 'boolean'
if (apply) {
if (modelChanged) {
@ -320,6 +317,35 @@ export function useMessageStream({
if (typeof payload?.personality === 'string') {
setCurrentPersonality(normalizePersonalityValue(payload.personality))
}
if (runningChanged && sessionId) {
updateSessionState(sessionId, state => {
const busy = Boolean(payload!.running)
if (state.busy === busy && (busy || !state.awaitingResponse)) {
return state
}
if (busy) {
return {
...state,
busy
}
}
if (state.awaitingResponse && !state.sawAssistantPayload) {
return state
}
return {
...state,
awaitingResponse: false,
busy,
pendingBranchGroup: null,
streamId: null
}
})
}
}
void refreshHermesConfig()
@ -355,11 +381,11 @@ export function useMessageStream({
}
} else if (event.type === 'reasoning.delta') {
if (sessionId) {
appendReasoningDelta(sessionId, coerceGatewayText(payload?.text))
appendReasoningDelta(sessionId, coerceThinkingText(payload?.text))
}
} else if (event.type === 'reasoning.available') {
if (sessionId) {
appendReasoningDelta(sessionId, coerceGatewayText(payload?.text), true)
appendReasoningDelta(sessionId, coerceThinkingText(payload?.text), true)
}
} else if (event.type === 'message.complete') {
if (!sessionId) {

View file

@ -13,7 +13,7 @@ import {
import { triggerHaptic } from '@/lib/haptics'
import { $composerAttachments, clearComposerAttachments } from '@/store/composer'
import { clearNotifications, notify, notifyError } from '@/store/notifications'
import { $busy, $messages, setAwaitingResponse, setBusy } from '@/store/session'
import { $busy, $messages, setAwaitingResponse, setBusy, setMessages } from '@/store/session'
import type { ClientSessionState, SlashExecResponse } from '../../types'
@ -296,12 +296,34 @@ export function usePromptActions({
)
const cancelRun = useCallback(async () => {
if (!activeSessionId) {
const sessionId = activeSessionId || activeSessionIdRef.current
busyRef.current = false
setBusy(false)
setAwaitingResponse(false)
const finalizeMessages = (messages: ChatMessage[]) =>
messages.map(message =>
message.pending
? {
...message,
parts: chatMessageText(message).trim()
? appendTextPart(message.parts, INTERRUPTED_MARKER)
: [...message.parts, textPart(INTERRUPTED_MARKER.trim())],
pending: false
}
: message
)
if (!sessionId) {
setMessages(finalizeMessages($messages.get()))
return
}
updateSessionState(activeSessionId, state => {
updateSessionState(sessionId, state => {
const streamId = state.streamId
const messages = streamId
? state.messages.map(message =>
message.id === streamId
@ -314,7 +336,7 @@ export function usePromptActions({
}
: message
)
: state.messages
: finalizeMessages(state.messages)
return {
...state,
@ -328,11 +350,11 @@ export function usePromptActions({
})
try {
await requestGateway('session.interrupt', { session_id: activeSessionId })
await requestGateway('session.interrupt', { session_id: sessionId })
} catch (err) {
notifyError(err, 'Stop failed')
}
}, [activeSessionId, requestGateway, updateSessionState])
}, [activeSessionId, activeSessionIdRef, busyRef, requestGateway, updateSessionState])
const reloadFromMessage = useCallback(
async (parentId: string | null) => {

View file

@ -87,6 +87,11 @@ export function useSessionActions({
const createBackendSessionForSend = useCallback(async (): Promise<string | null> => {
const created = await requestGateway<SessionCreateResponse>('session.create', { cols: 96 })
if (created.stored_session_id) {
navigate(sessionRoute(created.stored_session_id), { replace: true })
}
setActiveSessionId(created.session_id)
activeSessionIdRef.current = created.session_id
ensureSessionState(created.session_id, created.stored_session_id ?? null)
@ -94,7 +99,6 @@ export function useSessionActions({
if (created.stored_session_id) {
setSelectedStoredSessionId(created.stored_session_id)
selectedStoredSessionIdRef.current = created.stored_session_id
navigate(sessionRoute(created.stored_session_id), { replace: true })
}
if (created.info?.model) {

View file

@ -60,6 +60,7 @@ export const ENUM_OPTIONS: Record<string, string[]> = {
'context.engine': ['compressor', 'default', 'custom'],
'delegation.reasoning_effort': ['', 'minimal', 'low', 'medium', 'high', 'xhigh'],
'memory.provider': ['', 'builtin', 'honcho'],
'stt.elevenlabs.model_id': ['scribe_v2', 'scribe_v1'],
'stt.local.model': ['tiny', 'base', 'small', 'medium', 'large-v3'],
'tts.openai.voice': ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
}
@ -101,6 +102,10 @@ export const FIELD_LABELS: Record<string, string> = {
'stt.provider': 'Speech-To-Text Provider',
'stt.local.model': 'Local Transcription Model',
'stt.local.language': 'Transcription Language',
'stt.elevenlabs.model_id': 'ElevenLabs STT Model',
'stt.elevenlabs.language_code': 'ElevenLabs Language',
'stt.elevenlabs.tag_audio_events': 'Tag Audio Events',
'stt.elevenlabs.diarize': 'Speaker Diarization',
'tts.provider': 'Text-To-Speech Provider',
'tts.edge.voice': 'Edge Voice',
'tts.openai.model': 'OpenAI TTS Model',
@ -157,6 +162,7 @@ export const FIELD_DESCRIPTIONS: Record<string, string> = {
'compression.enabled': 'Summarize older context when conversations get large.',
'voice.auto_tts': 'Automatically speak assistant responses.',
'stt.enabled': 'Enable local or provider-backed speech transcription.',
'stt.elevenlabs.language_code': 'Optional ISO-639-3 language code. Blank lets ElevenLabs auto-detect.',
'agent.max_turns': 'Upper bound for tool-calling turns before Hermes stops a run.'
}
@ -241,6 +247,10 @@ export const SECTIONS: DesktopConfigSection[] = [
'tts.elevenlabs.model_id',
'stt.local.model',
'stt.local.language',
'stt.elevenlabs.model_id',
'stt.elevenlabs.language_code',
'stt.elevenlabs.tag_audio_events',
'stt.elevenlabs.diarize',
'voice.record_key',
'voice.max_recording_seconds'
]

View file

@ -0,0 +1,39 @@
import { describe, expect, it } from 'vitest'
import { formatRefValue, hermesDirectiveFormatter } from './directive-text'
// Serialisation side of the `@type:value` directive syntax: formatRefValue
// decides whether a value can be emitted bare or has to be wrapped in quotes.
describe('formatRefValue', () => {
// Values without whitespace, brackets or quote characters are returned as-is.
it('leaves simple paths untouched', () => {
expect(formatRefValue('src/index.ts')).toBe('src/index.ts')
expect(formatRefValue('https://example.com/post')).toBe('https://example.com/post')
})
// Backticks are the preferred wrapper; this value needs one because of the space and parentheses.
it('wraps paths with whitespace in backticks', () => {
expect(formatRefValue('apple-touch-icon (1).png')).toBe('`apple-touch-icon (1).png`')
})
// A value that already contains a backtick cannot be backtick-wrapped, so double quotes are used instead.
it('falls back to double quotes when value contains backticks', () => {
expect(formatRefValue('weird `name` (1).md')).toBe('"weird `name` (1).md"')
})
})
// Parsing side of the directive syntax: text is split into plain-text segments
// and mention segments carrying the reference type, a display label and the id.
describe('hermesDirectiveFormatter.parse', () => {
// The backtick-wrapped value contains spaces; it must come back as a single
// mention whose id/label have the wrapping backticks removed.
it('keeps quoted file paths whole when parsing', () => {
const segments = hermesDirectiveFormatter.parse('see @image:`apple-touch-icon (1).png` for the icon')
expect(segments).toEqual([
{ kind: 'text', text: 'see ' },
{ kind: 'mention', type: 'image', label: 'apple-touch-icon (1).png', id: 'apple-touch-icon (1).png' },
{ kind: 'text', text: ' for the icon' }
])
})
// Bare values still end at the first whitespace; the expected label is the
// last path segment while the id keeps the full path.
it('still parses unquoted paths', () => {
const segments = hermesDirectiveFormatter.parse('@file:src/main.tsx the entry point')
expect(segments).toEqual([
{ kind: 'mention', type: 'file', label: 'main.tsx', id: 'src/main.tsx' },
{ kind: 'text', text: ' the entry point' }
])
})
})

View file

@ -24,10 +24,63 @@ const ICONS: Record<HermesRefType, ComponentType<{ className?: string }>> = {
* so they render as inline chips in user messages instead of raw text.
*
* Supported types: file, folder, url, image. Anything else stays plain text.
*
* Mirrors the Python `agent/context_references.REFERENCE_PATTERN` syntax:
* the value may be wrapped in backticks, single quotes, or double quotes so
* paths with spaces/parens/etc. survive parsing intact.
*/
const CANONICAL_DIRECTIVE_RE = /:([\w-]{1,64})\[([^\]\n]{1,1024})\](?:\{name=([^}\n]{1,1024})\})?/gu
const CANONICAL_DIRECTIVE_RE = /:([\w-]{1,64})\[([^\]\n]{1,1024})\](?:\{name=([^}\n]{1,1024})\})?/g
const HERMES_DIRECTIVE_RE = /@(file|folder|url|image|tool):(\S+)/gu
const HERMES_DIRECTIVE_RE = new RegExp(
'@(file|folder|url|image|tool):(' +
'`[^`\\n]+`' +
'|"[^"\\n]+"' +
"|'[^'\\n]+'" +
'|\\S+' +
')',
'g'
)
const TRAILING_PUNCTUATION_RE = /[,.;!?]+$/

/**
 * Turns the raw value captured after `@type:` into the actual reference id.
 *
 * - A value enclosed in a matching pair of backticks, double quotes or single
 *   quotes has that pair removed and is otherwise returned verbatim.
 * - Any other value of two or more characters loses trailing `,.;!?`, so
 *   sentence punctuation directly after a bare reference is not part of the id.
 * - Values shorter than two characters are returned unchanged.
 */
function unwrapRefValue(raw: string): string {
  const isWrapped =
    raw.length >= 2 && ['`', '"', "'"].some(quote => raw.startsWith(quote) && raw.endsWith(quote))

  if (isWrapped) {
    return raw.slice(1, -1)
  }

  return raw.length < 2 ? raw : raw.replace(TRAILING_PUNCTUATION_RE, '')
}
/**
 * Reports whether a reference value has to be quoted to survive being written
 * as `@type:value` and parsed back.
 *
 * Quoting is required when the value contains whitespace, brackets or quote
 * characters, and also when a value of two or more characters ends in `,.;!?`:
 * the parser (`unwrapRefValue`) strips that trailing punctuation from unquoted
 * values, so a bare `..` or `notes/draft.` would come back altered or empty.
 */
function needsQuoting(value: string): boolean {
  if (/[\s()\[\]{}<>"'`]/.test(value)) {
    return true
  }

  // Same character set and length threshold the parser uses when trimming bare values.
  return value.length >= 2 && /[,.;!?]$/.test(value)
}

/**
 * Formats a reference value for use after `@type:` in a directive.
 *
 * Values that are safe to emit bare are returned unchanged. Otherwise the value
 * is wrapped in the first delimiter it does not itself contain, trying
 * backticks, then double quotes, then single quotes.
 *
 * @param value - The raw path, URL or id to reference.
 * @returns The value, quoted when necessary. A value containing all three quote
 *   characters cannot be wrapped and is returned as-is.
 */
export function formatRefValue(value: string): string {
  if (!needsQuoting(value)) {
    return value
  }

  for (const quote of ['`', '"', "'"]) {
    if (!value.includes(quote)) {
      return `${quote}${value}${quote}`
    }
  }

  return value
}
export const hermesDirectiveFormatter: Unstable_DirectiveFormatter = {
serialize(item: Unstable_TriggerItem): string {
@ -35,7 +88,7 @@ export const hermesDirectiveFormatter: Unstable_DirectiveFormatter = {
return `@${item.id}`
}
return `@${item.type}:${item.id}`
return `@${item.type}:${formatRefValue(item.id)}`
},
parse(text: string): readonly Unstable_DirectiveSegment[] {
return parseDirectiveText(text)
@ -51,13 +104,17 @@ function parseDirectiveText(text: string): Unstable_DirectiveSegment[] {
label: match[2] || match[3] || '',
id: match[3] || match[2] || ''
})),
...Array.from(text.matchAll(HERMES_DIRECTIVE_RE)).map(match => ({
start: match.index ?? 0,
end: (match.index ?? 0) + match[0].length,
type: match[1] || 'file',
label: shortLabel(match[1] as HermesRefType, match[2] || ''),
id: match[2] || ''
}))
...Array.from(text.matchAll(HERMES_DIRECTIVE_RE)).map(match => {
const id = unwrapRefValue(match[2] || '')
return {
start: match.index ?? 0,
end: (match.index ?? 0) + match[0].length,
type: match[1] || 'file',
label: shortLabel(match[1] as HermesRefType, id),
id
}
})
]
.filter(match => match.id)
.sort((a, b) => a.start - b.start)
@ -136,14 +193,14 @@ const DirectiveChip: FC<{
return (
<span
className={cn(
'mx-0.5 inline-flex max-w-56 items-center gap-1 rounded-full border border-border/80 bg-background/95 px-1.5 py-0.5 align-[0.05em] text-[0.82em] font-medium leading-none text-foreground shadow-sm ring-1 ring-black/3'
'mx-0.5 inline-flex max-w-64 items-center gap-1 rounded-full bg-[color-mix(in_srgb,var(--dt-primary)_16%,transparent)] px-2 py-0.5 align-[0.02em] text-[0.92em] font-semibold leading-tight text-primary ring-1 ring-inset ring-primary/10'
)}
data-directive-id={id}
data-directive-type={type}
data-slot="aui_directive-chip"
title={id}
>
{Icon && <Icon className="size-3 shrink-0 text-muted-foreground" />}
{Icon && <Icon className="size-3.5 shrink-0 text-primary" />}
<span className="truncate">{label}</span>
</span>
)

View file

@ -19,6 +19,7 @@ export type IntroProps = {
const NEUTRAL_PERSONALITIES = new Set(['', 'default', 'none', 'neutral'])
const HERMES_FRAME_COUNT = 8
const ASSET_BASE_URL = import.meta.env.BASE_URL || '/'
const FALLBACK_COPY: IntroCopy[] = [
{
@ -154,6 +155,10 @@ function resolveCopy(personality?: string, seed?: number): IntroCopy {
return pickCopy(copies, seed)
}
/**
 * Prefixes `path` with the configured asset base URL.
 *
 * After joining, any run of extra slashes that follows a "non-colon character
 * plus slash" pair is collapsed to that single slash, so `base/` + `/x` does
 * not yield `base//x` while the `//` in a `scheme://` prefix is left alone.
 */
function publicAssetPath(path: string): string {
  const joined = ASSET_BASE_URL + path

  return joined.replace(/([^:]\/)\/+/g, '$1')
}
export const Intro: FC<IntroProps> = ({ personality, seed }) => {
const [mountSeed] = useState(() => Math.floor(Math.random() * 100000))
const [frameOffset, setFrameOffset] = useState(0)
@ -184,7 +189,7 @@ export const Intro: FC<IntroProps> = ({ personality, seed }) => {
aria-hidden="true"
className="h-full w-full scale-110 object-contain select-none"
draggable={false}
src={`/hermes-frames/hermes-frame-${frameIndex}.png?v=matte-clean-6`}
src={publicAssetPath(`hermes-frames/hermes-frame-${frameIndex}.png?v=matte-clean-6`)}
/>
</button>
<p className="mb-3 text-xs font-medium uppercase tracking-[0.18em] text-muted-foreground/75">Hermes Agent</p>

View file

@ -1,19 +1,53 @@
import { AssistantRuntimeProvider, type ThreadMessage, useExternalStoreRuntime } from '@assistant-ui/react'
import { act, render, screen, waitFor } from '@testing-library/react'
import { act, fireEvent, render, screen, waitFor } from '@testing-library/react'
import { useEffect, useState } from 'react'
import { describe, expect, it, vi } from 'vitest'
import { beforeEach, describe, expect, it, vi } from 'vitest'
import { Thread } from './thread'
const createdAt = new Date('2026-05-01T00:00:00.000Z')
const resizeObservers = new Set<TestResizeObserver>()
class TestResizeObserver {
observe() {}
private target: Element | null = null
constructor(private readonly callback: ResizeObserverCallback) {
resizeObservers.add(this)
}
observe(target: Element) {
this.target = target
}
unobserve() {}
disconnect() {}
disconnect() {
resizeObservers.delete(this)
}
trigger(height: number) {
if (!this.target) {
return
}
this.callback(
[
{
contentRect: { height } as DOMRectReadOnly,
target: this.target
} as ResizeObserverEntry
],
this as unknown as ResizeObserver
)
}
}
vi.stubGlobal('ResizeObserver', TestResizeObserver)
vi.stubGlobal('requestAnimationFrame', (callback: FrameRequestCallback) =>
window.setTimeout(() => callback(performance.now()), 0)
)
vi.stubGlobal('cancelAnimationFrame', (id: number) => window.clearTimeout(id))
Element.prototype.scrollTo = function scrollTo() {}
@ -90,6 +124,10 @@ function StreamingHarness() {
}
describe('assistant-ui streaming renderer', () => {
beforeEach(() => {
resizeObservers.clear()
})
it('renders assistant text incrementally before completion', async () => {
const { container } = render(<StreamingHarness />)
@ -115,4 +153,42 @@ describe('assistant-ui streaming renderer', () => {
expect(container.textContent).toContain('first chunk second chunk')
})
})
it('does not pull the viewport back down after the user scrolls up during streaming', async () => {
const { container } = render(<StreamingHarness />)
const viewport = container.querySelector('[data-slot="aui_thread-viewport"]') as HTMLDivElement
let scrollHeight = 1_000
Object.defineProperty(viewport, 'clientHeight', { configurable: true, value: 200 })
Object.defineProperty(viewport, 'scrollHeight', {
configurable: true,
get: () => scrollHeight
})
await wait(80)
await act(async () => {
viewport.scrollTop = 800
fireEvent.scroll(viewport)
})
await wait(0)
await act(async () => {
fireEvent.wheel(viewport, { deltaY: -120 })
viewport.scrollTop = 420
fireEvent.scroll(viewport)
})
scrollHeight = 1_200
await act(async () => {
for (const observer of resizeObservers) {
observer.trigger(1_200)
}
})
await wait(0)
expect(viewport.scrollTop).toBe(420)
})
})

View file

@ -8,18 +8,28 @@ import {
type ToolCallMessagePartProps,
useAuiState
} from '@assistant-ui/react'
import { useStore } from '@nanostores/react'
import {
CheckIcon,
ChevronLeftIcon,
ChevronRightIcon,
CopyIcon,
GitBranchIcon,
Loader2Icon,
MoreHorizontalIcon,
RefreshCwIcon,
Volume2Icon,
VolumeXIcon
} from 'lucide-react'
import { type FC, type ReactNode, useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react'
import {
type FC,
type ReactNode,
useCallback,
useEffect,
useLayoutEffect,
useRef,
useState
} from 'react'
import { useElapsedSeconds } from '@/components/assistant-ui/activity-timer'
import { ActivityTimerText } from '@/components/assistant-ui/activity-timer-text'
@ -38,11 +48,12 @@ import {
DropdownMenuTrigger
} from '@/components/ui/dropdown-menu'
import { Loader } from '@/components/ui/loader'
import { speakText } from '@/hermes'
import { triggerHaptic } from '@/lib/haptics'
import { cn } from '@/lib/utils'
import { playSpeechText, stopVoicePlayback } from '@/lib/voice-playback'
import { notifyError } from '@/store/notifications'
import { setThreadScrolledUp } from '@/store/thread-scroll'
import { $voicePlayback } from '@/store/voice-playback'
const THINKING_FACES = [
'(。•́︿•̀。)',
@ -119,12 +130,16 @@ export const Thread: FC<{
intro?: IntroProps
loading?: ThreadLoadingState
onBranchInNewChat?: (messageId: string) => void
}> = ({ intro, loading, onBranchInNewChat }) => {
sessionKey?: string | null
}> = ({ intro, loading, onBranchInNewChat, sessionKey }) => {
const viewportRef = useRef<HTMLDivElement | null>(null)
const contentRef = useRef<HTMLDivElement | null>(null)
const messageCount = useAuiState(s => s.thread.messages.length)
const isRunning = useAuiState(s => s.thread.isRunning)
const lastMessageId = useAuiState(s => s.thread.messages.at(-1)?.id ?? '')
const shouldStickToBottomRef = useRef(true)
const scrollFrameRef = useRef<number | null>(null)
const sessionKeyRef = useRef<string | null>(sessionKey ?? null)
const handleScroll = useCallback((event: React.UIEvent<HTMLDivElement>) => {
const nearBottom = isNearBottom(event.currentTarget)
@ -132,8 +147,44 @@ export const Thread: FC<{
setThreadScrolledUp(!nearBottom)
}, [])
const handleWheel = useCallback((event: React.WheelEvent<HTMLDivElement>) => {
if (event.deltaY < 0) {
shouldStickToBottomRef.current = false
setThreadScrolledUp(true)
}
}, [])
const scrollToBottom = useCallback(() => {
const viewport = viewportRef.current
if (!viewport) {
return
}
viewport.scrollTop = viewport.scrollHeight
shouldStickToBottomRef.current = true
setThreadScrolledUp(false)
}, [])
const scheduleScrollToBottom = useCallback(() => {
if (scrollFrameRef.current !== null) {
window.cancelAnimationFrame(scrollFrameRef.current)
}
scrollFrameRef.current = window.requestAnimationFrame(() => {
scrollFrameRef.current = null
scrollToBottom()
})
}, [scrollToBottom])
useEffect(() => {
return () => setThreadScrolledUp(false)
return () => {
if (scrollFrameRef.current !== null) {
window.cancelAnimationFrame(scrollFrameRef.current)
}
setThreadScrolledUp(false)
}
}, [])
useLayoutEffect(() => {
@ -143,16 +194,48 @@ export const Thread: FC<{
return
}
const force = loading === 'session'
const nextSessionKey = sessionKey ?? null
const sessionChanged = sessionKeyRef.current !== nextSessionKey
sessionKeyRef.current = nextSessionKey
const force = loading === 'session' || sessionChanged
if (!force && !shouldStickToBottomRef.current) {
return
}
viewport.scrollTop = viewport.scrollHeight
shouldStickToBottomRef.current = true
setThreadScrolledUp(false)
}, [isRunning, lastMessageId, loading, messageCount])
scheduleScrollToBottom()
}, [isRunning, lastMessageId, loading, messageCount, scheduleScrollToBottom, sessionKey])
useLayoutEffect(() => {
const content = contentRef.current
const viewport = viewportRef.current
if (!content || !viewport) {
return
}
let previousHeight = content.getBoundingClientRect().height
const observer = new ResizeObserver(entries => {
const height = entries[0]?.contentRect.height ?? content.getBoundingClientRect().height
if (height === previousHeight) {
return
}
previousHeight = height
if (!shouldStickToBottomRef.current && !isNearBottom(viewport)) {
return
}
scheduleScrollToBottom()
})
observer.observe(content)
return () => observer.disconnect()
}, [scheduleScrollToBottom])
return (
<GeneratedImageProvider>
@ -160,15 +243,17 @@ export const Thread: FC<{
<AuiIf condition={s => Boolean(intro) && s.thread.isEmpty}>{intro && <Intro {...intro} />}</AuiIf>
<ThreadPrimitive.Viewport
className="h-full min-h-0 overflow-y-auto overscroll-contain px-[clamp(1rem,10%,12rem)] pt-[calc(var(--vsq)*19)] scroll-smooth"
autoScroll={false}
className="h-full min-h-0 overflow-y-auto overscroll-contain px-[clamp(1rem,10%,12rem)] pt-[calc(var(--vsq)*19)]"
data-slot="aui_thread-viewport"
onScroll={handleScroll}
onWheel={handleWheel}
ref={viewportRef}
scrollToBottomOnInitialize
scrollToBottomOnRunStart
scrollToBottomOnThreadSwitch
>
<div className="flex w-full flex-col gap-3">
<div className="flex w-full flex-col gap-3" ref={contentRef}>
<ThreadPrimitive.Messages>{() => <ThreadMessage onBranchInNewChat={onBranchInNewChat} />}</ThreadPrimitive.Messages>
{loading === 'response' && <ResponseLoadingIndicator />}
{loading === 'working' && <WorkingIndicator />}
@ -446,7 +531,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
<GitBranchIcon />
Branch in new chat
</DropdownMenuItem>
<ReadAloudItem text={messageText} />
<ReadAloudItem messageId={messageId} text={messageText} />
</DropdownMenuContent>
</DropdownMenu>
</ActionBarPrimitive.Root>
@ -479,80 +564,39 @@ const CopyMessageButton: FC<{ text: string }> = ({ text }) => {
)
}
let currentAudio: HTMLAudioElement | null = null
const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, text }) => {
const voicePlayback = useStore($voicePlayback)
function stopCurrentAudio() {
if (!currentAudio) {
return
}
const readAloudStatus =
voicePlayback.source === 'read-aloud' && voicePlayback.messageId === messageId ? voicePlayback.status : 'idle'
currentAudio.pause()
currentAudio.src = ''
currentAudio = null
}
const ReadAloudItem: FC<{ text: string }> = ({ text }) => {
const [reading, setReading] = useState(false)
const seqRef = useRef(0)
const stop = useCallback(() => {
seqRef.current += 1
stopCurrentAudio()
setReading(false)
}, [])
const isPreparing = readAloudStatus === 'preparing'
const isSpeaking = readAloudStatus === 'speaking'
const anyPlaybackActive = voicePlayback.status !== 'idle'
const Icon = isPreparing ? Loader2Icon : isSpeaking ? VolumeXIcon : Volume2Icon
const read = useCallback(async () => {
if (!text) {
if (!text || $voicePlayback.get().status !== 'idle') {
return
}
stopCurrentAudio()
const seq = ++seqRef.current
const isCurrent = () => seq === seqRef.current
const finish = () => {
if (!isCurrent()) {
return
}
currentAudio = null
setReading(false)
}
setReading(true)
try {
const { data_url } = await speakText(text)
if (!isCurrent()) {
return
}
const audio = new Audio(data_url)
currentAudio = audio
audio.addEventListener('ended', finish, { once: true })
audio.addEventListener('error', finish, { once: true })
await audio.play()
await playSpeechText(text, { messageId, source: 'read-aloud' })
} catch (error) {
if (isCurrent()) {
notifyError(error, 'Read aloud failed')
finish()
}
notifyError(error, 'Read aloud failed')
}
}, [text])
const Icon = reading ? VolumeXIcon : Volume2Icon
}, [messageId, text])
return (
<DropdownMenuItem
disabled={!reading && !text}
disabled={isPreparing || (!isSpeaking && (anyPlaybackActive || !text))}
onSelect={e => {
e.preventDefault()
void (reading ? stop() : read())
void (isSpeaking ? stopVoicePlayback() : read())
}}
>
<Icon />
{reading ? 'Stop reading' : 'Read aloud'}
<Icon className={isPreparing ? 'animate-spin' : undefined} />
{isPreparing ? 'Preparing audio...' : isSpeaking ? 'Stop reading' : 'Read aloud'}
</DropdownMenuItem>
)
}

View file

@ -0,0 +1,18 @@
import { describe, expect, it } from 'vitest'
import { chatMessageText, toChatMessages } from './chat-messages'
// Stored user messages can carry an "--- Attached Context ---" section holding
// the expanded contents of referenced files; that payload must not be rendered.
describe('toChatMessages', () => {
// Only the `@file:` reference from the attached section is expected to remain,
// placed ahead of the text the user typed; the token count and fenced JSON
// body are expected to be dropped.
it('hides attached context payloads from user message display', () => {
const [message] = toChatMessages([
{
role: 'user',
content:
'what is this file\n\n--- Attached Context ---\n\n📄 @file:tsconfig.tsbuildinfo (981 tokens)\n```json\n{"root":["./src/main.tsx"]}\n```',
timestamp: 1
}
])
expect(chatMessageText(message)).toBe('@file:tsconfig.tsbuildinfo\n\nwhat is this file')
})
})

View file

@ -29,6 +29,7 @@ export type GatewayEventPayload = {
todos?: unknown
model?: string
provider?: string
running?: boolean
cwd?: string
branch?: string
personality?: string
@ -49,6 +50,28 @@ export function chatMessageText(message: ChatMessage): string {
.join('')
}
const ATTACHED_CONTEXT_MARKER_RE = /(?:^|\n)--- Attached Context ---\s*\n/
const CONTEXT_WARNINGS_MARKER_RE = /(?:^|\n)--- Context Warnings ---[\s\S]*$/
const CONTEXT_REF_RE = /@(file|folder|url|image|tool):(?:"[^"\n]+"|'[^'\n]+'|`[^`\n]+`|\S+)/g

/**
 * Derives the text shown in the transcript for a stored message.
 *
 * Non-user messages are returned untouched. For user messages:
 * - everything from a "--- Context Warnings ---" marker to the end is removed;
 * - if an "--- Attached Context ---" marker is present, only the text before it
 *   is kept as the visible message, and the attached section is reduced to the
 *   distinct `@type:value` references found in it (in first-seen order, one per
 *   line), which are placed above the visible text separated by a blank line.
 */
function displayContentForMessage(role: SessionMessage['role'], content: string): string {
  if (role !== 'user') {
    return content
  }

  const marker = ATTACHED_CONTEXT_MARKER_RE.exec(content)

  if (marker === null) {
    return content.replace(CONTEXT_WARNINGS_MARKER_RE, '').trim()
  }

  const beforeMarker = content.slice(0, marker.index)
  const visibleText = beforeMarker.replace(CONTEXT_WARNINGS_MARKER_RE, '').trim()
  const attachedContext = content.slice(marker.index + marker[0].length)

  // A Set keeps insertion order, so duplicates collapse onto their first occurrence.
  const uniqueRefs = new Set<string>()

  for (const match of attachedContext.matchAll(CONTEXT_REF_RE)) {
    uniqueRefs.add(match[0])
  }

  const refBlock = [...uniqueRefs].join('\n')

  if (!refBlock) {
    return visibleText
  }

  return visibleText ? `${refBlock}\n\n${visibleText}` : refBlock
}
export function appendTextPart(parts: ChatMessagePart[], delta: string): ChatMessagePart[] {
const next = [...parts]
const last = next.at(-1)
@ -363,6 +386,7 @@ export function toChatMessages(messages: SessionMessage[]): ChatMessage[] {
}
const content = message.content || message.text || message.context || message.name || ''
const displayContent = displayContentForMessage(message.role, content)
const parts: ChatMessagePart[] = []
const reasoning =
@ -374,8 +398,8 @@ export function toChatMessages(messages: SessionMessage[]): ChatMessage[] {
parts.push(reasoningPart(reasoning))
}
if (content) {
parts.push(textPart(content))
if (displayContent) {
parts.push(textPart(displayContent))
}
if (message.role === 'assistant' && Array.isArray(message.tool_calls)) {

View file

@ -0,0 +1,18 @@
import { describe, expect, it } from 'vitest'
import { coerceThinkingText } from './chat-runtime'
// Thinking deltas can arrive prefixed with a spinner-style status
// ("<face> processing... ") that should not be shown as reasoning text.
describe('coerceThinkingText', () => {
// Both a face without parentheses and a parenthesised face, followed by a
// status verb and "...", are expected to be removed from the start of the text.
it('strips streaming status prefixes from thinking deltas', () => {
expect(coerceThinkingText("◉_◉ processing... checking the user's request")).toBe("checking the user's request")
expect(coerceThinkingText('(¬‿¬) analyzing... reading the file')).toBe('reading the file')
})
// A delta that only says there is no thinking content to rewrite is expected
// to collapse to the empty string rather than being displayed.
it('drops empty thinking rewrite placeholder text', () => {
expect(
coerceThinkingText(
"◉_◉ processing... I don't see any current rewritten thinking or next thinking to process. Could you provide the thinking content you'd like me to rewrite?"
)
).toBe('')
})
})

View file

@ -2,6 +2,7 @@ import type { ThreadMessage } from '@assistant-ui/react'
import type { QuickModelOption } from '@/app/chat/composer/types'
import type { ClientSessionState, CommandDispatchResponse } from '@/app/types'
import { formatRefValue } from '@/components/assistant-ui/directive-text'
import { type ChatMessage, type ChatMessagePart, chatMessageText, textPart } from '@/lib/chat-messages'
import type { ComposerAttachment } from '@/store/composer'
import type { ModelOptionsResponse, SessionInfo } from '@/types/hermes'
@ -25,7 +26,11 @@ export const BUILTIN_PERSONALITIES = [
'hype'
]
const SPINNER_STATUS_RE = /^\s*[(][^\s)]{1,8}[)]\s+[^.\n]{2,48}\.\.\.\s*/
const THINKING_STATUS_PREFIX_RE =
/^\s*(?:(?:[^\s.]{1,16})\s+)?(?:processing|thinking|reasoning|analyzing|pondering|contemplating|musing|cogitating|ruminating|deliberating|mulling|reflecting|computing|synthesizing|formulating|brainstorming)\.\.\.\s*/i
const EMPTY_THINKING_PLACEHOLDER_RE =
/\b(?:current rewritten thinking|next thinking to process|provide the thinking content|don't see any .*thinking)\b/i
export function createClientSessionState(
storedSessionId: string | null = null,
@ -102,7 +107,9 @@ export function coerceGatewayText(value: unknown): string {
}
export function coerceThinkingText(value: unknown): string {
return coerceGatewayText(value).replace(SPINNER_STATUS_RE, '').trim()
const text = coerceGatewayText(value).replace(THINKING_STATUS_PREFIX_RE, '').trim()
return EMPTY_THINKING_PLACEHOLDER_RE.test(text) ? '' : text
}
export function isImageGenerationTool(name?: string): boolean {
@ -135,7 +142,7 @@ export function attachmentDisplayText(attachment: ComposerAttachment): string |
if (attachment.kind === 'image') {
const id = attachment.detail || attachment.path || attachment.label
return id ? `@image:${id}` : null
return id ? `@image:${formatRefValue(id)}` : null
}
return null

View file

@ -0,0 +1,19 @@
// Emoji plus the variation selector / zero-width joiner that glue emoji sequences together.
const EMOJI_RE = /[\p{Extended_Pictographic}\uFE0F\u200D]+/gu
// Fenced code block; an unterminated fence runs to the end of the text.
const FENCED_CODE_RE = /```[\s\S]*?(?:```|$)/g
const INLINE_CODE_RE = /`([^`]+)`/g
// Markdown link or image. The optional leading "!" is consumed so that
// "![alt](src)" is spoken as "alt" instead of "!alt".
const MARKDOWN_LINK_RE = /!?\[([^\]]+)\]\(([^)]+)\)/g
const URL_RE = /\bhttps?:\/\/\S+/gi

/**
 * Converts markdown-flavoured chat text into plain text suitable for text-to-speech.
 *
 * Fenced code blocks are dropped, links and images are reduced to their label,
 * inline code is unwrapped, bare URLs become the word "link", emoji and markdown
 * punctuation are removed, and whitespace is collapsed.
 *
 * @param text - Raw message text.
 * @returns The speakable text; `''` when nothing speakable remains.
 */
export function sanitizeTextForSpeech(text: string): string {
  return (
    text
      .replace(FENCED_CODE_RE, ' ')
      // Links run before the URL pass so the label survives instead of the target.
      .replace(MARKDOWN_LINK_RE, '$1')
      .replace(INLINE_CODE_RE, '$1')
      .replace(URL_RE, ' link ')
      .replace(EMOJI_RE, ' ')
      .replace(/^#{1,6}\s+/gm, '')
      .replace(/[*_~>#]/g, '')
      .replace(/^\s*[-+*]\s+/gm, '')
      .replace(/\s+/g, ' ')
      .trim()
  )
}

View file

@ -0,0 +1,96 @@
import { speakText } from '@/hermes'
import {
$voicePlayback,
setVoicePlaybackState,
type VoicePlaybackSource,
type VoicePlaybackState
} from '@/store/voice-playback'
import { sanitizeTextForSpeech } from './speech-text'
// Audio element of the playback currently in flight; set by playSpeechText()
// and cleared by stopVoicePlayback() or when playback finishes.
let currentAudio: HTMLAudioElement | null = null
// Counter bumped by every stopVoicePlayback() call. playSpeechText() captures
// its value and compares later to detect that it has been superseded.
let sequence = 0
/**
 * Builds a playback-store snapshot for `status`, stamped with the module's
 * current `sequence`. Missing option fields are stored as `null`.
 */
function currentState(status: VoicePlaybackState['status'], options?: VoicePlaybackOptions): VoicePlaybackState {
  const messageId = options?.messageId ?? null
  const source = options?.source ?? null

  return { messageId, sequence, source, status }
}
/** Describes the caller of a playback; `currentState` copies these fields into the playback store. */
export interface VoicePlaybackOptions {
/** Id of the message being spoken; stored as `null` when omitted. */
messageId?: string | null
/** Feature that requested the playback. */
source: VoicePlaybackSource
}
/**
 * Cancels whatever playback is in flight and resets the store to idle.
 *
 * Bumping `sequence` first invalidates any pending `playSpeechText` call, so
 * it can tell it has been superseded once its awaits resume.
 */
export function stopVoicePlayback() {
  sequence += 1

  const playing = currentAudio

  if (playing !== null) {
    playing.pause()
    playing.src = ''
    currentAudio = null
  }

  setVoicePlaybackState(currentState('idle'))
}
/**
 * Synthesizes `text` to speech and plays it, replacing any playback already in flight.
 *
 * @param text - Raw message text; it is sanitized for speech before synthesis.
 * @param options - Source (and optional message id) published to the playback store.
 * @returns `true` when the audio played to the end; `false` when nothing speakable
 *   remained after sanitizing or when this request was superseded by a later
 *   play/stop call.
 * @throws Whatever `speakText` rejects with, or a playback error raised while
 *   this request is still the current one.
 */
export async function playSpeechText(text: string, options: VoicePlaybackOptions): Promise<boolean> {
  stopVoicePlayback()

  const speakableText = sanitizeTextForSpeech(text)

  if (!speakableText) {
    return false
  }

  // stopVoicePlayback() bumps `sequence`, so a mismatch means a newer play/stop took over.
  const ownSequence = sequence
  const isCurrent = () => ownSequence === sequence

  setVoicePlaybackState(currentState('preparing', options))

  try {
    const response = await speakText(speakableText)

    if (!isCurrent()) {
      return false
    }

    const audio = new Audio(response.data_url)
    currentAudio = audio
    setVoicePlaybackState(currentState('speaking', options))

    await new Promise<void>((resolve, reject) => {
      // Being superseded is not a failure. stopVoicePlayback() pauses the
      // element and clears its src, which surfaces here as a rejected play()
      // promise, an `error` event, or a `pause` event. In that case settle
      // quietly so the caller gets `false` instead of an exception or a
      // promise that never resolves.
      const failUnlessSuperseded = (reason: unknown) => {
        if (isCurrent()) {
          reject(reason)
        } else {
          resolve()
        }
      }

      audio.addEventListener('ended', () => resolve(), { once: true })
      audio.addEventListener('error', () => failUnlessSuperseded(new Error('Playback failed')), { once: true })
      audio.addEventListener('pause', () => {
        if (!isCurrent()) {
          resolve()
        }
      })
      void audio.play().catch(failUnlessSuperseded)
    })

    if (!isCurrent()) {
      return false
    }

    currentAudio = null
    setVoicePlaybackState(currentState('idle'))

    return true
  } catch (error) {
    // Only reset shared state if it still belongs to this request.
    if (isCurrent()) {
      currentAudio = null
      setVoicePlaybackState(currentState('idle'))
    }

    throw error
  }
}
/** Reports whether the playback store is in any state other than `'idle'`. */
export function isVoicePlaybackActive() {
  const { status } = $voicePlayback.get()

  return status !== 'idle'
}

View file

@ -50,6 +50,13 @@ const ERROR_SUMMARIES: { test: (msg: string) => boolean; summarize: (msg: string
test: msg => /neither voice_tools_openai_key nor openai_api_key is set/i.test(msg),
summarize: () => 'OpenAI TTS needs VOICE_TOOLS_OPENAI_KEY or OPENAI_API_KEY.'
},
{
test: msg => /ELEVENLABS_API_KEY not set/i.test(msg) || /ElevenLabs STT API error \(HTTP 401\)/i.test(msg),
summarize: msg =>
/ELEVENLABS_API_KEY not set/i.test(msg)
? 'ElevenLabs STT needs ELEVENLABS_API_KEY.'
: 'ElevenLabs rejected the API key (401).'
},
{
test: msg => /method not allowed/i.test(msg),
summarize: () => 'The desktop backend does not support that audio endpoint yet. Restart Hermes Desktop.'

View file

@ -0,0 +1,22 @@
import { atom } from 'nanostores'
/** Feature that initiated a playback. */
export type VoicePlaybackSource = 'read-aloud' | 'voice-conversation'
/** Phase of a playback: nothing active, audio being prepared, or audio playing. */
export type VoicePlaybackStatus = 'idle' | 'preparing' | 'speaking'
/** Snapshot of voice playback as held in the `$voicePlayback` store. */
export interface VoicePlaybackState {
// Message being spoken, or null when none is associated.
messageId: string | null
// Counter supplied by whoever writes the state; the store starts at 0.
sequence: number
// Feature that started the playback, or null when idle.
source: VoicePlaybackSource | null
status: VoicePlaybackStatus
}
/** Store holding the current voice playback snapshot; starts idle with no message or source. */
export const $voicePlayback = atom<VoicePlaybackState>({
messageId: null,
sequence: 0,
source: null,
status: 'idle'
})
/**
 * Replaces the whole playback snapshot held in `$voicePlayback`.
 *
 * @param next - Complete state to publish; it is not merged with the previous value.
 */
export function setVoicePlaybackState(next: VoicePlaybackState): void {
  $voicePlayback.set(next)
}

View file

@ -184,6 +184,29 @@ button {
-webkit-app-region: no-drag;
}
/* Vertical pulse for a single bar: rests short and dim, peaks at full height
   35% into the cycle, then dips to about half height before returning to rest. */
@keyframes voice-wave {
0%,
100% {
opacity: 0.45;
transform: scaleY(0.28);
}
35% {
opacity: 0.95;
transform: scaleY(1);
}
62% {
opacity: 0.7;
transform: scaleY(0.52);
}
}
/* Runs the voice-wave pulse forever; scaling from the center keeps the bar
   growing symmetrically up and down. */
.voice-wave-bar {
animation: voice-wave 860ms ease-in-out infinite;
transform-origin: center;
}
.composer-liquid-shell-wrap {
pointer-events: none;
border-radius: var(--composer-glass-radius, 20px);

View file

@ -168,6 +168,7 @@ export interface SessionRuntimeInfo {
personality?: string
provider?: string
reasoning_effort?: string
running?: boolean
service_tier?: string
skills?: Record<string, string[]> | string[]
tools?: Record<string, string[]>

View file

@ -4,6 +4,7 @@ import tailwindcss from '@tailwindcss/vite'
import path from 'path'
export default defineConfig({
base: './',
plugins: [react(), tailwindcss()],
resolve: {
alias: {

View file

@ -830,7 +830,7 @@ DEFAULT_CONFIG = {
"stt": {
"enabled": True,
"provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) | "mistral" (Voxtral Transcribe)
"provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) | "mistral" (Voxtral Transcribe) | "elevenlabs" (Scribe)
"local": {
"model": "base", # tiny, base, small, medium, large-v3
"language": "", # auto-detect by default; set to "en", "es", "fr", etc. to force
@ -841,6 +841,12 @@ DEFAULT_CONFIG = {
"mistral": {
"model": "voxtral-mini-latest", # voxtral-mini-latest, voxtral-mini-2602
},
"elevenlabs": {
"model_id": "scribe_v2", # scribe_v2, scribe_v1
"language_code": "", # auto-detect by default; set to "eng", "spa", "fra", etc. to force
"tag_audio_events": False,
"diarize": False,
},
},
"voice": {
@ -1791,9 +1797,10 @@ OPTIONAL_ENV_VARS = {
"category": "tool",
},
"ELEVENLABS_API_KEY": {
"description": "ElevenLabs API key for premium text-to-speech voices",
"description": "ElevenLabs API key for premium text-to-speech voices and Scribe transcription",
"prompt": "ElevenLabs API key",
"url": "https://elevenlabs.io/",
"tools": ["elevenlabs_tts", "voice_transcription"],
"password": True,
"category": "tool",
},

View file

@ -280,7 +280,12 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
"stt.provider": {
"type": "select",
"description": "Speech-to-text provider",
"options": ["local", "openai", "mistral"],
"options": ["local", "groq", "openai", "mistral", "xai", "elevenlabs"],
},
"stt.elevenlabs.model_id": {
"type": "select",
"description": "ElevenLabs Scribe model",
"options": ["scribe_v2", "scribe_v1"],
},
"display.skin": {
"type": "select",

View file

@ -24,6 +24,8 @@ def isolate_env(monkeypatch):
"MISTRAL_API_KEY",
"XAI_API_KEY",
"XAI_STT_BASE_URL",
"ELEVENLABS_API_KEY",
"ELEVENLABS_STT_BASE_URL",
):
monkeypatch.delenv(key, raising=False)
@ -87,6 +89,15 @@ class TestProviderSelectionGate:
return_value={"XAI_API_KEY": "dotenv-secret"}):
assert tt._get_provider({"enabled": True, "provider": "xai"}) == "xai"
def test_explicit_elevenlabs_sees_dotenv(self):
    """Explicit ``elevenlabs`` is honoured when the key is only present in dotenv."""
    from tools import transcription_tools as tt

    dotenv_values = {"ELEVENLABS_API_KEY": "dotenv-secret"}
    stt_config = {"enabled": True, "provider": "elevenlabs"}

    with patch.object(tt, "_HAS_FASTER_WHISPER", False), \
         patch.object(tt, "_has_local_command", return_value=False), \
         patch("hermes_cli.config.load_env", return_value=dotenv_values):
        assert tt._get_provider(stt_config) == "elevenlabs"
def test_auto_detect_sees_dotenv_groq(self):
"""No local backend, no explicit provider — auto-detect should fall
through to Groq when its key lives in dotenv only. Before the fix
@ -193,6 +204,33 @@ class TestTranscribeCallSitesReadDotenv:
assert result["success"] is True
assert captured["headers"]["Authorization"] == "Bearer xai-dotenv-key"
def test_transcribe_elevenlabs_forwards_dotenv_key(self):
    """The key resolved through ``get_env_value`` is sent in the ``xi-api-key`` header."""
    from tools import transcription_tools as tt

    seen: dict = {}

    def record_post(url, **kwargs):
        # Capture what the provider sends, then answer with a minimal success body.
        seen["url"] = url
        seen["headers"] = kwargs.get("headers", {})
        reply = MagicMock()
        reply.status_code = 200
        reply.json.return_value = {"text": "hello"}
        return reply

    def dotenv_only(name, default=None):
        return "elevenlabs-dotenv-key" if name == "ELEVENLABS_API_KEY" else None

    with patch.object(tt, "get_env_value", side_effect=dotenv_only), \
         patch.object(tt, "_load_stt_config", return_value={}), \
         patch("requests.post", side_effect=record_post), \
         patch("builtins.open", MagicMock()):
        outcome = tt._transcribe_elevenlabs("/tmp/fake.mp3", "scribe_v2")

    assert outcome["success"] is True
    assert seen["headers"]["xi-api-key"] == "elevenlabs-dotenv-key"
class TestEndToEndRegressionGuard:
"""End-to-end probe: patch ``hermes_cli.config.load_env`` to simulate

View file

@ -49,6 +49,7 @@ def clean_env(monkeypatch):
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("GROQ_API_KEY", raising=False)
monkeypatch.delenv("MISTRAL_API_KEY", raising=False)
monkeypatch.delenv("ELEVENLABS_API_KEY", raising=False)
monkeypatch.delenv("HERMES_LOCAL_STT_COMMAND", raising=False)
monkeypatch.delenv("HERMES_LOCAL_STT_LANGUAGE", raising=False)
@ -1342,3 +1343,161 @@ class TestTranscribeAudioXAIDispatch:
transcribe_audio(sample_ogg, model="custom-stt")
assert mock_xai.call_args[0][1] == "custom-stt"
# ============================================================================
# _transcribe_elevenlabs
# ============================================================================
class TestTranscribeElevenLabs:
    """Unit tests for ``_transcribe_elevenlabs`` with the HTTP layer mocked out."""

    def test_no_key(self, monkeypatch):
        """Without an API key the call fails and names the missing variable."""
        from tools.transcription_tools import _transcribe_elevenlabs

        monkeypatch.delenv("ELEVENLABS_API_KEY", raising=False)

        outcome = _transcribe_elevenlabs("/tmp/test.ogg", "scribe_v2")

        assert outcome["success"] is False
        assert "ELEVENLABS_API_KEY" in outcome["error"]

    def test_successful_transcription(self, monkeypatch, sample_ogg):
        """A 200 response yields the transcript and config options reach the form data."""
        from tools.transcription_tools import _transcribe_elevenlabs

        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test-key")

        reply = MagicMock()
        reply.status_code = 200
        reply.json.return_value = {"text": "hello from elevenlabs"}

        stt_config = {
            "elevenlabs": {
                "language_code": "eng",
                "tag_audio_events": True,
                "diarize": True,
            }
        }

        with patch("tools.transcription_tools._load_stt_config", return_value=stt_config), \
             patch("requests.post", return_value=reply) as mock_post:
            outcome = _transcribe_elevenlabs(sample_ogg, "scribe_v2")

        assert outcome["success"] is True
        assert outcome["transcript"] == "hello from elevenlabs"
        assert outcome["provider"] == "elevenlabs"

        sent = mock_post.call_args.kwargs
        assert sent["headers"]["xi-api-key"] == "eleven-test-key"
        form = sent["data"]
        assert form["model_id"] == "scribe_v2"
        assert form["language_code"] == "eng"
        assert form["tag_audio_events"] == "true"
        assert form["diarize"] == "true"

    def test_api_error_returns_failure(self, monkeypatch, sample_ogg):
        """A non-200 response is reported with its status code and server message."""
        from tools.transcription_tools import _transcribe_elevenlabs

        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test-key")

        reply = MagicMock()
        reply.status_code = 401
        reply.json.return_value = {"detail": {"message": "Invalid API key"}}
        reply.text = '{"detail": {"message": "Invalid API key"}}'

        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
             patch("requests.post", return_value=reply):
            outcome = _transcribe_elevenlabs(sample_ogg, "scribe_v2")

        assert outcome["success"] is False
        assert "HTTP 401" in outcome["error"]
        assert "Invalid API key" in outcome["error"]

    def test_empty_transcript_returns_failure(self, monkeypatch, sample_ogg):
        """A whitespace-only transcript counts as a failure."""
        from tools.transcription_tools import _transcribe_elevenlabs

        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test-key")

        reply = MagicMock()
        reply.status_code = 200
        reply.json.return_value = {"text": " "}

        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
             patch("requests.post", return_value=reply):
            outcome = _transcribe_elevenlabs(sample_ogg, "scribe_v2")

        assert outcome["success"] is False
        assert "empty transcript" in outcome["error"]
# ============================================================================
# _get_provider — ElevenLabs
# ============================================================================
class TestGetProviderElevenLabs:
    """Provider selection behaviour specific to ElevenLabs."""

    def test_elevenlabs_when_key_set(self, monkeypatch):
        """An explicit elevenlabs provider is honoured when its key exists."""
        from tools.transcription_tools import _get_provider

        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test")

        assert _get_provider({"provider": "elevenlabs"}) == "elevenlabs"

    def test_elevenlabs_explicit_no_key_returns_none(self, monkeypatch):
        """Explicit elevenlabs with no key returns none — no cross-provider fallback."""
        from tools.transcription_tools import _get_provider

        monkeypatch.delenv("ELEVENLABS_API_KEY", raising=False)

        assert _get_provider({"provider": "elevenlabs"}) == "none"

    def test_auto_detect_elevenlabs_after_xai(self, monkeypatch):
        """Auto-detect: elevenlabs is tried after xai when all above are unavailable."""
        from tools.transcription_tools import _get_provider

        for higher_priority_key in (
            "GROQ_API_KEY",
            "VOICE_TOOLS_OPENAI_KEY",
            "OPENAI_API_KEY",
            "MISTRAL_API_KEY",
            "XAI_API_KEY",
        ):
            monkeypatch.delenv(higher_priority_key, raising=False)
        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test")

        with patch.multiple(
            "tools.transcription_tools",
            _HAS_FASTER_WHISPER=False,
            _has_local_command=MagicMock(return_value=False),
            _HAS_OPENAI=False,
            _HAS_MISTRAL=False,
        ):
            assert _get_provider({}) == "elevenlabs"

    def test_auto_detect_xai_preferred_over_elevenlabs(self, monkeypatch):
        """Auto-detect: xai is preferred over elevenlabs."""
        from tools.transcription_tools import _get_provider

        monkeypatch.setenv("XAI_API_KEY", "xai-test")
        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test")

        with patch.multiple(
            "tools.transcription_tools",
            _HAS_FASTER_WHISPER=False,
            _has_local_command=MagicMock(return_value=False),
            _HAS_OPENAI=False,
            _HAS_MISTRAL=False,
        ):
            assert _get_provider({}) == "xai"
# ============================================================================
# transcribe_audio — ElevenLabs dispatch
# ============================================================================
class TestTranscribeAudioElevenLabsDispatch:
    """``transcribe_audio`` routes to ``_transcribe_elevenlabs`` with the right model."""

    def test_dispatches_to_elevenlabs(self, sample_ogg):
        """The elevenlabs provider is invoked exactly once and its result returned."""
        from tools.transcription_tools import transcribe_audio

        canned = {"success": True, "transcript": "hi", "provider": "elevenlabs"}

        with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "elevenlabs"}), \
             patch("tools.transcription_tools._get_provider", return_value="elevenlabs"), \
             patch("tools.transcription_tools._transcribe_elevenlabs", return_value=canned) as mock_elevenlabs:
            outcome = transcribe_audio(sample_ogg)

        assert outcome["success"] is True
        assert outcome["provider"] == "elevenlabs"
        mock_elevenlabs.assert_called_once()

    def test_config_elevenlabs_model_used(self, sample_ogg):
        """With no explicit model, ``stt.elevenlabs.model_id`` from config is used."""
        from tools.transcription_tools import transcribe_audio

        stt_config = {"provider": "elevenlabs", "elevenlabs": {"model_id": "scribe_v1"}}
        canned = {"success": True, "transcript": "hi"}

        with patch("tools.transcription_tools._load_stt_config", return_value=stt_config), \
             patch("tools.transcription_tools._get_provider", return_value="elevenlabs"), \
             patch("tools.transcription_tools._transcribe_elevenlabs", return_value=canned) as mock_elevenlabs:
            transcribe_audio(sample_ogg, model=None)

        assert mock_elevenlabs.call_args.args[1] == "scribe_v1"

    def test_model_override_passed_to_elevenlabs(self, sample_ogg):
        """An explicit ``model`` argument is forwarded to the provider."""
        from tools.transcription_tools import transcribe_audio

        canned = {"success": True, "transcript": "hi"}

        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
             patch("tools.transcription_tools._get_provider", return_value="elevenlabs"), \
             patch("tools.transcription_tools._transcribe_elevenlabs", return_value=canned) as mock_elevenlabs:
            transcribe_audio(sample_ogg, model="scribe_v2")

        assert mock_elevenlabs.call_args.args[1] == "scribe_v2"

View file

@ -11,6 +11,7 @@ Provides speech-to-text transcription with six providers:
- **mistral** Mistral Voxtral Transcribe API, requires ``MISTRAL_API_KEY``.
- **xai** xAI Grok STT API, requires ``XAI_API_KEY``. High accuracy,
Inverse Text Normalization, diarization, 21 languages.
- **elevenlabs** ElevenLabs Scribe API, requires ``ELEVENLABS_API_KEY``.
Used by the messaging gateway to automatically transcribe voice messages
sent by users on Telegram, Discord, WhatsApp, Slack, and Signal.
@ -84,6 +85,7 @@ DEFAULT_LOCAL_STT_LANGUAGE = "en"
DEFAULT_STT_MODEL = os.getenv("STT_OPENAI_MODEL", "whisper-1")
DEFAULT_GROQ_STT_MODEL = os.getenv("STT_GROQ_MODEL", "whisper-large-v3-turbo")
DEFAULT_MISTRAL_STT_MODEL = os.getenv("STT_MISTRAL_MODEL", "voxtral-mini-latest")
DEFAULT_ELEVENLABS_STT_MODEL = os.getenv("STT_ELEVENLABS_MODEL", "scribe_v2")
LOCAL_STT_COMMAND_ENV = "HERMES_LOCAL_STT_COMMAND"
LOCAL_STT_LANGUAGE_ENV = "HERMES_LOCAL_STT_LANGUAGE"
COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin")
@ -91,6 +93,7 @@ COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin")
GROQ_BASE_URL = os.getenv("GROQ_BASE_URL", "https://api.groq.com/openai/v1")
OPENAI_BASE_URL = os.getenv("STT_OPENAI_BASE_URL", "https://api.openai.com/v1")
XAI_STT_BASE_URL = os.getenv("XAI_STT_BASE_URL", "https://api.x.ai/v1")
ELEVENLABS_STT_BASE_URL = os.getenv("ELEVENLABS_STT_BASE_URL", "https://api.elevenlabs.io/v1")
SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg", ".aac", ".flac"}
LOCAL_NATIVE_AUDIO_FORMATS = {".wav", ".aiff", ".aif"}
@ -268,9 +271,17 @@ def _get_provider(stt_config: dict) -> str:
)
return "none"
if provider == "elevenlabs":
if get_env_value("ELEVENLABS_API_KEY"):
return "elevenlabs"
logger.warning(
"STT provider 'elevenlabs' configured but ELEVENLABS_API_KEY not set"
)
return "none"
return provider # Unknown — let it fail downstream
# --- Auto-detect (no explicit provider): local > groq > openai > mistral > xai -
# --- Auto-detect (no explicit provider): local > groq > openai > mistral > xai > elevenlabs -
if _HAS_FASTER_WHISPER:
return "local"
@ -288,6 +299,9 @@ def _get_provider(stt_config: dict) -> str:
if get_env_value("XAI_API_KEY"):
logger.info("No local STT available, using xAI Grok STT API")
return "xai"
if get_env_value("ELEVENLABS_API_KEY"):
logger.info("No local STT available, using ElevenLabs Scribe STT API")
return "elevenlabs"
return "none"
# ---------------------------------------------------------------------------
@ -781,6 +795,92 @@ def _transcribe_xai(file_path: str, model_name: str) -> Dict[str, Any]:
return {"success": False, "transcript": "", "error": f"xAI STT transcription failed: {e}"}
# ---------------------------------------------------------------------------
# Provider: ElevenLabs (Scribe STT API)
# ---------------------------------------------------------------------------
def _elevenlabs_error_detail(response) -> str:
    """Extract a human-readable message from a non-200 ElevenLabs response."""
    try:
        err_body = response.json()
        error_value = err_body.get("detail") or err_body.get("error")
    except Exception:
        # Non-JSON or non-object body: fall back to the raw text.
        return response.text[:300]
    if isinstance(error_value, dict):
        return str(error_value.get("message") or error_value)
    if error_value:
        return str(error_value)
    return response.text[:300]


def _transcribe_elevenlabs(file_path: str, model_name: str) -> Dict[str, Any]:
    """Transcribe using ElevenLabs Scribe STT API.

    Args:
        file_path: Path of the audio file to upload.
        model_name: Scribe model id sent as ``model_id``.

    Returns:
        A dict with ``success`` and ``transcript``. On success it also carries
        ``provider``; on failure it carries ``error``. Failures are reported
        through the dict rather than raised.
    """
    api_key = get_env_value("ELEVENLABS_API_KEY")
    if not api_key:
        return {"success": False, "transcript": "", "error": "ELEVENLABS_API_KEY not set"}

    stt_config = _load_stt_config() or {}
    elevenlabs_config = stt_config.get("elevenlabs")
    # A bare ``elevenlabs:`` key in YAML loads as None; treat any non-mapping as
    # "no overrides" instead of raising AttributeError outside the try below.
    if not isinstance(elevenlabs_config, dict):
        elevenlabs_config = {}

    base_url = str(
        elevenlabs_config.get("base_url")
        or get_env_value("ELEVENLABS_STT_BASE_URL")
        or ELEVENLABS_STT_BASE_URL
    ).strip().rstrip("/")
    language_code = str(elevenlabs_config.get("language_code") or "").strip()
    tag_audio_events = is_truthy_value(elevenlabs_config.get("tag_audio_events", False))
    diarize = is_truthy_value(elevenlabs_config.get("diarize", False))

    try:
        import requests

        # Multipart form fields are strings, so booleans are sent as "true"/"false".
        data: Dict[str, str] = {
            "model_id": model_name,
            "tag_audio_events": "true" if tag_audio_events else "false",
            "diarize": "true" if diarize else "false",
        }
        # Omit language_code entirely when no language is configured.
        if language_code:
            data["language_code"] = language_code

        with open(file_path, "rb") as audio_file:
            response = requests.post(
                f"{base_url}/speech-to-text",
                headers={"xi-api-key": api_key},
                files={"file": (Path(file_path).name, audio_file)},
                data=data,
                timeout=120,
            )

        if response.status_code != 200:
            detail = _elevenlabs_error_detail(response)
            return {
                "success": False,
                "transcript": "",
                "error": f"ElevenLabs STT API error (HTTP {response.status_code}): {detail}",
            }

        result = response.json()
        transcript_text = _extract_transcript_text(result)
        if not transcript_text:
            return {
                "success": False,
                "transcript": "",
                "error": "ElevenLabs STT returned empty transcript",
            }

        logger.info(
            "Transcribed %s via ElevenLabs Scribe (%s, %d chars)",
            Path(file_path).name,
            model_name,
            len(transcript_text),
        )
        return {"success": True, "transcript": transcript_text, "provider": "elevenlabs"}

    except PermissionError:
        return {"success": False, "transcript": "", "error": f"Permission denied: {file_path}"}
    except Exception as e:
        logger.error("ElevenLabs STT transcription failed: %s", e, exc_info=True)
        return {"success": False, "transcript": "", "error": f"ElevenLabs STT transcription failed: {e}"}
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
@ -792,7 +892,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
Provider priority:
1. User config (``stt.provider`` in config.yaml)
2. Auto-detect: local faster-whisper (free) > Groq (free tier) > OpenAI (paid)
2. Auto-detect: local > Groq > OpenAI > Mistral > xAI > ElevenLabs
Args:
file_path: Absolute path to the audio file to transcribe.
@ -854,6 +954,11 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
model_name = model or "grok-stt"
return _transcribe_xai(file_path, model_name)
if provider == "elevenlabs":
elevenlabs_cfg = stt_config.get("elevenlabs", {})
model_name = model or elevenlabs_cfg.get("model_id", DEFAULT_ELEVENLABS_STT_MODEL)
return _transcribe_elevenlabs(file_path, model_name)
# No provider available
return {
"success": False,
@ -862,8 +967,9 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
"No STT provider available. Install faster-whisper for free local "
f"transcription, configure {LOCAL_STT_COMMAND_ENV} or install a local whisper CLI, "
"set GROQ_API_KEY for free Groq Whisper, set MISTRAL_API_KEY for Mistral "
"Voxtral Transcribe, set XAI_API_KEY for xAI Grok STT, or set VOICE_TOOLS_OPENAI_KEY "
"or OPENAI_API_KEY for the OpenAI Whisper API."
"Voxtral Transcribe, set XAI_API_KEY for xAI Grok STT, set ELEVENLABS_API_KEY "
"for ElevenLabs Scribe, or set VOICE_TOOLS_OPENAI_KEY or OPENAI_API_KEY for "
"the OpenAI Whisper API."
),
}

View file

@ -1409,6 +1409,7 @@ def _session_info(agent, session: dict | None = None) -> dict:
"cwd": cwd,
"branch": _git_branch_for_cwd(cwd),
"personality": str(personality or ""),
"running": bool((session or {}).get("running")),
"version": "",
"release_date": "",
"update_behind": None,