diff --git a/.env.example b/.env.example
index 589978e6b5..d35c829d41 100644
--- a/.env.example
+++ b/.env.example
@@ -384,9 +384,9 @@ IMAGE_TOOLS_DEBUG=false
 # Default STT provider is "local" (faster-whisper) — runs on your machine, no API key needed.
 # Install with: pip install faster-whisper
 # Model downloads automatically on first use (~150 MB for "base").
-# To use cloud providers instead, set GROQ_API_KEY or VOICE_TOOLS_OPENAI_KEY above.
-# Provider priority: local > groq > openai
-# Configure in config.yaml: stt.provider: local | groq | openai
+# To use cloud providers instead, set GROQ_API_KEY, VOICE_TOOLS_OPENAI_KEY, or ELEVENLABS_API_KEY above.
+# Provider priority: local > groq > openai > mistral > xai > elevenlabs
+# Configure in config.yaml: stt.provider: local | groq | openai | mistral | xai | elevenlabs
 
 # =============================================================================
 # STT ADVANCED OVERRIDES (optional)
@@ -394,10 +394,12 @@ IMAGE_TOOLS_DEBUG=false
 # Override default STT models per provider (normally set via stt.model in config.yaml)
 # STT_GROQ_MODEL=whisper-large-v3-turbo
 # STT_OPENAI_MODEL=whisper-1
+# STT_ELEVENLABS_MODEL=scribe_v2
 
 # Override STT provider endpoints (for proxies or self-hosted instances)
 # GROQ_BASE_URL=https://api.groq.com/openai/v1
 # STT_OPENAI_BASE_URL=https://api.openai.com/v1
+# ELEVENLABS_STT_BASE_URL=https://api.elevenlabs.io/v1
 
 # =============================================================================
 # MICROSOFT TEAMS INTEGRATION
diff --git a/apps/desktop/package-lock.json b/apps/desktop/package-lock.json
index 2dabfaca5c..b8e9e9e77c 100644
--- a/apps/desktop/package-lock.json
+++ b/apps/desktop/package-lock.json
@@ -10,6 +10,7 @@
       "dependencies": {
         "@assistant-ui/react": "^0.12.28",
         "@assistant-ui/react-streamdown": "^0.1.11",
+        "@audiowave/react": "^0.6.2",
         "@chenglou/pretext": "^0.0.6",
         "@nanostores/react": "^1.1.0",
         "@radix-ui/react-slot": "^1.2.4",
@@ -305,6 +306,25 @@
         }
       }
     },
+    "node_modules/@audiowave/core": {
+      "version": "0.3.1",
+      "resolved": "https://registry.npmjs.org/@audiowave/core/-/core-0.3.1.tgz",
+      "integrity": "sha512-KtC2MTWKp6Orkedty3I8IklVBVQ2IFaFWDJ1cz+UsACpX2x1gINwZGTRZT7bw/dx8KazNSMuVK5lm1jL67KQkQ==",
+      "license": "MIT"
+    },
+    "node_modules/@audiowave/react": {
+      "version": "0.6.2",
+      "resolved": "https://registry.npmjs.org/@audiowave/react/-/react-0.6.2.tgz",
+      "integrity": "sha512-hajG2Iv3mVxived9wXad8L0ZQF+HmYnB3IrfOkIdkTv4RxOJDXwFWMAd0zb7ZU1Qz0IEYZXCbASFWyuxEQ7PAw==",
+      "license": "MIT",
+      "dependencies": {
+        "@audiowave/core": "0.3.1"
+      },
+      "peerDependencies": {
+        "react": ">=16.8.0",
+        "react-dom": ">=16.8.0"
+      }
+    },
     "node_modules/@babel/code-frame": {
       "version": "7.29.0",
       "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz",
diff --git a/apps/desktop/package.json b/apps/desktop/package.json
index 7a2672a007..26e12d29dc 100644
--- a/apps/desktop/package.json
+++ b/apps/desktop/package.json
@@ -23,6 +23,7 @@
   "dependencies": {
     "@assistant-ui/react": "^0.12.28",
     "@assistant-ui/react-streamdown": "^0.1.11",
+    "@audiowave/react": "^0.6.2",
     "@chenglou/pretext": "^0.0.6",
     "@nanostores/react": "^1.1.0",
     "@radix-ui/react-slot": "^1.2.4",
diff --git a/apps/desktop/src/app/chat/composer/constants.ts b/apps/desktop/src/app/chat/composer/constants.ts
index 60b0d1d1e1..945f2a59e2 100644
--- a/apps/desktop/src/app/chat/composer/constants.ts
+++ b/apps/desktop/src/app/chat/composer/constants.ts
@@ -1,4 +1,3 @@
-import type { Unstable_TriggerItem } from '@assistant-ui/core'
 import type { Unstable_IconComponent } from '@assistant-ui/react'
 import { FileText, FolderOpen, ImageIcon, Link, type LucideIcon } from 'lucide-react'
 import type { CSSProperties } from 'react'
@@ -37,7 +36,7 @@ export const DIRECTIVE_ICONS: Record<string, Unstable_IconComponent> = {
 }
 
 export const DIRECTIVE_POPOVER_CLASS =
-  'absolute bottom-24 left-1/2 z-50 w-[min(calc(100vw-1.5rem),28rem)] max-h-[min(28rem,calc(100vh-8rem))] -translate-x-1/2 overflow-y-auto overscroll-contain rounded-2xl border border-border/70 bg-popover p-1.5 text-popover-foreground shadow-2xl'
+  'absolute bottom-24 left-1/2 z-50 w-[min(calc(100vw-1.5rem),26rem)] max-h-[min(24rem,calc(100vh-8rem))] -translate-x-1/2 overflow-y-auto overscroll-contain rounded-2xl border border-border/60 bg-popover/95 p-1.5 text-popover-foreground shadow-2xl backdrop-blur-md ring-1 ring-black/5'
 
 export const PROMPT_SNIPPETS = [
   {
@@ -64,37 +63,6 @@ export const ASK_PLACEHOLDERS = [
   'Duck mode: gentle debugging, together.'
 ]
 
-export const REF_ITEMS: Unstable_TriggerItem[] = [
-  {
-    id: 'file:',
-    type: 'file',
-    label: 'File',
-    description: 'Attach a file path',
-    metadata: { icon: 'file' }
-  },
-  {
-    id: 'folder:',
-    type: 'folder',
-    label: 'Folder',
-    description: 'Attach a folder path',
-    metadata: { icon: 'folder' }
-  },
-  {
-    id: 'url:',
-    type: 'url',
-    label: 'URL',
-    description: 'Attach a web page',
-    metadata: { icon: 'url' }
-  },
-  {
-    id: 'image:',
-    type: 'image',
-    label: 'Image',
-    description: 'Attach an image path',
-    metadata: { icon: 'image' }
-  }
-]
-
 export const EDGE_NEWLINES_RE = /^[\t ]*(?:\r\n|\r|\n)+|(?:\r\n|\r|\n)+[\t ]*$/g
 export const DEFAULT_MAX_RECORDING_SECONDS = 120
 
diff --git a/apps/desktop/src/app/chat/composer/context-menu.tsx b/apps/desktop/src/app/chat/composer/context-menu.tsx
index 96851f9ac3..253b70e5a7 100644
--- a/apps/desktop/src/app/chat/composer/context-menu.tsx
+++ b/apps/desktop/src/app/chat/composer/context-menu.tsx
@@ -15,11 +15,10 @@ import {
 import { cn } from '@/lib/utils'
 
 import { GHOST_ICON_BTN, PROMPT_SNIPPETS } from './constants'
-import type { ChatBarState, ContextSuggestion } from './types'
+import type { ChatBarState } from './types'
 
 export function ContextMenu({
   state,
-  onAddContextRef,
   onInsertText,
   onOpenUrlDialog,
   onPasteClipboardImage,
@@ -28,7 +27,6 @@ export function ContextMenu({
   onPickImages
 }: {
   state: ChatBarState
-  onAddContextRef?: (refText: string, label?: string, detail?: string) => void
   onInsertText: (text: string) => void
   onOpenUrlDialog: () => void
   onPasteClipboardImage?: () => void
@@ -36,11 +34,6 @@ export function ContextMenu({
   onPickFolders?: () => void
   onPickImages?: () => void
 }) {
-  const choose = (item: ContextSuggestion) =>
-    onAddContextRef ? onAddContextRef(item.text, item.display, item.meta) : onInsertText(item.text)
-
-  const suggestions = state.tools.suggestions?.slice(0, 8) ?? []
-
   return (
     <DropdownMenu>
       <DropdownMenuTrigger asChild>
@@ -56,48 +49,28 @@ export function ContextMenu({
           <Plus size={18} />
         </Button>
       </DropdownMenuTrigger>
-      <DropdownMenuContent align="start" className="w-64" side="top" sideOffset={10}>
-        <DropdownMenuLabel className="text-xs text-muted-foreground">Add context</DropdownMenuLabel>
+      <DropdownMenuContent align="start" className="w-60" side="top" sideOffset={10}>
+        <DropdownMenuLabel className="text-[0.7rem] font-medium uppercase tracking-wide text-muted-foreground/85">
+          Attach
+        </DropdownMenuLabel>
         <ContextMenuItem disabled={!onPickFiles} icon={FileText} onSelect={onPickFiles}>
-          Files
+          Files…
         </ContextMenuItem>
         <ContextMenuItem disabled={!onPickFolders} icon={FolderOpen} onSelect={onPickFolders}>
-          Folders
+          Folder…
         </ContextMenuItem>
         <ContextMenuItem disabled={!onPickImages} icon={ImageIcon} onSelect={onPickImages}>
-          Images
+          Images…
         </ContextMenuItem>
         <ContextMenuItem disabled={!onPasteClipboardImage} icon={Clipboard} onSelect={onPasteClipboardImage}>
-          Image from clipboard
+          Paste image
         </ContextMenuItem>
         <ContextMenuItem icon={Link} onSelect={onOpenUrlDialog}>
-          URL
+          URL…
         </ContextMenuItem>
 
         <DropdownMenuSeparator />
 
-        <DropdownMenuSub>
-          <DropdownMenuSubTrigger>
-            <FileText />
-            <span>Suggested files</span>
-          </DropdownMenuSubTrigger>
-          <DropdownMenuSubContent className="w-72">
-            {suggestions.length === 0 ? (
-              <DropdownMenuItem disabled>
-                <span className="text-muted-foreground">No suggestions</span>
-              </DropdownMenuItem>
-            ) : (
-              suggestions.map(item => (
-                <DropdownMenuItem key={item.text} onSelect={() => choose(item)}>
-                  <FileText />
-                  <span className="min-w-0 flex-1 truncate">{item.display}</span>
-                  {item.meta && <span className="max-w-28 truncate text-xs text-muted-foreground">{item.meta}</span>}
-                </DropdownMenuItem>
-              ))
-            )}
-          </DropdownMenuSubContent>
-        </DropdownMenuSub>
-
         <DropdownMenuSub>
           <DropdownMenuSubTrigger>
             <MessageSquareText />
@@ -111,6 +84,13 @@ export function ContextMenu({
             ))}
           </DropdownMenuSubContent>
         </DropdownMenuSub>
+
+        <DropdownMenuSeparator />
+
+        <div className="px-2 py-1 text-[0.7rem] text-muted-foreground/80">
+          Tip: type <kbd className="rounded bg-muted/70 px-1 py-px font-mono text-[0.65rem]">@</kbd> to reference files
+          inline.
+        </div>
       </DropdownMenuContent>
     </DropdownMenu>
   )
diff --git a/apps/desktop/src/app/chat/composer/controls.tsx b/apps/desktop/src/app/chat/composer/controls.tsx
index 56cf5a8a9d..c191762cd4 100644
--- a/apps/desktop/src/app/chat/composer/controls.tsx
+++ b/apps/desktop/src/app/chat/composer/controls.tsx
@@ -15,6 +15,7 @@ interface ConversationProps {
   status: ConversationStatus
   onEnd: () => void
   onStart: () => void
+  onStopTurn: () => void
   onToggleMute: () => void
 }
 
@@ -80,6 +81,7 @@ function ConversationPill({
   level,
   muted,
   onEnd,
+  onStopTurn,
   onToggleMute,
   status
 }: ConversationProps & { disabled: boolean }) {
@@ -104,10 +106,10 @@ function ConversationPill({
         aria-pressed={muted}
         className={cn(GHOST_ICON_BTN, 'p-0', muted && 'bg-muted text-muted-foreground')}
         disabled={disabled}
-      onClick={() => {
-        triggerHaptic('selection')
-        onToggleMute()
-      }}
+        onClick={() => {
+          triggerHaptic('selection')
+          onToggleMute()
+        }}
         size="icon"
         title={muted ? 'Unmute microphone' : 'Mute microphone'}
         type="button"
@@ -115,6 +117,23 @@ function ConversationPill({
       >
         {muted ? <MicOff size={16} /> : <Mic size={16} />}
       </Button>
+      {listening && (
+        <Button
+          aria-label="Stop listening and send"
+          className="h-8 shrink-0 gap-1.5 rounded-full px-2.5 text-xs text-muted-foreground hover:bg-accent hover:text-foreground"
+          disabled={disabled}
+          onClick={() => {
+            triggerHaptic('submit')
+            onStopTurn()
+          }}
+          title="Stop listening and send"
+          type="button"
+          variant="ghost"
+        >
+          <Square className="fill-current" size={11} />
+          <span>Stop</span>
+        </Button>
+      )}
       <Button
         aria-label="End voice conversation"
         className="h-8 gap-1.5 rounded-full bg-primary px-3 text-xs font-medium text-primary-foreground hover:bg-primary/90"
diff --git a/apps/desktop/src/app/chat/composer/directive-popover.tsx b/apps/desktop/src/app/chat/composer/directive-popover.tsx
index 60a2bcd3e9..d5d69b36f2 100644
--- a/apps/desktop/src/app/chat/composer/directive-popover.tsx
+++ b/apps/desktop/src/app/chat/composer/directive-popover.tsx
@@ -5,9 +5,9 @@ import {
   type Unstable_MentionCategory,
   type Unstable_MentionDirective
 } from '@assistant-ui/react'
-import { ChevronDown } from 'lucide-react'
+import { FileText } from 'lucide-react'
 
-import { DIRECTIVE_POPOVER_CLASS, REF_ITEMS } from './constants'
+import { DIRECTIVE_POPOVER_CLASS } from './constants'
 import type { ContextSuggestion } from './types'
 
 export function DirectivePopover({
@@ -24,80 +24,73 @@ export function DirectivePopover({
   return (
     <ComposerPrimitive.Unstable_TriggerPopover adapter={adapter} char="@" className={DIRECTIVE_POPOVER_CLASS}>
       <ComposerPrimitive.Unstable_TriggerPopover.Directive {...directive} />
-      <ComposerPrimitive.Unstable_TriggerPopoverCategories>
-        {categories => (
-          <div className="grid gap-1">
-            {categories.map(c => (
-              <ComposerPrimitive.Unstable_TriggerPopoverCategoryItem
-                categoryId={c.id}
-                className="flex w-full items-center justify-between rounded-xl px-3 py-2 text-left text-sm hover:bg-accent data-highlighted:bg-accent"
-                key={c.id}
-              >
-                <span>{c.label}</span>
-                <ChevronDown className="-rotate-90 size-3.5 text-muted-foreground" />
-              </ComposerPrimitive.Unstable_TriggerPopoverCategoryItem>
-            ))}
-          </div>
-        )}
-      </ComposerPrimitive.Unstable_TriggerPopoverCategories>
       <ComposerPrimitive.Unstable_TriggerPopoverItems>
         {items => (
-          <div className="grid gap-1">
-            <ComposerPrimitive.Unstable_TriggerPopoverBack className="mb-1 text-xs text-muted-foreground hover:text-foreground">
-              Back
-            </ComposerPrimitive.Unstable_TriggerPopoverBack>
-            {items.map((item, index) => {
-              const Icon = directiveIcon(item, iconMap, Fallback)
+          <div className="grid gap-0.5">
+            <div className="px-2 pb-1 pt-0.5 text-[0.7rem] font-medium uppercase tracking-wide text-muted-foreground/80">
+              Reference a file
+            </div>
+            {items.length === 0 ? (
+              <div className="px-3 py-3 text-sm text-muted-foreground">
+                <p>No file suggestions yet.</p>
+                <p className="mt-1 text-xs text-muted-foreground/80">
+                  Keep typing to filter, or click <span className="font-medium text-foreground/80">+</span> to attach
+                  files, folders, or a URL.
+                </p>
+              </div>
+            ) : (
+              items.map((item, index) => {
+                const Icon = directiveIcon(item, iconMap, Fallback)
 
-              return (
-                <ComposerPrimitive.Unstable_TriggerPopoverItem
-                  className="flex w-full items-center gap-2 rounded-xl px-3 py-2 text-left text-sm hover:bg-accent data-highlighted:bg-accent"
-                  index={index}
-                  item={item}
-                  key={`${item.type}:${item.id}`}
-                >
-                  <Icon className="size-4 shrink-0 text-muted-foreground" />
-                  <span className="grid min-w-0 flex-1 gap-0.5">
-                    <span className="truncate font-medium">{item.label}</span>
-                    {item.description && (
-                      <span className="truncate text-xs text-muted-foreground">{item.description}</span>
-                    )}
-                  </span>
-                </ComposerPrimitive.Unstable_TriggerPopoverItem>
-              )
-            })}
+                return (
+                  <ComposerPrimitive.Unstable_TriggerPopoverItem
+                    className="flex w-full items-center gap-2 rounded-xl px-2.5 py-1.5 text-left text-sm transition-colors hover:bg-accent/70 data-highlighted:bg-accent"
+                    index={index}
+                    item={item}
+                    key={`${item.type}:${item.id}`}
+                  >
+                    <Icon className="size-4 shrink-0 text-muted-foreground/80" />
+                    <span className="grid min-w-0 flex-1 gap-0.5">
+                      <span className="truncate font-medium text-foreground">{item.label}</span>
+                      {item.description && (
+                        <span className="truncate text-[0.72rem] text-muted-foreground/85">{item.description}</span>
+                      )}
+                    </span>
+                  </ComposerPrimitive.Unstable_TriggerPopoverItem>
+                )
+              })
+            )}
           </div>
         )}
       </ComposerPrimitive.Unstable_TriggerPopoverItems>
     </ComposerPrimitive.Unstable_TriggerPopover>
   )
 }
+
 export function buildMentionCategories(suggestions: ContextSuggestion[] | undefined): Unstable_MentionCategory[] {
-  const items = (suggestions ?? [])
-    .map(s => {
-      const match = s.text.match(/^@(file|folder|url|image):(.+)$/)
+  const items: Unstable_TriggerItem[] = []
 
-      if (!match) {
-        return null
-      }
+  for (const s of suggestions ?? []) {
+    const match = s.text.match(/^@(file|folder|url|image):(.+)$/)
 
-      const [, type, id] = match
+    if (!match) {
+      continue
+    }
 
-      return {
-        id,
-        type,
-        label: s.display || id,
-        description: s.meta,
-        metadata: { icon: type }
-      }
+    const [, type, id] = match
+
+    items.push({
+      id,
+      type,
+      label: s.display || id,
+      description: s.meta,
+      metadata: { icon: type }
     })
-    .filter((item): item is NonNullable<typeof item> => Boolean(item))
+  }
 
-  return [
-    { id: 'refs', label: 'Hermes refs', items: REF_ITEMS },
-    ...(items.length ? [{ id: 'context', label: 'Suggested files', items }] : [])
-  ]
+  return [{ id: 'context', label: 'References', items }]
 }
+
 function directiveIcon(
   item: Unstable_TriggerItem,
   iconMap: Record<string, Unstable_IconComponent>,
@@ -106,5 +99,5 @@ function directiveIcon(
   const meta = item.metadata as Record<string, unknown> | undefined
   const key = typeof meta?.icon === 'string' ? meta.icon : item.type
 
-  return iconMap[key] ?? iconMap[item.type] ?? fallback
+  return iconMap[key] ?? iconMap[item.type] ?? fallback ?? FileText
 }
diff --git a/apps/desktop/src/app/chat/composer/hooks/use-voice-conversation.ts b/apps/desktop/src/app/chat/composer/hooks/use-voice-conversation.ts
index 79cd5eabe3..483451e21c 100644
--- a/apps/desktop/src/app/chat/composer/hooks/use-voice-conversation.ts
+++ b/apps/desktop/src/app/chat/composer/hooks/use-voice-conversation.ts
@@ -1,6 +1,6 @@
 import { useCallback, useEffect, useRef, useState } from 'react'
 
-import { speakText } from '@/hermes'
+import { playSpeechText, stopVoicePlayback } from '@/lib/voice-playback'
 import { notify, notifyError } from '@/store/notifications'
 
 import {
@@ -14,13 +14,19 @@ import { useMicRecorder } from './use-mic-recorder'
 
 export type ConversationStatus = 'idle' | 'listening' | 'transcribing' | 'thinking' | 'speaking'
 
+interface PendingVoiceResponse {
+  id: string
+  pending: boolean
+  text: string
+}
+
 interface VoiceConversationOptions {
   busy: boolean
   enabled: boolean
   onFatalError?: () => void
-  onSubmit: (text: string) => void
+  onSubmit: (text: string) => Promise<void> | void
   onTranscribeAudio?: (audio: Blob) => Promise<string>
-  pendingResponseText: () => string | null
+  pendingResponse: () => PendingVoiceResponse | null
   consumePendingResponse: () => void
 }
 
@@ -30,16 +36,19 @@ export function useVoiceConversation({
   onFatalError,
   onSubmit,
   onTranscribeAudio,
-  pendingResponseText,
+  pendingResponse,
   consumePendingResponse
 }: VoiceConversationOptions) {
   const { handle, level } = useMicRecorder()
   const [status, setStatus] = useState<ConversationStatus>('idle')
   const [muted, setMuted] = useState(false)
-  const audioRef = useRef<HTMLAudioElement | null>(null)
   const turnTimeoutRef = useRef<number | null>(null)
   const pendingStartRef = useRef(false)
-  const lastSpokenRef = useRef<string | null>(null)
+  const turnClosingRef = useRef(false)
+  const awaitingSpokenResponseRef = useRef(false)
+  const responseIdRef = useRef<string | null>(null)
+  const spokenSourceLengthRef = useRef(0)
+  const speechBufferRef = useRef('')
   const enabledRef = useRef(enabled)
   const mutedRef = useRef(muted)
   const busyRef = useRef(busy)
@@ -69,36 +78,74 @@ export function useVoiceConversation({
     }
   }
 
-  const stopAudio = useCallback(() => {
-    const audio = audioRef.current
+  const resetSpeechBuffer = () => {
+    responseIdRef.current = null
+    spokenSourceLengthRef.current = 0
+    speechBufferRef.current = ''
+  }
 
-    if (audio) {
-      audio.pause()
-      audio.src = ''
-      audioRef.current = null
-    }
-  }, [])
-
-  const handleTurn = useCallback(async () => {
-    clearTurnTimeout()
-    setStatus('transcribing')
-    const result = await handle.stop()
-
-    if (!result || !result.heardSpeech || !onTranscribeAudio) {
-      if (enabledRef.current && !mutedRef.current && !busyRef.current && statusRef.current !== 'speaking') {
-        pendingStartRef.current = true
-      }
-
-      setStatus('idle')
+  const appendSpeechText = (text: string) => {
+    // Deltas are arbitrary slices of a growing stream: join them verbatim (no added space, no trim at the seam).
 
+    if (!text) {
       return
     }
 
-    try {
-      const transcript = (await onTranscribeAudio(result.audio)).trim()
+    speechBufferRef.current += text
+  }
 
-      if (!transcript) {
-        if (enabledRef.current) {
+  const takeSpeechChunk = (force = false): string | null => {
+    const buffer = speechBufferRef.current.replace(/\s+/g, ' ').trimStart()
+
+    if (!buffer) {
+      speechBufferRef.current = ''
+
+      return null
+    }
+
+    const sentence = buffer.match(/^(.+?[.!?。！？])(?:\s+|$)/)
+
+    if (sentence?.[1] && (sentence[1].length >= 8 || force)) {
+      const chunk = sentence[1].trim()
+      speechBufferRef.current = buffer.slice(sentence[1].length).trimStart()
+
+      return chunk
+    }
+
+    if (!force && buffer.length > 220) {
+      const softBoundary = Math.max(buffer.lastIndexOf(', ', 180), buffer.lastIndexOf('; ', 180), buffer.lastIndexOf(': ', 180))
+
+      if (softBoundary > 80) {
+        const chunk = buffer.slice(0, softBoundary + 1).trim()
+        speechBufferRef.current = buffer.slice(softBoundary + 1).trimStart()
+
+        return chunk
+      }
+    }
+
+    if (!force) {
+      return null
+    }
+
+    speechBufferRef.current = ''
+
+    return buffer.trimEnd()
+  }
+
+  const handleTurn = useCallback(async (forceTranscribe = false) => {
+    if (turnClosingRef.current) {
+      return
+    }
+
+    turnClosingRef.current = true
+    clearTurnTimeout()
+    setStatus('transcribing')
+
+    try {
+      const result = await handle.stop()
+
+      if (!result || (!result.heardSpeech && !forceTranscribe) || !onTranscribeAudio) {
+        if (enabledRef.current && !mutedRef.current && !busyRef.current && statusRef.current !== 'speaking') {
           pendingStartRef.current = true
         }
 
@@ -107,16 +154,34 @@ export function useVoiceConversation({
         return
       }
 
-      onSubmit(transcript)
-      setStatus('thinking')
-    } catch (error) {
-      notifyError(error, 'Voice transcription failed')
+      try {
+        const transcript = (await onTranscribeAudio(result.audio)).trim()
 
-      if (enabledRef.current && !mutedRef.current && !busyRef.current) {
-        pendingStartRef.current = true
+        if (!transcript) {
+          if (enabledRef.current) {
+            pendingStartRef.current = true
+          }
+
+          setStatus('idle')
+
+          return
+        }
+
+        awaitingSpokenResponseRef.current = true
+        resetSpeechBuffer()
+        await onSubmit(transcript)
+        setStatus('thinking')
+      } catch (error) {
+        notifyError(error, 'Voice transcription failed')
+
+        if (enabledRef.current && !mutedRef.current && !busyRef.current) {
+          pendingStartRef.current = true
+        }
+
+        setStatus('idle')
       }
-
-      setStatus('idle')
+    } finally {
+      turnClosingRef.current = false
     }
   }, [handle, onSubmit, onTranscribeAudio])
 
@@ -158,24 +223,13 @@ export function useVoiceConversation({
 
   const speak = useCallback(
     async (text: string) => {
-      stopAudio()
       setStatus('speaking')
 
       try {
-        const response = await speakText(text)
-        const audio = new Audio(response.data_url)
-        audioRef.current = audio
-
-        await new Promise<void>((resolve, reject) => {
-          audio.addEventListener('ended', () => resolve(), { once: true })
-          audio.addEventListener('error', () => reject(new Error('Playback failed')), { once: true })
-          void audio.play().catch(reject)
-        })
+        await playSpeechText(text, { source: 'voice-conversation' })
       } catch (error) {
         notifyError(error, 'Voice playback failed')
       } finally {
-        audioRef.current = null
-
         if (enabledRef.current) {
           pendingStartRef.current = true
           setStatus('idle')
@@ -184,7 +238,7 @@ export function useVoiceConversation({
         }
       }
     },
-    [stopAudio]
+    []
   )
 
   const start = useCallback(async () => {
@@ -200,20 +254,31 @@ export function useVoiceConversation({
     }
 
     setMuted(false)
-    lastSpokenRef.current = null
+    awaitingSpokenResponseRef.current = false
+    resetSpeechBuffer()
+    consumePendingResponse()
     pendingStartRef.current = true
-  }, [onFatalError, onTranscribeAudio])
+    await startListening()
+  }, [consumePendingResponse, onFatalError, onTranscribeAudio, startListening])
 
   const end = useCallback(async () => {
     pendingStartRef.current = false
     clearTurnTimeout()
-    stopAudio()
+    stopVoicePlayback()
     handle.cancel()
-    lastSpokenRef.current = null
+    turnClosingRef.current = false
+    awaitingSpokenResponseRef.current = false
+    resetSpeechBuffer()
     consumePendingResponse()
     setMuted(false)
     setStatus('idle')
-  }, [consumePendingResponse, handle, stopAudio])
+  }, [consumePendingResponse, handle])
+
+  const stopTurn = useCallback(() => {
+    if (statusRef.current === 'listening') {
+      void handleTurn(true)
+    }
+  }, [handleTurn])
 
   const toggleMute = useCallback(() => {
     setMuted(value => {
@@ -231,22 +296,77 @@ export function useVoiceConversation({
     })
   }, [handle])
 
-  // Drive the loop: speak any new assistant response, otherwise start listening
-  // when the agent is idle and we're between turns.
+  useEffect(() => {
+    if (!enabled) {
+      return
+    }
+
+    const onKeyDown = (event: KeyboardEvent) => {
+      // Space ends the turn only when it is not being typed into a text field (composer, dialogs, editors).
+      const typing = event.target instanceof Element && event.target.closest('input, textarea, select, [contenteditable]:not([contenteditable="false"])')
+      const modified = event.repeat || event.metaKey || event.ctrlKey || event.altKey
+
+      if (event.code !== 'Space' || modified || typing || statusRef.current !== 'listening') {
+        return
+      }
+
+      event.preventDefault()
+      stopTurn()
+    }
+
+    window.addEventListener('keydown', onKeyDown, { capture: true })
+
+    return () => window.removeEventListener('keydown', onKeyDown, { capture: true })
+  }, [enabled, stopTurn])
+
+  // Drive the loop: after a voice-submitted turn, speak stable chunks as the
+  // assistant stream grows. Otherwise start listening when idle between turns.
   useEffect(() => {
     if (!enabled || muted) {
       return
     }
 
-    const text = pendingResponseText()
-    const trimmed = text?.trim() ?? ''
+    if (awaitingSpokenResponseRef.current && status !== 'speaking') {
+      const response = pendingResponse()
 
-    if (trimmed && trimmed !== lastSpokenRef.current && status !== 'speaking') {
-      lastSpokenRef.current = trimmed
-      consumePendingResponse()
-      void speak(trimmed)
+      if (response) {
+        if (response.id !== responseIdRef.current) {
+          resetSpeechBuffer()
+          responseIdRef.current = response.id
+        }
 
-      return
+        if (response.text.length > spokenSourceLengthRef.current) {
+          appendSpeechText(response.text.slice(spokenSourceLengthRef.current))
+          spokenSourceLengthRef.current = response.text.length
+        }
+
+        const chunk = takeSpeechChunk(!response.pending && !busy)
+
+        if (chunk) {
+          void speak(chunk)
+
+          return
+        }
+
+        if (!response.pending && !busy) {
+          awaitingSpokenResponseRef.current = false
+          consumePendingResponse()
+          resetSpeechBuffer()
+          pendingStartRef.current = true
+          setStatus('idle')
+
+          return
+        }
+      }
+
+      if (!busy && status === 'thinking') {
+        awaitingSpokenResponseRef.current = false
+        resetSpeechBuffer()
+        pendingStartRef.current = true
+        setStatus('idle')
+
+        return
+      }
     }
 
     if (busy || status !== 'idle') {
@@ -256,7 +376,7 @@ export function useVoiceConversation({
     if (pendingStartRef.current) {
       void startListening()
     }
-  }, [busy, consumePendingResponse, enabled, muted, pendingResponseText, speak, startListening, status])
+  }, [busy, consumePendingResponse, enabled, muted, pendingResponse, speak, startListening, status])
 
   useEffect(() => {
     if (enabled && !wasEnabledRef.current) {
@@ -270,5 +390,5 @@ export function useVoiceConversation({
     wasEnabledRef.current = enabled
   }, [enabled, end, start])
 
-  return { end, level, muted, start, status, toggleMute }
+  return { end, level, muted, start, status, stopTurn, toggleMute }
 }
diff --git a/apps/desktop/src/app/chat/composer/index.tsx b/apps/desktop/src/app/chat/composer/index.tsx
index ae45e1fbe0..f8eae715ff 100644
--- a/apps/desktop/src/app/chat/composer/index.tsx
+++ b/apps/desktop/src/app/chat/composer/index.tsx
@@ -32,7 +32,7 @@ import { useVoiceConversation } from './hooks/use-voice-conversation'
 import { useVoiceRecorder } from './hooks/use-voice-recorder'
 import type { ChatBarProps } from './types'
 import { UrlDialog } from './url-dialog'
-import { VoiceActivity } from './voice-activity'
+import { VoiceActivity, VoicePlaybackActivity } from './voice-activity'
 
 function trimPastedEdgeNewlines(text: string): string {
   return text.replace(EDGE_NEWLINES_RE, '')
@@ -45,7 +45,6 @@ export function ChatBar({
   maxRecordingSeconds = DEFAULT_MAX_RECORDING_SECONDS,
   state,
   onCancel,
-  onAddContextRef,
   onAddUrl,
   onPasteClipboardImage,
   onPickFiles,
@@ -203,7 +202,7 @@ export function ChatBar({
       onCancel()
     } else if (draft.trim() || attachments.length > 0) {
       triggerHaptic('submit')
-      onSubmit(draft)
+      void onSubmit(draft)
       aui.composer().setText('')
     }
 
@@ -235,9 +234,9 @@ export function ChatBar({
     onTranscribeAudio
   })
 
-  const pendingResponseText = () => {
+  const pendingResponse = () => {
     const messages = $messages.get()
-    const last = messages.findLast(m => m.role === 'assistant' && !m.pending && !m.hidden)
+    const last = messages.findLast(m => m.role === 'assistant' && !m.hidden)
 
     if (!last || last.id === lastSpokenIdRef.current) {
       return null
@@ -249,9 +248,11 @@ export function ChatBar({
       return null
     }
 
-    lastSpokenIdRef.current = last.id
-
-    return text
+    return {
+      id: last.id,
+      pending: Boolean(last.pending),
+      text
+    }
   }
 
   const consumePendingResponse = () => {
@@ -263,13 +264,13 @@ export function ChatBar({
     }
   }
 
-  const submitVoiceTurn = (text: string) => {
+  const submitVoiceTurn = async (text: string) => {
     if (busy) {
       return
     }
 
     triggerHaptic('submit')
-    onSubmit(text)
+    await onSubmit(text)
     aui.composer().setText('')
     draftRef.current = ''
   }
@@ -281,12 +282,11 @@ export function ChatBar({
     onFatalError: () => setVoiceConversationActive(false),
     onSubmit: submitVoiceTurn,
     onTranscribeAudio,
-    pendingResponseText
+    pendingResponse
   })
 
   const contextMenu = (
     <ContextMenu
-      onAddContextRef={onAddContextRef}
       onInsertText={insertText}
       onOpenUrlDialog={() => {
         triggerHaptic('open')
@@ -313,6 +313,7 @@ export function ChatBar({
           void conversation.end()
         },
         onStart: () => setVoiceConversationActive(true),
+        onStopTurn: conversation.stopTurn,
         onToggleMute: conversation.toggleMute,
         status: conversation.status
       }}
@@ -343,14 +344,12 @@ export function ChatBar({
   return (
     <>
       <ComposerPrimitive.Unstable_TriggerPopoverRoot>
-        {mentionCategories.length > 0 && (
-          <DirectivePopover
-            adapter={mention.adapter}
-            directive={mention.directive}
-            fallbackIcon={mention.fallbackIcon ?? FileText}
-            iconMap={mention.iconMap ?? DIRECTIVE_ICONS}
-          />
-        )}
+        <DirectivePopover
+          adapter={mention.adapter}
+          directive={mention.directive}
+          fallbackIcon={mention.fallbackIcon ?? FileText}
+          iconMap={mention.iconMap ?? DIRECTIVE_ICONS}
+        />
         <ComposerPrimitive.Root
           className={cn(SHELL, 'group/composer pb-8 pt-2')}
           onSubmit={e => {
@@ -407,6 +406,7 @@ export function ChatBar({
               style={{ ...COMPOSER_BACKDROP_STYLE, borderRadius: `${glassTweaks.liquid.cornerRadius}px` }}
             >
               <VoiceActivity state={voiceActivityState} />
+              <VoicePlaybackActivity />
               {attachments.length > 0 && <AttachmentList attachments={attachments} onRemove={onRemoveAttachment} />}
               {stacked ? (
                 <>
diff --git a/apps/desktop/src/app/chat/composer/types.ts b/apps/desktop/src/app/chat/composer/types.ts
index ba2326510d..7a39715bc8 100644
--- a/apps/desktop/src/app/chat/composer/types.ts
+++ b/apps/desktop/src/app/chat/composer/types.ts
@@ -36,7 +36,7 @@ export interface ChatBarProps {
   onPickFolders?: () => void
   onPickImages?: () => void
   onRemoveAttachment?: (id: string) => void
-  onSubmit: (value: string) => void
+  onSubmit: (value: string) => Promise<void> | void
   onTranscribeAudio?: (audio: Blob) => Promise<string>
 }
 
diff --git a/apps/desktop/src/app/chat/composer/url-dialog.tsx b/apps/desktop/src/app/chat/composer/url-dialog.tsx
index 60d8886299..92fefcc714 100644
--- a/apps/desktop/src/app/chat/composer/url-dialog.tsx
+++ b/apps/desktop/src/app/chat/composer/url-dialog.tsx
@@ -1,9 +1,12 @@
+import { Globe } from 'lucide-react'
 import type * as React from 'react'
 
 import { Button } from '@/components/ui/button'
 import { Dialog, DialogContent, DialogDescription, DialogFooter, DialogHeader, DialogTitle } from '@/components/ui/dialog'
 import { Input } from '@/components/ui/input'
 
+const URL_HINT = /^https?:\/\//i
+
 export function UrlDialog({
   inputRef,
   onChange,
@@ -19,14 +22,23 @@ export function UrlDialog({
   open: boolean
   value: string
 }) {
+  const trimmed = value.trim()
+  const looksLikeUrl = trimmed.length > 0 && URL_HINT.test(trimmed)
+
   return (
     <Dialog onOpenChange={onOpenChange} open={open}>
-      <DialogContent className="max-w-md">
-        <DialogHeader>
-          <DialogTitle>Add URL Context</DialogTitle>
-          <DialogDescription>
-            Hermes will fetch this URL via the existing @url context resolver when you send the prompt.
-          </DialogDescription>
+      <DialogContent className="max-w-md gap-5">
+        <DialogHeader className="flex-row items-center gap-3 sm:items-center">
+          <span
+            aria-hidden
+            className="grid size-9 shrink-0 place-items-center rounded-xl bg-[color-mix(in_srgb,var(--dt-primary)_14%,transparent)] text-primary ring-1 ring-inset ring-primary/15"
+          >
+            <Globe className="size-4" />
+          </span>
+          <div className="grid gap-0.5 text-left">
+            <DialogTitle>Attach a URL</DialogTitle>
+            <DialogDescription>Hermes will fetch the page and include it as context for this turn.</DialogDescription>
+          </div>
         </DialogHeader>
         <form
           className="grid gap-4"
@@ -35,18 +47,29 @@ export function UrlDialog({
             onSubmit()
           }}
         >
-          <Input
-            onChange={e => onChange(e.target.value)}
-            placeholder="https://example.com"
-            ref={inputRef}
-            value={value}
-          />
+          <div className="grid gap-1.5">
+            <Input
+              autoComplete="off"
+              autoCorrect="off"
+              inputMode="url"
+              onChange={e => onChange(e.target.value)}
+              placeholder="https://example.com/post"
+              ref={inputRef}
+              spellCheck={false}
+              value={value}
+            />
+            {trimmed.length > 0 && !looksLikeUrl && (
+              <p className="text-xs text-muted-foreground/85">
+                Include the full URL, e.g. <span className="font-mono">https://…</span>
+              </p>
+            )}
+          </div>
           <DialogFooter>
             <Button onClick={() => onOpenChange(false)} type="button" variant="ghost">
               Cancel
             </Button>
-            <Button disabled={!value.trim()} type="submit">
-              Add URL
+            <Button disabled={!looksLikeUrl} type="submit">
+              Attach
             </Button>
           </DialogFooter>
         </form>
diff --git a/apps/desktop/src/app/chat/composer/voice-activity.tsx b/apps/desktop/src/app/chat/composer/voice-activity.tsx
index f0f28ec3df..2f653bc198 100644
--- a/apps/desktop/src/app/chat/composer/voice-activity.tsx
+++ b/apps/desktop/src/app/chat/composer/voice-activity.tsx
@@ -1,6 +1,10 @@
-import { Loader2, Mic } from 'lucide-react'
+import { useStore } from '@nanostores/react'
+import { Loader2, Mic, Volume2, VolumeX } from 'lucide-react'
 
+import { Button } from '@/components/ui/button'
 import { cn } from '@/lib/utils'
+import { stopVoicePlayback } from '@/lib/voice-playback'
+import { $voicePlayback } from '@/store/voice-playback'
 
 import type { VoiceActivityState } from './types'
 
@@ -36,6 +40,25 @@ function VoiceLevelBars({ level, active }: { active: boolean; level: number }) {
   )
 }
 
+function PlaybackBars() {
+  const bars = [820, 940, 760, 880, 700, 980, 790] // one animationDuration (ms) per bar; the length sets the bar count
+
+  return (
+    <div aria-hidden="true" className="flex h-4 items-center gap-0.75">
+      {bars.map((duration, index) => (
+        <span
+          className="voice-wave-bar h-full w-0.5 rounded-full bg-current"
+          key={index}
+          style={{
+            animationDelay: `${index * -110}ms`, // 0, -110, -220, … ms: a different (non-positive) delay per bar
+            animationDuration: `${duration}ms`
+          }}
+        />
+      ))}
+    </div>
+  )
+}
+
 export function VoiceActivity({
   state
 }: {
@@ -75,3 +98,50 @@ export function VoiceActivity({
     </div>
   )
 }
+/** Playback status strip driven by the $voicePlayback store; renders nothing while the status is 'idle'. */
+export function VoicePlaybackActivity() {
+  const playback = useStore($voicePlayback)
+
+  if (playback.status === 'idle') {
+    return null
+  }
+
+  const preparing = playback.status === 'preparing' // spinner + "Preparing audio"; the animated bars are hidden
+
+  const title = preparing
+    ? 'Preparing audio'
+    : playback.source === 'voice-conversation'
+      ? 'Speaking response'
+      : 'Reading aloud'
+
+  return (
+    <div
+      aria-live="polite"
+      className={cn(
+        'flex h-8 items-center gap-2 rounded-xl border border-primary/20 bg-primary/10 px-2.5 text-xs text-primary',
+        'shadow-[inset_0_1px_0_rgba(255,255,255,0.35)] backdrop-blur-sm'
+      )}
+      role="status"
+    >
+      <div className="flex size-5 shrink-0 items-center justify-center rounded-full bg-primary/15 text-primary">
+        {preparing ? <Loader2 className="animate-spin" size={12} /> : <Volume2 size={12} />}
+      </div>
+
+      <div className="flex min-w-0 flex-1 items-center gap-2">
+        <span className="truncate font-medium text-foreground/85">{title}</span>
+        {!preparing && <PlaybackBars />}
+      </div>
+
+      <Button
+        className="h-6 shrink-0 gap-1 rounded-full px-2 text-[0.6875rem]"
+        onClick={stopVoicePlayback}
+        size="sm"
+        type="button"
+        variant="ghost"
+      >
+        <VolumeX size={12} />
+        Stop
+      </Button>
+    </div>
+  )
+}
diff --git a/apps/desktop/src/app/chat/hooks/use-composer-actions.ts b/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
index 279db6bd1e..16b3b3e930 100644
--- a/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
+++ b/apps/desktop/src/app/chat/hooks/use-composer-actions.ts
@@ -1,5 +1,6 @@
 import { useCallback } from 'react'
 
+import { formatRefValue } from '@/components/assistant-ui/directive-text'
 import { attachmentId, contextPath, pathLabel } from '@/lib/chat-runtime'
 import {
   addComposerAttachment,
@@ -57,7 +58,7 @@ export function useComposerActions({ activeSessionId, currentCwd, requestGateway
           kind,
           label: pathLabel(path),
           detail: rel,
-          refText: `@${kind}:${rel}`,
+          refText: `@${kind}:${formatRefValue(rel)}`,
           path
         })
       }
diff --git a/apps/desktop/src/app/chat/index.tsx b/apps/desktop/src/app/chat/index.tsx
index 3d467a781e..0c7be68b31 100644
--- a/apps/desktop/src/app/chat/index.tsx
+++ b/apps/desktop/src/app/chat/index.tsx
@@ -8,13 +8,14 @@ import { useStore } from '@nanostores/react'
 import { useQuery } from '@tanstack/react-query'
 import { ChevronDown } from 'lucide-react'
 import type * as React from 'react'
-import { Suspense, useMemo } from 'react'
+import { Suspense, useMemo, useRef } from 'react'
 import { useLocation } from 'react-router-dom'
 
 import { Thread } from '@/components/assistant-ui/thread'
 import { NotificationStack } from '@/components/notifications'
 import { Button } from '@/components/ui/button'
 import { getGlobalModelOptions, type HermesGateway } from '@/hermes'
+import type { ChatMessage } from '@/lib/chat-messages'
 import { quickModelOptions, sessionTitle, toRuntimeMessage } from '@/lib/chat-runtime'
 import { cn } from '@/lib/utils'
 import { $pinnedSessionIds } from '@/store/layout'
@@ -57,7 +58,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
   onPickFolders: () => void
   onPickImages: () => void
   onRemoveAttachment: (id: string) => void
-  onSubmit: (text: string) => void
+  onSubmit: (text: string) => Promise<void> | void
   onChangeCwd: (cwd: string) => void
   onBrowseCwd: () => void
   onOpenModelPicker: () => void
@@ -118,6 +119,7 @@ export function ChatView({
   const pinnedSessionIds = useStore($pinnedSessionIds)
   const selectedSessionId = useStore($selectedStoredSessionId)
   const sessions = useStore($sessions)
+  const runtimeMessageCacheRef = useRef(new WeakMap<ChatMessage, ThreadMessage>())
   const activeStoredSession = sessions.find(session => session.id === selectedSessionId) || null
   const isRoutedSessionView = Boolean(routeSessionId(location.pathname))
   const selectedIsPinned = selectedSessionId ? pinnedSessionIds.includes(selectedSessionId) : false
@@ -128,6 +130,7 @@ export function ChatView({
   const loadingSession = isRoutedSessionView && messages.length === 0
   const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse)
   const showChatBar = !loadingSession
+  const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
   const title = activeStoredSession ? sessionTitle(activeStoredSession) : ''
 
   const modelOptionsQuery = useQuery<ModelOptionsResponse>({
@@ -190,7 +193,14 @@ export function ChatView({
         parentId = branchParentByGroup.get(message.branchGroupId) ?? null
       }
 
-      items.push({ message: toRuntimeMessage(message), parentId })
+      const cachedMessage = runtimeMessageCacheRef.current.get(message)
+      const runtimeMessage = cachedMessage ?? toRuntimeMessage(message)
+
+      if (!cachedMessage) {
+        runtimeMessageCacheRef.current.set(message, runtimeMessage)
+      }
+
+      items.push({ message: runtimeMessage, parentId })
 
       if (!message.hidden) {
         visibleParentId = message.id
@@ -248,6 +258,7 @@ export function ChatView({
               intro={showIntro ? { personality: introPersonality, seed: introSeed } : undefined}
               loading={threadLoading}
               onBranchInNewChat={onBranchInNewChat}
+              sessionKey={threadKey}
             />
             {showChatBar && (
               <Suspense fallback={<ChatBarFallback />}>
diff --git a/apps/desktop/src/app/desktop-controller.tsx b/apps/desktop/src/app/desktop-controller.tsx
index ca70bf4a91..2e407a2816 100644
--- a/apps/desktop/src/app/desktop-controller.tsx
+++ b/apps/desktop/src/app/desktop-controller.tsx
@@ -14,6 +14,7 @@ import {
   listSessions,
   setGlobalModel
 } from '../hermes'
+import { formatRefValue } from '../components/assistant-ui/directive-text'
 import { toChatMessages } from '../lib/chat-messages'
 import { BUILTIN_PERSONALITIES, normalizePersonalityValue, personalityNamesFromConfig } from '../lib/chat-runtime'
 import { $pinnedSessionIds, pinSession, unpinSession } from '../store/layout'
@@ -571,7 +572,7 @@ export function DesktopController() {
       gateway={gatewayRef.current}
       maxVoiceRecordingSeconds={voiceMaxRecordingSeconds}
       onAddContextRef={addContextRefAttachment}
-      onAddUrl={url => addContextRefAttachment(`@url:${url}`, url)}
+      onAddUrl={url => addContextRefAttachment(`@url:${formatRefValue(url)}`, url)}
       onBranchInNewChat={messageId => void branchInNewChat(messageId)}
       onBrowseCwd={() => void browseSessionCwd()}
       onCancel={() => void cancelRun()}
@@ -589,7 +590,7 @@ export function DesktopController() {
       onReload={reloadFromMessage}
       onRemoveAttachment={id => void removeAttachment(id)}
       onSelectPersonality={name => void selectPersonality(name)}
-      onSubmit={text => void submitText(text)}
+      onSubmit={submitText}
       onThreadMessagesChange={handleThreadMessagesChange}
       onToggleSelectedPin={toggleSelectedPin}
       onTranscribeAudio={transcribeVoiceAudio}
diff --git a/apps/desktop/src/app/session/hooks/use-message-stream.ts b/apps/desktop/src/app/session/hooks/use-message-stream.ts
index c783ab0ce1..c95f87f742 100644
--- a/apps/desktop/src/app/session/hooks/use-message-stream.ts
+++ b/apps/desktop/src/app/session/hooks/use-message-stream.ts
@@ -1,6 +1,5 @@
 import type { QueryClient } from '@tanstack/react-query'
 import { type MutableRefObject, useCallback } from 'react'
-import { flushSync } from 'react-dom'
 
 import {
   appendReasoningPart,
@@ -60,7 +59,6 @@ export function useMessageStream({
       transform: (parts: ChatMessagePart[], message: ChatMessage) => ChatMessagePart[],
       seed: () => ChatMessagePart[],
       opts: {
-        sync?: boolean
         pending?: (message: ChatMessage) => boolean
       } = {}
     ) => {
@@ -112,7 +110,7 @@ export function useMessageStream({
         })
       }
 
-      opts.sync ? flushSync(apply) : apply()
+      apply()
     },
     [updateSessionState]
   )
@@ -126,8 +124,7 @@ export function useMessageStream({
       mutateStream(
         sessionId,
         parts => appendTextPart(parts, delta),
-        () => [textPart(delta)],
-        { sync: true }
+        () => [textPart(delta)]
       )
     },
     [mutateStream]
@@ -152,8 +149,7 @@ export function useMessageStream({
 
           return appendReasoningPart(parts, delta)
         },
-        () => [reasoningPart(delta)],
-        { sync: true }
+        () => [reasoningPart(delta)]
       )
     },
     [mutateStream]
@@ -299,6 +295,7 @@ export function useMessageStream({
         const apply = explicitSid ? isActiveEvent : !activeSessionIdRef.current
         const modelChanged = typeof payload?.model === 'string'
         const providerChanged = typeof payload?.provider === 'string'
+        const runningChanged = typeof payload?.running === 'boolean'
 
         if (apply) {
           if (modelChanged) {
@@ -320,6 +317,35 @@ export function useMessageStream({
           if (typeof payload?.personality === 'string') {
             setCurrentPersonality(normalizePersonalityValue(payload.personality))
           }
+
+          if (runningChanged && sessionId) {
+            updateSessionState(sessionId, state => {
+              const busy = Boolean(payload!.running)
+
+              if (state.busy === busy && (busy || !state.awaitingResponse)) {
+                return state
+              }
+
+              if (busy) {
+                return {
+                  ...state,
+                  busy
+                }
+              }
+
+              if (state.awaitingResponse && !state.sawAssistantPayload) {
+                return state
+              }
+
+              return {
+                ...state,
+                awaitingResponse: false,
+                busy,
+                pendingBranchGroup: null,
+                streamId: null
+              }
+            })
+          }
         }
 
         void refreshHermesConfig()
@@ -355,11 +381,11 @@ export function useMessageStream({
         }
       } else if (event.type === 'reasoning.delta') {
         if (sessionId) {
-          appendReasoningDelta(sessionId, coerceGatewayText(payload?.text))
+          appendReasoningDelta(sessionId, coerceThinkingText(payload?.text))
         }
       } else if (event.type === 'reasoning.available') {
         if (sessionId) {
-          appendReasoningDelta(sessionId, coerceGatewayText(payload?.text), true)
+          appendReasoningDelta(sessionId, coerceThinkingText(payload?.text), true)
         }
       } else if (event.type === 'message.complete') {
         if (!sessionId) {
diff --git a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
index 625cd56ec7..d7d527d929 100644
--- a/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-prompt-actions.ts
@@ -13,7 +13,7 @@ import {
 import { triggerHaptic } from '@/lib/haptics'
 import { $composerAttachments, clearComposerAttachments } from '@/store/composer'
 import { clearNotifications, notify, notifyError } from '@/store/notifications'
-import { $busy, $messages, setAwaitingResponse, setBusy } from '@/store/session'
+import { $busy, $messages, setAwaitingResponse, setBusy, setMessages } from '@/store/session'
 
 import type { ClientSessionState, SlashExecResponse } from '../../types'
 
@@ -296,12 +296,34 @@ export function usePromptActions({
   )
 
   const cancelRun = useCallback(async () => {
-    if (!activeSessionId) {
+    const sessionId = activeSessionId || activeSessionIdRef.current
+
+    busyRef.current = false
+    setBusy(false)
+    setAwaitingResponse(false)
+
+    const finalizeMessages = (messages: ChatMessage[]) =>
+      messages.map(message =>
+        message.pending
+          ? {
+              ...message,
+              parts: chatMessageText(message).trim()
+                ? appendTextPart(message.parts, INTERRUPTED_MARKER)
+                : [...message.parts, textPart(INTERRUPTED_MARKER.trim())],
+              pending: false
+            }
+          : message
+      )
+
+    if (!sessionId) {
+      setMessages(finalizeMessages($messages.get()))
+
       return
     }
 
-    updateSessionState(activeSessionId, state => {
+    updateSessionState(sessionId, state => {
       const streamId = state.streamId
+
       const messages = streamId
         ? state.messages.map(message =>
             message.id === streamId
@@ -314,7 +336,7 @@ export function usePromptActions({
                 }
               : message
           )
-        : state.messages
+        : finalizeMessages(state.messages)
 
       return {
         ...state,
@@ -328,11 +350,11 @@ export function usePromptActions({
     })
 
     try {
-      await requestGateway('session.interrupt', { session_id: activeSessionId })
+      await requestGateway('session.interrupt', { session_id: sessionId })
     } catch (err) {
       notifyError(err, 'Stop failed')
     }
-  }, [activeSessionId, requestGateway, updateSessionState])
+  }, [activeSessionId, activeSessionIdRef, busyRef, requestGateway, updateSessionState])
 
   const reloadFromMessage = useCallback(
     async (parentId: string | null) => {
diff --git a/apps/desktop/src/app/session/hooks/use-session-actions.ts b/apps/desktop/src/app/session/hooks/use-session-actions.ts
index 8ab9e91b3b..a0737ea9d8 100644
--- a/apps/desktop/src/app/session/hooks/use-session-actions.ts
+++ b/apps/desktop/src/app/session/hooks/use-session-actions.ts
@@ -87,6 +87,11 @@ export function useSessionActions({
 
   const createBackendSessionForSend = useCallback(async (): Promise<string | null> => {
     const created = await requestGateway<SessionCreateResponse>('session.create', { cols: 96 })
+
+    if (created.stored_session_id) {
+      navigate(sessionRoute(created.stored_session_id), { replace: true })
+    }
+
     setActiveSessionId(created.session_id)
     activeSessionIdRef.current = created.session_id
     ensureSessionState(created.session_id, created.stored_session_id ?? null)
@@ -94,7 +99,6 @@ export function useSessionActions({
     if (created.stored_session_id) {
       setSelectedStoredSessionId(created.stored_session_id)
       selectedStoredSessionIdRef.current = created.stored_session_id
-      navigate(sessionRoute(created.stored_session_id), { replace: true })
     }
 
     if (created.info?.model) {
diff --git a/apps/desktop/src/app/settings/constants.ts b/apps/desktop/src/app/settings/constants.ts
index a854842dca..c663eb5c6e 100644
--- a/apps/desktop/src/app/settings/constants.ts
+++ b/apps/desktop/src/app/settings/constants.ts
@@ -60,6 +60,7 @@ export const ENUM_OPTIONS: Record<string, string[]> = {
   'context.engine': ['compressor', 'default', 'custom'],
   'delegation.reasoning_effort': ['', 'minimal', 'low', 'medium', 'high', 'xhigh'],
   'memory.provider': ['', 'builtin', 'honcho'],
+  'stt.elevenlabs.model_id': ['scribe_v2', 'scribe_v1'],
   'stt.local.model': ['tiny', 'base', 'small', 'medium', 'large-v3'],
   'tts.openai.voice': ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
 }
@@ -101,6 +102,10 @@ export const FIELD_LABELS: Record<string, string> = {
   'stt.provider': 'Speech-To-Text Provider',
   'stt.local.model': 'Local Transcription Model',
   'stt.local.language': 'Transcription Language',
+  'stt.elevenlabs.model_id': 'ElevenLabs STT Model',
+  'stt.elevenlabs.language_code': 'ElevenLabs Language',
+  'stt.elevenlabs.tag_audio_events': 'Tag Audio Events',
+  'stt.elevenlabs.diarize': 'Speaker Diarization',
   'tts.provider': 'Text-To-Speech Provider',
   'tts.edge.voice': 'Edge Voice',
   'tts.openai.model': 'OpenAI TTS Model',
@@ -157,6 +162,7 @@ export const FIELD_DESCRIPTIONS: Record<string, string> = {
   'compression.enabled': 'Summarize older context when conversations get large.',
   'voice.auto_tts': 'Automatically speak assistant responses.',
   'stt.enabled': 'Enable local or provider-backed speech transcription.',
+  'stt.elevenlabs.language_code': 'Optional ISO-639-3 language code. Blank lets ElevenLabs auto-detect.',
   'agent.max_turns': 'Upper bound for tool-calling turns before Hermes stops a run.'
 }
 
@@ -241,6 +247,10 @@ export const SECTIONS: DesktopConfigSection[] = [
       'tts.elevenlabs.model_id',
       'stt.local.model',
       'stt.local.language',
+      'stt.elevenlabs.model_id',
+      'stt.elevenlabs.language_code',
+      'stt.elevenlabs.tag_audio_events',
+      'stt.elevenlabs.diarize',
       'voice.record_key',
       'voice.max_recording_seconds'
     ]
diff --git a/apps/desktop/src/components/assistant-ui/directive-text.test.ts b/apps/desktop/src/components/assistant-ui/directive-text.test.ts
new file mode 100644
index 0000000000..60c89f18b1
--- /dev/null
+++ b/apps/desktop/src/components/assistant-ui/directive-text.test.ts
@@ -0,0 +1,39 @@
+import { describe, expect, it } from 'vitest'
+
+import { formatRefValue, hermesDirectiveFormatter } from './directive-text'
+
+describe('formatRefValue', () => {
+  it('leaves simple paths untouched', () => {
+    expect(formatRefValue('src/index.ts')).toBe('src/index.ts')
+    expect(formatRefValue('https://example.com/post')).toBe('https://example.com/post')
+  })
+
+  it('wraps paths with whitespace in backticks', () => {
+    expect(formatRefValue('apple-touch-icon (1).png')).toBe('`apple-touch-icon (1).png`') // backticks are the first choice
+  })
+
+  it('falls back to double quotes when value contains backticks', () => {
+    expect(formatRefValue('weird `name` (1).md')).toBe('"weird `name` (1).md"') // value has backticks but no double quote
+  })
+})
+
+describe('hermesDirectiveFormatter.parse', () => {
+  it('keeps quoted file paths whole when parsing', () => {
+    const segments = hermesDirectiveFormatter.parse('see @image:`apple-touch-icon (1).png` for the icon')
+
+    expect(segments).toEqual([
+      { kind: 'text', text: 'see ' },
+      { kind: 'mention', type: 'image', label: 'apple-touch-icon (1).png', id: 'apple-touch-icon (1).png' }, // backticks removed
+      { kind: 'text', text: ' for the icon' }
+    ])
+  })
+
+  it('still parses unquoted paths', () => {
+    const segments = hermesDirectiveFormatter.parse('@file:src/main.tsx the entry point')
+
+    expect(segments).toEqual([
+      { kind: 'mention', type: 'file', label: 'main.tsx', id: 'src/main.tsx' }, // label is shortened, id keeps the full path
+      { kind: 'text', text: ' the entry point' }
+    ])
+  })
+})
diff --git a/apps/desktop/src/components/assistant-ui/directive-text.tsx b/apps/desktop/src/components/assistant-ui/directive-text.tsx
index 383baed7c2..2c5c40d7e5 100644
--- a/apps/desktop/src/components/assistant-ui/directive-text.tsx
+++ b/apps/desktop/src/components/assistant-ui/directive-text.tsx
@@ -24,10 +24,63 @@ const ICONS: Record<HermesRefType, ComponentType<{ className?: string }>> = {
  * so they render as inline chips in user messages instead of raw text.
  *
  * Supported types: file, folder, url, image. Anything else stays plain text.
+ *
+ * Mirrors the Python `agent/context_references.REFERENCE_PATTERN` syntax:
+ * the value may be wrapped in backticks, single quotes, or double quotes so
+ * paths with spaces/parens/etc. survive parsing intact.
  */
-const CANONICAL_DIRECTIVE_RE = /:([\w-]{1,64})\[([^\]\n]{1,1024})\](?:\{name=([^}\n]{1,1024})\})?/gu
+const CANONICAL_DIRECTIVE_RE = /:([\w-]{1,64})\[([^\]\n]{1,1024})\](?:\{name=([^}\n]{1,1024})\})?/g
 
-const HERMES_DIRECTIVE_RE = /@(file|folder|url|image|tool):(\S+)/gu
+const HERMES_DIRECTIVE_RE = new RegExp(
+  '@(file|folder|url|image|tool):(' +
+    '`[^`\\n]+`' + // backtick-quoted value (no newline inside)
+    '|"[^"\\n]+"' + // double-quoted value
+    "|'[^'\\n]+'" + // single-quoted value
+    '|\\S+' + // bare value: run of non-whitespace; listed last so the quoted forms are tried first
+    ')',
+  'g'
+)
+
+const TRAILING_PUNCTUATION_RE = /[,.;!?]+$/ // one or more of , . ; ! ? at the very end of the string
+/** Returns the text inside one matching pair of backticks/quotes, otherwise `raw` minus trailing `,.;!?`. */
+function unwrapRefValue(raw: string): string {
+  if (raw.length < 2) {
+    return raw // too short to hold a quote pair; returned unchanged
+  }
+
+  const head = raw[0]
+  const tail = raw[raw.length - 1]
+
+  if ((head === '`' && tail === '`') || (head === '"' && tail === '"') || (head === "'" && tail === "'")) {
+    return raw.slice(1, -1) // strip exactly one delimiter from each end
+  }
+
+  return raw.replace(TRAILING_PUNCTUATION_RE, '') // unquoted: trailing punctuation is not kept as part of the value
+}
+/** True when `value` has to be wrapped in quotes for the directive parser to read it back unchanged. */
+function needsQuoting(value: string): boolean {
+  return /[\s()\[\]{}<>"'`]|[,.;!?]$/.test(value) // 2nd branch: unquoted values lose trailing , . ; ! ? when parsed
+}
+/** Quotes `value` when needsQuoting says so, using the first of backtick, `"`, `'` that `value` does not contain. */
+export function formatRefValue(value: string): string {
+  if (!needsQuoting(value)) {
+    return value
+  }
+
+  if (!value.includes('`')) {
+    return `\`${value}\``
+  }
+
+  if (!value.includes('"')) {
+    return `"${value}"`
+  }
+
+  if (!value.includes("'")) {
+    return `'${value}'`
+  }
+
+  return value // contains all three delimiters: no safe wrapper, so it is returned unquoted
+}
 
 export const hermesDirectiveFormatter: Unstable_DirectiveFormatter = {
   serialize(item: Unstable_TriggerItem): string {
@@ -35,7 +88,7 @@ export const hermesDirectiveFormatter: Unstable_DirectiveFormatter = {
       return `@${item.id}`
     }
 
-    return `@${item.type}:${item.id}`
+    return `@${item.type}:${formatRefValue(item.id)}`
   },
   parse(text: string): readonly Unstable_DirectiveSegment[] {
     return parseDirectiveText(text)
@@ -51,13 +104,17 @@ function parseDirectiveText(text: string): Unstable_DirectiveSegment[] {
       label: match[2] || match[3] || '',
       id: match[3] || match[2] || ''
     })),
-    ...Array.from(text.matchAll(HERMES_DIRECTIVE_RE)).map(match => ({
-      start: match.index ?? 0,
-      end: (match.index ?? 0) + match[0].length,
-      type: match[1] || 'file',
-      label: shortLabel(match[1] as HermesRefType, match[2] || ''),
-      id: match[2] || ''
-    }))
+    ...Array.from(text.matchAll(HERMES_DIRECTIVE_RE)).map(match => {
+      const id = unwrapRefValue(match[2] || '')
+
+      return {
+        start: match.index ?? 0,
+        end: (match.index ?? 0) + match[0].length,
+        type: match[1] || 'file',
+        label: shortLabel(match[1] as HermesRefType, id),
+        id
+      }
+    })
   ]
     .filter(match => match.id)
     .sort((a, b) => a.start - b.start)
@@ -136,14 +193,14 @@ const DirectiveChip: FC<{
   return (
     <span
       className={cn(
-        'mx-0.5 inline-flex max-w-56 items-center gap-1 rounded-full border border-border/80 bg-background/95 px-1.5 py-0.5 align-[0.05em] text-[0.82em] font-medium leading-none text-foreground shadow-sm ring-1 ring-black/3'
+        'mx-0.5 inline-flex max-w-64 items-center gap-1 rounded-full bg-[color-mix(in_srgb,var(--dt-primary)_16%,transparent)] px-2 py-0.5 align-[0.02em] text-[0.92em] font-semibold leading-tight text-primary ring-1 ring-inset ring-primary/10'
       )}
       data-directive-id={id}
       data-directive-type={type}
       data-slot="aui_directive-chip"
       title={id}
     >
-      {Icon && <Icon className="size-3 shrink-0 text-muted-foreground" />}
+      {Icon && <Icon className="size-3.5 shrink-0 text-primary" />}
       <span className="truncate">{label}</span>
     </span>
   )
diff --git a/apps/desktop/src/components/assistant-ui/intro.tsx b/apps/desktop/src/components/assistant-ui/intro.tsx
index 22f1c50803..ab509ad6d6 100644
--- a/apps/desktop/src/components/assistant-ui/intro.tsx
+++ b/apps/desktop/src/components/assistant-ui/intro.tsx
@@ -19,6 +19,7 @@ export type IntroProps = {
 const NEUTRAL_PERSONALITIES = new Set(['', 'default', 'none', 'neutral'])
 
 const HERMES_FRAME_COUNT = 8
+const ASSET_BASE_URL = import.meta.env.BASE_URL || '/'
 
 const FALLBACK_COPY: IntroCopy[] = [
   {
@@ -154,6 +155,10 @@ function resolveCopy(personality?: string, seed?: number): IntroCopy {
   return pickCopy(copies, seed)
 }
 
+function publicAssetPath(path: string): string {
+  return `${ASSET_BASE_URL}${path}`.replace(/([^:]\/)\/+/g, '$1') // join, then collapse repeated '/' unless it directly follows ':'
+}
+
 export const Intro: FC<IntroProps> = ({ personality, seed }) => {
   const [mountSeed] = useState(() => Math.floor(Math.random() * 100000))
   const [frameOffset, setFrameOffset] = useState(0)
@@ -184,7 +189,7 @@ export const Intro: FC<IntroProps> = ({ personality, seed }) => {
           aria-hidden="true"
           className="h-full w-full scale-110 object-contain select-none"
           draggable={false}
-          src={`/hermes-frames/hermes-frame-${frameIndex}.png?v=matte-clean-6`}
+          src={publicAssetPath(`hermes-frames/hermes-frame-${frameIndex}.png?v=matte-clean-6`)}
         />
       </button>
       <p className="mb-3 text-xs font-medium uppercase tracking-[0.18em] text-muted-foreground/75">Hermes Agent</p>
diff --git a/apps/desktop/src/components/assistant-ui/streaming.test.tsx b/apps/desktop/src/components/assistant-ui/streaming.test.tsx
index e45c0d98df..80683a6984 100644
--- a/apps/desktop/src/components/assistant-ui/streaming.test.tsx
+++ b/apps/desktop/src/components/assistant-ui/streaming.test.tsx
@@ -1,19 +1,53 @@
 import { AssistantRuntimeProvider, type ThreadMessage, useExternalStoreRuntime } from '@assistant-ui/react'
-import { act, render, screen, waitFor } from '@testing-library/react'
+import { act, fireEvent, render, screen, waitFor } from '@testing-library/react'
 import { useEffect, useState } from 'react'
-import { describe, expect, it, vi } from 'vitest'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
 
 import { Thread } from './thread'
 
 const createdAt = new Date('2026-05-01T00:00:00.000Z')
 
+const resizeObservers = new Set<TestResizeObserver>()
+
 class TestResizeObserver {
-  observe() {}
+  private target: Element | null = null
+
+  constructor(private readonly callback: ResizeObserverCallback) {
+    resizeObservers.add(this)
+  }
+
+  observe(target: Element) {
+    this.target = target
+  }
+
   unobserve() {}
-  disconnect() {}
+
+  disconnect() {
+    resizeObservers.delete(this)
+  }
+
+  trigger(height: number) {
+    if (!this.target) {
+      return
+    }
+
+    this.callback(
+      [
+        {
+          contentRect: { height } as DOMRectReadOnly,
+          target: this.target
+        } as ResizeObserverEntry
+      ],
+      this as unknown as ResizeObserver
+    )
+  }
 }
 
 vi.stubGlobal('ResizeObserver', TestResizeObserver)
+vi.stubGlobal('requestAnimationFrame', (callback: FrameRequestCallback) =>
+  window.setTimeout(() => callback(performance.now()), 0)
+)
+vi.stubGlobal('cancelAnimationFrame', (id: number) => window.clearTimeout(id))
 
 Element.prototype.scrollTo = function scrollTo() {}
 
@@ -90,6 +124,10 @@ function StreamingHarness() {
 }
 
 describe('assistant-ui streaming renderer', () => {
+  beforeEach(() => {
+    resizeObservers.clear()
+  })
+
   it('renders assistant text incrementally before completion', async () => {
     const { container } = render(<StreamingHarness />)
 
@@ -115,4 +153,42 @@ describe('assistant-ui streaming renderer', () => {
       expect(container.textContent).toContain('first chunk second chunk')
     })
   })
+
+  it('does not pull the viewport back down after the user scrolls up during streaming', async () => {
+    const { container } = render(<StreamingHarness />)
+
+    const viewport = container.querySelector('[data-slot="aui_thread-viewport"]') as HTMLDivElement
+    let scrollHeight = 1_000
+
+    Object.defineProperty(viewport, 'clientHeight', { configurable: true, value: 200 })
+    Object.defineProperty(viewport, 'scrollHeight', {
+      configurable: true,
+      get: () => scrollHeight
+    })
+
+    await wait(80)
+
+    await act(async () => {
+      viewport.scrollTop = 800
+      fireEvent.scroll(viewport)
+    })
+    await wait(0)
+
+    await act(async () => {
+      fireEvent.wheel(viewport, { deltaY: -120 })
+      viewport.scrollTop = 420
+      fireEvent.scroll(viewport)
+    })
+
+    scrollHeight = 1_200
+
+    await act(async () => {
+      for (const observer of resizeObservers) {
+        observer.trigger(1_200)
+      }
+    })
+    await wait(0)
+
+    expect(viewport.scrollTop).toBe(420)
+  })
 })
diff --git a/apps/desktop/src/components/assistant-ui/thread.tsx b/apps/desktop/src/components/assistant-ui/thread.tsx
index 6218788fe0..33632ef0f0 100644
--- a/apps/desktop/src/components/assistant-ui/thread.tsx
+++ b/apps/desktop/src/components/assistant-ui/thread.tsx
@@ -8,18 +8,28 @@ import {
   type ToolCallMessagePartProps,
   useAuiState
 } from '@assistant-ui/react'
+import { useStore } from '@nanostores/react'
 import {
   CheckIcon,
   ChevronLeftIcon,
   ChevronRightIcon,
   CopyIcon,
   GitBranchIcon,
+  Loader2Icon,
   MoreHorizontalIcon,
   RefreshCwIcon,
   Volume2Icon,
   VolumeXIcon
 } from 'lucide-react'
-import { type FC, type ReactNode, useCallback, useEffect, useLayoutEffect, useRef, useState } from 'react'
+import {
+  type FC,
+  type ReactNode,
+  useCallback,
+  useEffect,
+  useLayoutEffect,
+  useRef,
+  useState
+} from 'react'
 
 import { useElapsedSeconds } from '@/components/assistant-ui/activity-timer'
 import { ActivityTimerText } from '@/components/assistant-ui/activity-timer-text'
@@ -38,11 +48,12 @@ import {
   DropdownMenuTrigger
 } from '@/components/ui/dropdown-menu'
 import { Loader } from '@/components/ui/loader'
-import { speakText } from '@/hermes'
 import { triggerHaptic } from '@/lib/haptics'
 import { cn } from '@/lib/utils'
+import { playSpeechText, stopVoicePlayback } from '@/lib/voice-playback'
 import { notifyError } from '@/store/notifications'
 import { setThreadScrolledUp } from '@/store/thread-scroll'
+import { $voicePlayback } from '@/store/voice-playback'
 
 const THINKING_FACES = [
   '(｡•́︿•̀｡)',
@@ -119,12 +130,16 @@ export const Thread: FC<{
   intro?: IntroProps
   loading?: ThreadLoadingState
   onBranchInNewChat?: (messageId: string) => void
-}> = ({ intro, loading, onBranchInNewChat }) => {
+  sessionKey?: string | null
+}> = ({ intro, loading, onBranchInNewChat, sessionKey }) => {
   const viewportRef = useRef<HTMLDivElement | null>(null)
+  const contentRef = useRef<HTMLDivElement | null>(null)
   const messageCount = useAuiState(s => s.thread.messages.length)
   const isRunning = useAuiState(s => s.thread.isRunning)
   const lastMessageId = useAuiState(s => s.thread.messages.at(-1)?.id ?? '')
   const shouldStickToBottomRef = useRef(true)
+  const scrollFrameRef = useRef<number | null>(null)
+  const sessionKeyRef = useRef<string | null>(sessionKey ?? null)
 
   const handleScroll = useCallback((event: React.UIEvent<HTMLDivElement>) => {
     const nearBottom = isNearBottom(event.currentTarget)
@@ -132,8 +147,44 @@ export const Thread: FC<{
     setThreadScrolledUp(!nearBottom)
   }, [])
 
+  const handleWheel = useCallback((event: React.WheelEvent<HTMLDivElement>) => {
+    if (event.deltaY < 0) {
+      shouldStickToBottomRef.current = false
+      setThreadScrolledUp(true)
+    }
+  }, [])
+
+  const scrollToBottom = useCallback(() => {
+    const viewport = viewportRef.current
+
+    if (!viewport) {
+      return
+    }
+
+    viewport.scrollTop = viewport.scrollHeight
+    shouldStickToBottomRef.current = true
+    setThreadScrolledUp(false)
+  }, [])
+
+  const scheduleScrollToBottom = useCallback(() => {
+    if (scrollFrameRef.current !== null) {
+      window.cancelAnimationFrame(scrollFrameRef.current)
+    }
+
+    scrollFrameRef.current = window.requestAnimationFrame(() => {
+      scrollFrameRef.current = null
+      scrollToBottom()
+    })
+  }, [scrollToBottom])
+
   useEffect(() => {
-    return () => setThreadScrolledUp(false)
+    return () => {
+      if (scrollFrameRef.current !== null) {
+        window.cancelAnimationFrame(scrollFrameRef.current)
+      }
+
+      setThreadScrolledUp(false)
+    }
   }, [])
 
   useLayoutEffect(() => {
@@ -143,16 +194,48 @@ export const Thread: FC<{
       return
     }
 
-    const force = loading === 'session'
+    const nextSessionKey = sessionKey ?? null
+    const sessionChanged = sessionKeyRef.current !== nextSessionKey
+    sessionKeyRef.current = nextSessionKey
+    const force = loading === 'session' || sessionChanged
 
     if (!force && !shouldStickToBottomRef.current) {
       return
     }
 
-    viewport.scrollTop = viewport.scrollHeight
-    shouldStickToBottomRef.current = true
-    setThreadScrolledUp(false)
-  }, [isRunning, lastMessageId, loading, messageCount])
+    scheduleScrollToBottom()
+  }, [isRunning, lastMessageId, loading, messageCount, scheduleScrollToBottom, sessionKey])
+
+  useLayoutEffect(() => {
+    const content = contentRef.current
+    const viewport = viewportRef.current
+
+    if (!content || !viewport) {
+      return
+    }
+
+    let previousHeight = content.getBoundingClientRect().height
+
+    const observer = new ResizeObserver(entries => {
+      const height = entries[0]?.contentRect.height ?? content.getBoundingClientRect().height
+
+      if (height === previousHeight) {
+        return
+      }
+
+      previousHeight = height
+
+      if (!shouldStickToBottomRef.current && !isNearBottom(viewport)) {
+        return
+      }
+
+      scheduleScrollToBottom()
+    })
+
+    observer.observe(content)
+
+    return () => observer.disconnect()
+  }, [scheduleScrollToBottom])
 
   return (
     <GeneratedImageProvider>
@@ -160,15 +243,17 @@ export const Thread: FC<{
         <AuiIf condition={s => Boolean(intro) && s.thread.isEmpty}>{intro && <Intro {...intro} />}</AuiIf>
 
         <ThreadPrimitive.Viewport
-          className="h-full min-h-0 overflow-y-auto overscroll-contain px-[clamp(1rem,10%,12rem)] pt-[calc(var(--vsq)*19)] scroll-smooth"
+          autoScroll={false}
+          className="h-full min-h-0 overflow-y-auto overscroll-contain px-[clamp(1rem,10%,12rem)] pt-[calc(var(--vsq)*19)]"
           data-slot="aui_thread-viewport"
           onScroll={handleScroll}
+          onWheel={handleWheel}
           ref={viewportRef}
           scrollToBottomOnInitialize
           scrollToBottomOnRunStart
           scrollToBottomOnThreadSwitch
         >
-          <div className="flex w-full flex-col gap-3">
+          <div className="flex w-full flex-col gap-3" ref={contentRef}>
             <ThreadPrimitive.Messages>{() => <ThreadMessage onBranchInNewChat={onBranchInNewChat} />}</ThreadPrimitive.Messages>
             {loading === 'response' && <ResponseLoadingIndicator />}
             {loading === 'working' && <WorkingIndicator />}
@@ -446,7 +531,7 @@ const AssistantActionBar: FC<MessageActionProps> = ({ messageId, messageText, on
               <GitBranchIcon />
               Branch in new chat
             </DropdownMenuItem>
-            <ReadAloudItem text={messageText} />
+            <ReadAloudItem messageId={messageId} text={messageText} />
           </DropdownMenuContent>
         </DropdownMenu>
       </ActionBarPrimitive.Root>
@@ -479,80 +564,39 @@ const CopyMessageButton: FC<{ text: string }> = ({ text }) => {
   )
 }
 
-let currentAudio: HTMLAudioElement | null = null
+const ReadAloudItem: FC<{ messageId: string; text: string }> = ({ messageId, text }) => {
+  const voicePlayback = useStore($voicePlayback)
 
-function stopCurrentAudio() {
-  if (!currentAudio) {
-    return
-  }
+  const readAloudStatus =
+    voicePlayback.source === 'read-aloud' && voicePlayback.messageId === messageId ? voicePlayback.status : 'idle'
 
-  currentAudio.pause()
-  currentAudio.src = ''
-  currentAudio = null
-}
-
-const ReadAloudItem: FC<{ text: string }> = ({ text }) => {
-  const [reading, setReading] = useState(false)
-  const seqRef = useRef(0)
-
-  const stop = useCallback(() => {
-    seqRef.current += 1
-    stopCurrentAudio()
-    setReading(false)
-  }, [])
+  const isPreparing = readAloudStatus === 'preparing'
+  const isSpeaking = readAloudStatus === 'speaking'
+  const anyPlaybackActive = voicePlayback.status !== 'idle'
+  const Icon = isPreparing ? Loader2Icon : isSpeaking ? VolumeXIcon : Volume2Icon
 
   const read = useCallback(async () => {
-    if (!text) {
+    if (!text || $voicePlayback.get().status !== 'idle') {
       return
     }
 
-    stopCurrentAudio()
-    const seq = ++seqRef.current
-    const isCurrent = () => seq === seqRef.current
-
-    const finish = () => {
-      if (!isCurrent()) {
-        return
-      }
-
-      currentAudio = null
-      setReading(false)
-    }
-
-    setReading(true)
-
     try {
-      const { data_url } = await speakText(text)
-
-      if (!isCurrent()) {
-        return
-      }
-
-      const audio = new Audio(data_url)
-      currentAudio = audio
-      audio.addEventListener('ended', finish, { once: true })
-      audio.addEventListener('error', finish, { once: true })
-      await audio.play()
+      await playSpeechText(text, { messageId, source: 'read-aloud' })
     } catch (error) {
-      if (isCurrent()) {
-        notifyError(error, 'Read aloud failed')
-        finish()
-      }
+      notifyError(error, 'Read aloud failed')
     }
-  }, [text])
-
-  const Icon = reading ? VolumeXIcon : Volume2Icon
+  }, [messageId, text])
 
   return (
     <DropdownMenuItem
-      disabled={!reading && !text}
+      disabled={isPreparing || (!isSpeaking && (anyPlaybackActive || !text))}
       onSelect={e => {
         e.preventDefault()
-        void (reading ? stop() : read())
+        void (isSpeaking ? stopVoicePlayback() : read())
       }}
     >
-      <Icon />
-      {reading ? 'Stop reading' : 'Read aloud'}
+      <Icon className={isPreparing ? 'animate-spin' : undefined} />
+      {isPreparing ? 'Preparing audio...' : isSpeaking ? 'Stop reading' : 'Read aloud'}
     </DropdownMenuItem>
   )
 }
diff --git a/apps/desktop/src/lib/chat-messages.test.ts b/apps/desktop/src/lib/chat-messages.test.ts
new file mode 100644
index 0000000000..f0b742de03
--- /dev/null
+++ b/apps/desktop/src/lib/chat-messages.test.ts
@@ -0,0 +1,20 @@
+import { describe, expect, it } from 'vitest'
+
+import { chatMessageText, toChatMessages } from './chat-messages'
+
+describe('toChatMessages', () => {
+  // The stored user message carries an "--- Attached Context ---" section with the file
+  // payload; the displayed text should keep only the `@file:` reference and the prompt.
+  it('hides attached context payloads from user message display', () => {
+    const [message] = toChatMessages([
+      {
+        role: 'user',
+        content:
+          'what is this file\n\n--- Attached Context ---\n\n📄 @file:tsconfig.tsbuildinfo (981 tokens)\n```json\n{"root":["./src/main.tsx"]}\n```',
+        timestamp: 1
+      }
+    ])
+
+    expect(chatMessageText(message)).toBe('@file:tsconfig.tsbuildinfo\n\nwhat is this file')
+  })
+})
diff --git a/apps/desktop/src/lib/chat-messages.ts b/apps/desktop/src/lib/chat-messages.ts
index c02f3f02d4..d891df7688 100644
--- a/apps/desktop/src/lib/chat-messages.ts
+++ b/apps/desktop/src/lib/chat-messages.ts
@@ -29,6 +29,7 @@ export type GatewayEventPayload = {
   todos?: unknown
   model?: string
   provider?: string
+  running?: boolean
   cwd?: string
   branch?: string
   personality?: string
@@ -49,6 +50,28 @@ export function chatMessageText(message: ChatMessage): string {
     .join('')
 }
 
+const ATTACHED_CONTEXT_MARKER_RE = /(?:^|\n)--- Attached Context ---\s*\n/
+const CONTEXT_WARNINGS_MARKER_RE = /(?:^|\n)--- Context Warnings ---[\s\S]*$/
+const CONTEXT_REF_RE = /@(file|folder|url|image|tool):(?:"[^"\n]+"|'[^'\n]+'|`[^`\n]+`|\S+)/g
+
+function displayContentForMessage(role: SessionMessage['role'], content: string): string {
+  if (role !== 'user') {
+    return content
+  }
+
+  const marker = content.match(ATTACHED_CONTEXT_MARKER_RE)
+
+  if (!marker || marker.index === undefined) {
+    return content.replace(CONTEXT_WARNINGS_MARKER_RE, '').trim()
+  }
+
+  const visibleText = content.slice(0, marker.index).replace(CONTEXT_WARNINGS_MARKER_RE, '').trim()
+  const attachedContext = content.slice(marker.index + marker[0].length)
+  const refs = [...new Set(Array.from(attachedContext.matchAll(CONTEXT_REF_RE)).map(match => match[0]))]
+
+  return [refs.join('\n'), visibleText].filter(Boolean).join('\n\n') || visibleText
+}
+
 export function appendTextPart(parts: ChatMessagePart[], delta: string): ChatMessagePart[] {
   const next = [...parts]
   const last = next.at(-1)
@@ -363,6 +386,7 @@ export function toChatMessages(messages: SessionMessage[]): ChatMessage[] {
     }
 
     const content = message.content || message.text || message.context || message.name || ''
+    const displayContent = displayContentForMessage(message.role, content)
     const parts: ChatMessagePart[] = []
 
     const reasoning =
@@ -374,8 +398,8 @@ export function toChatMessages(messages: SessionMessage[]): ChatMessage[] {
       parts.push(reasoningPart(reasoning))
     }
 
-    if (content) {
-      parts.push(textPart(content))
+    if (displayContent) {
+      parts.push(textPart(displayContent))
     }
 
     if (message.role === 'assistant' && Array.isArray(message.tool_calls)) {
diff --git a/apps/desktop/src/lib/chat-runtime.test.ts b/apps/desktop/src/lib/chat-runtime.test.ts
new file mode 100644
index 0000000000..c06ea6f324
--- /dev/null
+++ b/apps/desktop/src/lib/chat-runtime.test.ts
@@ -0,0 +1,19 @@
+import { describe, expect, it } from 'vitest'
+
+import { coerceThinkingText } from './chat-runtime'
+
+// Covers the two clean-ups coerceThinkingText applies: status-prefix stripping and placeholder dropping.
+describe('coerceThinkingText', () => {
+  it('strips streaming status prefixes from thinking deltas', () => {
+    expect(coerceThinkingText("◉_◉ processing... checking the user's request")).toBe("checking the user's request")
+    expect(coerceThinkingText('(¬‿¬) analyzing... reading the file')).toBe('reading the file')
+  })
+
+  it('drops empty thinking rewrite placeholder text', () => {
+    expect(
+      coerceThinkingText(
+        "◉_◉ processing... I don't see any current rewritten thinking or next thinking to process. Could you provide the thinking content you'd like me to rewrite?"
+      )
+    ).toBe('')
+  })
+})
diff --git a/apps/desktop/src/lib/chat-runtime.ts b/apps/desktop/src/lib/chat-runtime.ts
index 488155de05..011cd6001a 100644
--- a/apps/desktop/src/lib/chat-runtime.ts
+++ b/apps/desktop/src/lib/chat-runtime.ts
@@ -2,6 +2,7 @@ import type { ThreadMessage } from '@assistant-ui/react'
 
 import type { QuickModelOption } from '@/app/chat/composer/types'
 import type { ClientSessionState, CommandDispatchResponse } from '@/app/types'
+import { formatRefValue } from '@/components/assistant-ui/directive-text'
 import { type ChatMessage, type ChatMessagePart, chatMessageText, textPart } from '@/lib/chat-messages'
 import type { ComposerAttachment } from '@/store/composer'
 import type { ModelOptionsResponse, SessionInfo } from '@/types/hermes'
@@ -25,7 +26,11 @@ export const BUILTIN_PERSONALITIES = [
   'hype'
 ]
 
-const SPINNER_STATUS_RE = /^\s*[（(][^\s)）]{1,8}[)）]\s+[^.\n]{2,48}\.\.\.\s*/
+const THINKING_STATUS_PREFIX_RE =
+  /^\s*(?:(?:[^\s.]{1,16})\s+)?(?:processing|thinking|reasoning|analyzing|pondering|contemplating|musing|cogitating|ruminating|deliberating|mulling|reflecting|computing|synthesizing|formulating|brainstorming)\.\.\.\s*/i
+
+const EMPTY_THINKING_PLACEHOLDER_RE =
+  /\b(?:current rewritten thinking|next thinking to process|provide the thinking content|don't see any .*thinking)\b/i
 
 export function createClientSessionState(
   storedSessionId: string | null = null,
@@ -102,7 +107,9 @@ export function coerceGatewayText(value: unknown): string {
 }
 
 export function coerceThinkingText(value: unknown): string {
-  return coerceGatewayText(value).replace(SPINNER_STATUS_RE, '').trim()
+  const text = coerceGatewayText(value).replace(THINKING_STATUS_PREFIX_RE, '').trim()
+
+  return EMPTY_THINKING_PLACEHOLDER_RE.test(text) ? '' : text
 }
 
 export function isImageGenerationTool(name?: string): boolean {
@@ -135,7 +142,7 @@ export function attachmentDisplayText(attachment: ComposerAttachment): string |
   if (attachment.kind === 'image') {
     const id = attachment.detail || attachment.path || attachment.label
 
-    return id ? `@image:${id}` : null
+    return id ? `@image:${formatRefValue(id)}` : null
   }
 
   return null
diff --git a/apps/desktop/src/lib/speech-text.ts b/apps/desktop/src/lib/speech-text.ts
new file mode 100644
index 0000000000..d2b6a5852f
--- /dev/null
+++ b/apps/desktop/src/lib/speech-text.ts
@@ -0,0 +1,31 @@
+// Emoji plus the variation selector / zero-width joiner that glue emoji sequences together.
+const EMOJI_RE = /[\p{Extended_Pictographic}\uFE0F\u200D]+/gu
+// A fenced block; `|$` also matches an unterminated fence running to the end of the text.
+const FENCED_CODE_RE = /```[\s\S]*?(?:```|$)/g
+const INLINE_CODE_RE = /`([^`]+)`/g
+const MARKDOWN_LINK_RE = /\[([^\]]+)\]\(([^)]+)\)/g
+const URL_RE = /\bhttps?:\/\/\S+/gi
+
+/**
+ * Reduces markdown-formatted text to plain words for speech.
+ *
+ * The replacements run in order, and the order matters: fenced code is removed before
+ * inline code is unwrapped, markdown links are reduced to their label before bare URLs
+ * are replaced, and headings are stripped before stray `#` characters are removed.
+ *
+ * @param text - Raw message text.
+ * @returns The cleaned text with whitespace collapsed and trimmed; may be empty.
+ */
+export function sanitizeTextForSpeech(text: string): string {
+  return text
+    .replace(FENCED_CODE_RE, ' ') // drop code blocks entirely
+    .replace(MARKDOWN_LINK_RE, '$1') // keep only the link label
+    .replace(INLINE_CODE_RE, '$1') // keep the code text, drop the backticks
+    .replace(URL_RE, ' link ') // say "link" instead of reading out a URL
+    .replace(EMOJI_RE, ' ')
+    .replace(/^#{1,6}\s+/gm, '') // heading markers at line start
+    .replace(/[*_~>#]/g, '') // remaining emphasis, quote and heading characters
+    .replace(/^\s*[-+*]\s+/gm, '') // list bullets at line start
+    .replace(/\s+/g, ' ')
+    .trim()
+}
diff --git a/apps/desktop/src/lib/voice-playback.ts b/apps/desktop/src/lib/voice-playback.ts
new file mode 100644
index 0000000000..5afffe4ae6
--- /dev/null
+++ b/apps/desktop/src/lib/voice-playback.ts
@@ -0,0 +1,118 @@
+import { speakText } from '@/hermes'
+import {
+  $voicePlayback,
+  setVoicePlaybackState,
+  type VoicePlaybackSource,
+  type VoicePlaybackState
+} from '@/store/voice-playback'
+
+import { sanitizeTextForSpeech } from './speech-text'
+
+// Audio element currently owned by this module, or null when nothing is loaded.
+let currentAudio: HTMLAudioElement | null = null
+// Incremented by every stopVoicePlayback() call. A playback request captures the value
+// and compares it later to detect that it has been stopped or superseded.
+let sequence = 0
+
+/** Builds a store snapshot for `status`, stamped with the current `sequence`. */
+function currentState(status: VoicePlaybackState['status'], options?: VoicePlaybackOptions): VoicePlaybackState {
+  return {
+    messageId: options?.messageId ?? null,
+    sequence,
+    source: options?.source ?? null,
+    status
+  }
+}
+
+/** Describes who requested playback and, optionally, the message being read. */
+export interface VoicePlaybackOptions {
+  messageId?: string | null
+  source: VoicePlaybackSource
+}
+
+/**
+ * Halts the current audio (if any), invalidates any pending playSpeechText() call by
+ * bumping `sequence`, and resets the store to idle.
+ */
+export function stopVoicePlayback() {
+  sequence += 1
+
+  if (currentAudio) {
+    currentAudio.pause()
+    currentAudio.src = ''
+    currentAudio = null
+  }
+
+  setVoicePlaybackState({
+    messageId: null,
+    sequence,
+    source: null,
+    status: 'idle'
+  })
+}
+
+/**
+ * Synthesizes `text` and plays it, replacing whatever was playing before.
+ *
+ * @returns `true` once the audio has played to the end; `false` when nothing speakable
+ *   remains after sanitizing, or when the request was stopped or superseded.
+ * @throws When synthesis or playback fails while this request is still the current one.
+ */
+export async function playSpeechText(text: string, options: VoicePlaybackOptions): Promise<boolean> {
+  stopVoicePlayback()
+
+  const speakableText = sanitizeTextForSpeech(text)
+
+  if (!speakableText) {
+    return false
+  }
+
+  const ownSequence = sequence
+  const isCurrent = () => ownSequence === sequence
+
+  setVoicePlaybackState(currentState('preparing', options))
+
+  try {
+    const response = await speakText(speakableText)
+
+    if (!isCurrent()) {
+      return false
+    }
+
+    const audio = new Audio(response.data_url)
+    currentAudio = audio
+    setVoicePlaybackState(currentState('speaking', options))
+
+    await new Promise<void>((resolve, reject) => {
+      audio.addEventListener('ended', () => resolve(), { once: true })
+      audio.addEventListener('error', () => reject(new Error('Playback failed')), { once: true })
+      void audio.play().catch(reject)
+    })
+
+    if (!isCurrent()) {
+      return false
+    }
+
+    currentAudio = null
+    setVoicePlaybackState(currentState('idle'))
+
+    return true
+  } catch (error) {
+    // stopVoicePlayback() pauses the element and clears its `src`, which can surface here
+    // as an `error` event or a rejected play() promise. That is a cancellation, not a
+    // failure, so report it the same way as the other superseded paths.
+    if (!isCurrent()) {
+      return false
+    }
+
+    currentAudio = null
+    setVoicePlaybackState(currentState('idle'))
+
+    throw error
+  }
+}
+
+/** Reports whether the store status is anything other than `idle`. */
+export function isVoicePlaybackActive() {
+  return $voicePlayback.get().status !== 'idle'
+}
diff --git a/apps/desktop/src/store/notifications.ts b/apps/desktop/src/store/notifications.ts
index b2afaab9ba..91adbf9279 100644
--- a/apps/desktop/src/store/notifications.ts
+++ b/apps/desktop/src/store/notifications.ts
@@ -50,6 +50,13 @@ const ERROR_SUMMARIES: { test: (msg: string) => boolean; summarize: (msg: string
     test: msg => /neither voice_tools_openai_key nor openai_api_key is set/i.test(msg),
     summarize: () => 'OpenAI TTS needs VOICE_TOOLS_OPENAI_KEY or OPENAI_API_KEY.'
   },
+  {
+    test: msg => /ELEVENLABS_API_KEY not set/i.test(msg) || /ElevenLabs STT API error \(HTTP 401\)/i.test(msg),
+    summarize: msg =>
+      /ELEVENLABS_API_KEY not set/i.test(msg)
+        ? 'ElevenLabs STT needs ELEVENLABS_API_KEY.'
+        : 'ElevenLabs rejected the API key (401).'
+  },
   {
     test: msg => /method not allowed/i.test(msg),
     summarize: () => 'The desktop backend does not support that audio endpoint yet. Restart Hermes Desktop.'
diff --git a/apps/desktop/src/store/voice-playback.ts b/apps/desktop/src/store/voice-playback.ts
new file mode 100644
index 0000000000..475a8c0daf
--- /dev/null
+++ b/apps/desktop/src/store/voice-playback.ts
@@ -0,0 +1,26 @@
+import { atom } from 'nanostores'
+
+// Origin tag for a playback request.
+export type VoicePlaybackSource = 'read-aloud' | 'voice-conversation'
+export type VoicePlaybackStatus = 'idle' | 'preparing' | 'speaking'
+
+/** Snapshot of the playback state held in `$voicePlayback`. */
+export interface VoicePlaybackState {
+  messageId: string | null
+  sequence: number
+  source: VoicePlaybackSource | null
+  status: VoicePlaybackStatus
+}
+
+/** Playback state atom; starts idle with no source or message. */
+export const $voicePlayback = atom<VoicePlaybackState>({
+  messageId: null,
+  sequence: 0,
+  source: null,
+  status: 'idle'
+})
+
+/** Replaces the whole playback state with `next`. */
+export function setVoicePlaybackState(next: VoicePlaybackState) {
+  $voicePlayback.set(next)
+}
diff --git a/apps/desktop/src/styles.css b/apps/desktop/src/styles.css
index 3ddebe6d5f..9d63d7c7b4 100644
--- a/apps/desktop/src/styles.css
+++ b/apps/desktop/src/styles.css
@@ -184,6 +184,29 @@ button {
   -webkit-app-region: no-drag;
 }
 
+@keyframes voice-wave {
+  0%,
+  100% {
+    opacity: 0.45;
+    transform: scaleY(0.28);
+  }
+
+  35% {
+    opacity: 0.95;
+    transform: scaleY(1);
+  }
+
+  62% {
+    opacity: 0.7;
+    transform: scaleY(0.52);
+  }
+}
+
+.voice-wave-bar {
+  animation: voice-wave 860ms ease-in-out infinite;
+  transform-origin: center;
+}
+
 .composer-liquid-shell-wrap {
   pointer-events: none;
   border-radius: var(--composer-glass-radius, 20px);
diff --git a/apps/desktop/src/types/hermes.ts b/apps/desktop/src/types/hermes.ts
index a7628acdd4..5f125e6d32 100644
--- a/apps/desktop/src/types/hermes.ts
+++ b/apps/desktop/src/types/hermes.ts
@@ -168,6 +168,7 @@ export interface SessionRuntimeInfo {
   personality?: string
   provider?: string
   reasoning_effort?: string
+  running?: boolean
   service_tier?: string
   skills?: Record<string, string[]> | string[]
   tools?: Record<string, string[]>
diff --git a/apps/desktop/vite.config.ts b/apps/desktop/vite.config.ts
index 2307808397..e678a904b6 100644
--- a/apps/desktop/vite.config.ts
+++ b/apps/desktop/vite.config.ts
@@ -4,6 +4,7 @@ import tailwindcss from '@tailwindcss/vite'
 import path from 'path'
 
 export default defineConfig({
+  base: './',
   plugins: [react(), tailwindcss()],
   resolve: {
     alias: {
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 605ab04de6..c7e7730e8b 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -830,7 +830,7 @@ DEFAULT_CONFIG = {
     
     "stt": {
         "enabled": True,
-        "provider": "local",  # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) | "mistral" (Voxtral Transcribe)
+        "provider": "local",  # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) | "mistral" (Voxtral Transcribe) | "elevenlabs" (Scribe)
         "local": {
             "model": "base",  # tiny, base, small, medium, large-v3
             "language": "",  # auto-detect by default; set to "en", "es", "fr", etc. to force
@@ -841,6 +841,12 @@ DEFAULT_CONFIG = {
         "mistral": {
             "model": "voxtral-mini-latest",  # voxtral-mini-latest, voxtral-mini-2602
         },
+        "elevenlabs": {
+            "model_id": "scribe_v2",  # scribe_v2, scribe_v1
+            "language_code": "",  # auto-detect by default; set to "eng", "spa", "fra", etc. to force
+            "tag_audio_events": False,
+            "diarize": False,
+        },
     },
 
     "voice": {
@@ -1791,9 +1797,10 @@ OPTIONAL_ENV_VARS = {
         "category": "tool",
     },
     "ELEVENLABS_API_KEY": {
-        "description": "ElevenLabs API key for premium text-to-speech voices",
+        "description": "ElevenLabs API key for premium text-to-speech voices and Scribe transcription",
         "prompt": "ElevenLabs API key",
         "url": "https://elevenlabs.io/",
+        "tools": ["elevenlabs_tts", "voice_transcription"],
         "password": True,
         "category": "tool",
     },
diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py
index 652731a8bc..1f073ae62c 100644
--- a/hermes_cli/web_server.py
+++ b/hermes_cli/web_server.py
@@ -280,7 +280,12 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
     "stt.provider": {
         "type": "select",
         "description": "Speech-to-text provider",
-        "options": ["local", "openai", "mistral"],
+        "options": ["local", "groq", "openai", "mistral", "xai", "elevenlabs"],
+    },
+    "stt.elevenlabs.model_id": {
+        "type": "select",
+        "description": "ElevenLabs Scribe model",
+        "options": ["scribe_v2", "scribe_v1"],
     },
     "display.skin": {
         "type": "select",
diff --git a/tests/tools/test_transcription_dotenv_fallback.py b/tests/tools/test_transcription_dotenv_fallback.py
index 39f5ca108e..081aa483ce 100644
--- a/tests/tools/test_transcription_dotenv_fallback.py
+++ b/tests/tools/test_transcription_dotenv_fallback.py
@@ -24,6 +24,8 @@ def isolate_env(monkeypatch):
         "MISTRAL_API_KEY",
         "XAI_API_KEY",
         "XAI_STT_BASE_URL",
+        "ELEVENLABS_API_KEY",
+        "ELEVENLABS_STT_BASE_URL",
     ):
         monkeypatch.delenv(key, raising=False)
 
@@ -87,6 +89,15 @@ class TestProviderSelectionGate:
                    return_value={"XAI_API_KEY": "dotenv-secret"}):
             assert tt._get_provider({"enabled": True, "provider": "xai"}) == "xai"
 
+    def test_explicit_elevenlabs_sees_dotenv(self):
+        from tools import transcription_tools as tt
+
+        with patch.object(tt, "_HAS_FASTER_WHISPER", False), \
+             patch.object(tt, "_has_local_command", return_value=False), \
+             patch("hermes_cli.config.load_env",
+                   return_value={"ELEVENLABS_API_KEY": "dotenv-secret"}):
+            assert tt._get_provider({"enabled": True, "provider": "elevenlabs"}) == "elevenlabs"
+
     def test_auto_detect_sees_dotenv_groq(self):
         """No local backend, no explicit provider — auto-detect should fall
         through to Groq when its key lives in dotenv only. Before the fix
@@ -193,6 +204,33 @@ class TestTranscribeCallSitesReadDotenv:
         assert result["success"] is True
         assert captured["headers"]["Authorization"] == "Bearer xai-dotenv-key"
 
+    def test_transcribe_elevenlabs_forwards_dotenv_key(self):
+        from tools import transcription_tools as tt
+
+        captured: dict = {}
+
+        def fake_post(url, **kwargs):
+            captured["url"] = url
+            captured["headers"] = kwargs.get("headers", {})
+            response = MagicMock()
+            response.status_code = 200
+            response.json.return_value = {"text": "hello"}
+            return response
+
+        def fake_get_env_value(name, default=None):
+            if name == "ELEVENLABS_API_KEY":
+                return "elevenlabs-dotenv-key"
+            return None
+
+        with patch.object(tt, "get_env_value", side_effect=fake_get_env_value), \
+             patch.object(tt, "_load_stt_config", return_value={}), \
+             patch("requests.post", side_effect=fake_post), \
+             patch("builtins.open", MagicMock()):
+            result = tt._transcribe_elevenlabs("/tmp/fake.mp3", "scribe_v2")
+
+        assert result["success"] is True
+        assert captured["headers"]["xi-api-key"] == "elevenlabs-dotenv-key"
+
 
 class TestEndToEndRegressionGuard:
     """End-to-end probe: patch ``hermes_cli.config.load_env`` to simulate
diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py
index 5e4a9ad716..c075cfa9eb 100644
--- a/tests/tools/test_transcription_tools.py
+++ b/tests/tools/test_transcription_tools.py
@@ -49,6 +49,7 @@ def clean_env(monkeypatch):
     monkeypatch.delenv("OPENAI_API_KEY", raising=False)
     monkeypatch.delenv("GROQ_API_KEY", raising=False)
     monkeypatch.delenv("MISTRAL_API_KEY", raising=False)
+    monkeypatch.delenv("ELEVENLABS_API_KEY", raising=False)
     monkeypatch.delenv("HERMES_LOCAL_STT_COMMAND", raising=False)
     monkeypatch.delenv("HERMES_LOCAL_STT_LANGUAGE", raising=False)
 
@@ -1342,3 +1343,161 @@ class TestTranscribeAudioXAIDispatch:
             transcribe_audio(sample_ogg, model="custom-stt")
 
         assert mock_xai.call_args[0][1] == "custom-stt"
+
+
+# ============================================================================
+# _transcribe_elevenlabs
+# ============================================================================
+
+class TestTranscribeElevenLabs:
+    """Unit tests for ``_transcribe_elevenlabs`` with ``requests.post`` mocked."""
+
+    def test_no_key(self, monkeypatch):
+        """With the key removed from the environment, the call fails and names it."""
+        monkeypatch.delenv("ELEVENLABS_API_KEY", raising=False)
+        from tools.transcription_tools import _transcribe_elevenlabs
+        outcome = _transcribe_elevenlabs("/tmp/test.ogg", "scribe_v2")
+        assert outcome["success"] is False
+        assert "ELEVENLABS_API_KEY" in outcome["error"]
+
+    def test_successful_transcription(self, monkeypatch, sample_ogg):
+        """A 200 reply yields the transcript; key, model and options are forwarded."""
+        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test-key")
+
+        ok_reply = MagicMock(status_code=200)
+        ok_reply.json.return_value = {"text": "hello from elevenlabs"}
+        options = {"language_code": "eng", "tag_audio_events": True, "diarize": True}
+
+        with patch("tools.transcription_tools._load_stt_config",
+                   return_value={"elevenlabs": options}), \
+             patch("requests.post", return_value=ok_reply) as mock_post:
+            from tools.transcription_tools import _transcribe_elevenlabs
+            outcome = _transcribe_elevenlabs(sample_ogg, "scribe_v2")
+
+        assert outcome["success"] is True
+        assert outcome["transcript"] == "hello from elevenlabs"
+        assert outcome["provider"] == "elevenlabs"
+        # The request must hit the speech-to-text endpoint whatever base URL is in effect.
+        assert mock_post.call_args.args[0].endswith("/speech-to-text")
+        sent = mock_post.call_args.kwargs
+        assert sent["headers"]["xi-api-key"] == "eleven-test-key"
+        form = sent["data"]
+        assert form["model_id"] == "scribe_v2"
+        assert form["language_code"] == "eng"
+        # Boolean options are expected as lowercase strings in the form data.
+        assert form["tag_audio_events"] == "true"
+        assert form["diarize"] == "true"
+
+    def test_api_error_returns_failure(self, monkeypatch, sample_ogg):
+        """A non-200 reply becomes a failure carrying the status and the API message."""
+        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test-key")
+        denied = MagicMock(status_code=401)
+        denied.json.return_value = {"detail": {"message": "Invalid API key"}}
+        denied.text = '{"detail": {"message": "Invalid API key"}}'
+
+        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
+             patch("requests.post", return_value=denied):
+            from tools.transcription_tools import _transcribe_elevenlabs
+            outcome = _transcribe_elevenlabs(sample_ogg, "scribe_v2")
+
+        assert outcome["success"] is False
+        assert "HTTP 401" in outcome["error"]
+        assert "Invalid API key" in outcome["error"]
+
+    def test_empty_transcript_returns_failure(self, monkeypatch, sample_ogg):
+        """A 200 reply whose text is only whitespace is reported as an empty transcript."""
+        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test-key")
+        blank_reply = MagicMock(status_code=200)
+        blank_reply.json.return_value = {"text": "   "}
+
+        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
+             patch("requests.post", return_value=blank_reply):
+            from tools.transcription_tools import _transcribe_elevenlabs
+            outcome = _transcribe_elevenlabs(sample_ogg, "scribe_v2")
+
+        assert outcome["success"] is False
+        assert "empty transcript" in outcome["error"]
+
+
+# ============================================================================
+# _get_provider — ElevenLabs
+# ============================================================================
+
+class TestGetProviderElevenLabs:
+    """Provider-selection cases that involve ElevenLabs."""
+
+    def test_elevenlabs_when_key_set(self, monkeypatch):
+        """Explicit elevenlabs is returned as-is once its API key is present."""
+        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test")
+        from tools.transcription_tools import _get_provider
+        assert "elevenlabs" == _get_provider({"provider": "elevenlabs"})
+
+    def test_elevenlabs_explicit_no_key_returns_none(self, monkeypatch):
+        """Explicit elevenlabs without a key yields "none" rather than another provider."""
+        monkeypatch.delenv("ELEVENLABS_API_KEY", raising=False)
+        from tools.transcription_tools import _get_provider
+        assert "none" == _get_provider({"provider": "elevenlabs"})
+
+    def test_auto_detect_elevenlabs_after_xai(self, monkeypatch):
+        """Auto-detect reaches elevenlabs once every earlier candidate is unavailable."""
+        for earlier_key in ("GROQ_API_KEY", "VOICE_TOOLS_OPENAI_KEY", "OPENAI_API_KEY",
+                            "MISTRAL_API_KEY", "XAI_API_KEY"):
+            monkeypatch.delenv(earlier_key, raising=False)
+        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test")
+        # Switch off the local backends and the OpenAI/Mistral availability flags.
+        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
+             patch("tools.transcription_tools._has_local_command", return_value=False), \
+             patch("tools.transcription_tools._HAS_OPENAI", False), \
+             patch("tools.transcription_tools._HAS_MISTRAL", False):
+            from tools.transcription_tools import _get_provider
+            assert "elevenlabs" == _get_provider({})
+
+    def test_auto_detect_xai_preferred_over_elevenlabs(self, monkeypatch):
+        """Auto-detect picks xai when both the xai and elevenlabs keys are set."""
+        monkeypatch.setenv("XAI_API_KEY", "xai-test")
+        monkeypatch.setenv("ELEVENLABS_API_KEY", "eleven-test")
+        with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \
+             patch("tools.transcription_tools._has_local_command", return_value=False), \
+             patch("tools.transcription_tools._HAS_OPENAI", False), \
+             patch("tools.transcription_tools._HAS_MISTRAL", False):
+            from tools.transcription_tools import _get_provider
+            assert "xai" == _get_provider({})
+
+
+# ============================================================================
+# transcribe_audio — ElevenLabs dispatch
+# ============================================================================
+
+class TestTranscribeAudioElevenLabsDispatch:
+    def test_dispatches_to_elevenlabs(self, sample_ogg):
+        """With the provider resolved to elevenlabs, its backend is called exactly once."""
+        with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "elevenlabs"}), \
+             patch("tools.transcription_tools._get_provider", return_value="elevenlabs"), \
+             patch("tools.transcription_tools._transcribe_elevenlabs",
+                   return_value={"success": True, "transcript": "hi", "provider": "elevenlabs"}) as backend:
+            from tools.transcription_tools import transcribe_audio
+            outcome = transcribe_audio(sample_ogg)
+        assert outcome["success"] is True
+        assert outcome["provider"] == "elevenlabs"
+        backend.assert_called_once()
+
+    def test_config_elevenlabs_model_used(self, sample_ogg):
+        """With no explicit model, ``elevenlabs.model_id`` from the config is passed on."""
+        stt_settings = {"provider": "elevenlabs", "elevenlabs": {"model_id": "scribe_v1"}}
+        with patch("tools.transcription_tools._load_stt_config", return_value=stt_settings), \
+             patch("tools.transcription_tools._get_provider", return_value="elevenlabs"), \
+             patch("tools.transcription_tools._transcribe_elevenlabs",
+                   return_value={"success": True, "transcript": "hi"}) as backend:
+            from tools.transcription_tools import transcribe_audio
+            transcribe_audio(sample_ogg, model=None)
+        assert backend.call_args.args[1] == "scribe_v1"
+
+    def test_model_override_passed_to_elevenlabs(self, sample_ogg):
+        """An explicit ``model`` argument is forwarded as the second positional argument."""
+        with patch("tools.transcription_tools._load_stt_config", return_value={}), \
+             patch("tools.transcription_tools._get_provider", return_value="elevenlabs"), \
+             patch("tools.transcription_tools._transcribe_elevenlabs",
+                   return_value={"success": True, "transcript": "hi"}) as backend:
+            from tools.transcription_tools import transcribe_audio
+            transcribe_audio(sample_ogg, model="scribe_v2")
+        assert backend.call_args.args[1] == "scribe_v2"
diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py
index 663345eb74..0323b1c27e 100644
--- a/tools/transcription_tools.py
+++ b/tools/transcription_tools.py
@@ -11,6 +11,7 @@ Provides speech-to-text transcription with six providers:
   - **mistral** — Mistral Voxtral Transcribe API, requires ``MISTRAL_API_KEY``.
   - **xai** — xAI Grok STT API, requires ``XAI_API_KEY``. High accuracy,
     Inverse Text Normalization, diarization, 21 languages.
+  - **elevenlabs** — ElevenLabs Scribe API, requires ``ELEVENLABS_API_KEY``.
 
 Used by the messaging gateway to automatically transcribe voice messages
 sent by users on Telegram, Discord, WhatsApp, Slack, and Signal.
@@ -84,6 +85,7 @@ DEFAULT_LOCAL_STT_LANGUAGE = "en"
 DEFAULT_STT_MODEL = os.getenv("STT_OPENAI_MODEL", "whisper-1")
 DEFAULT_GROQ_STT_MODEL = os.getenv("STT_GROQ_MODEL", "whisper-large-v3-turbo")
 DEFAULT_MISTRAL_STT_MODEL = os.getenv("STT_MISTRAL_MODEL", "voxtral-mini-latest")
+DEFAULT_ELEVENLABS_STT_MODEL = os.getenv("STT_ELEVENLABS_MODEL", "scribe_v2")
 LOCAL_STT_COMMAND_ENV = "HERMES_LOCAL_STT_COMMAND"
 LOCAL_STT_LANGUAGE_ENV = "HERMES_LOCAL_STT_LANGUAGE"
 COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin")
@@ -91,6 +93,7 @@ COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin")
 GROQ_BASE_URL = os.getenv("GROQ_BASE_URL", "https://api.groq.com/openai/v1")
 OPENAI_BASE_URL = os.getenv("STT_OPENAI_BASE_URL", "https://api.openai.com/v1")
 XAI_STT_BASE_URL = os.getenv("XAI_STT_BASE_URL", "https://api.x.ai/v1")
+ELEVENLABS_STT_BASE_URL = os.getenv("ELEVENLABS_STT_BASE_URL", "https://api.elevenlabs.io/v1")
 
 SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg", ".aac", ".flac"}
 LOCAL_NATIVE_AUDIO_FORMATS = {".wav", ".aiff", ".aif"}
@@ -268,9 +271,17 @@ def _get_provider(stt_config: dict) -> str:
             )
             return "none"
 
+        if provider == "elevenlabs":
+            if get_env_value("ELEVENLABS_API_KEY"):
+                return "elevenlabs"
+            logger.warning(
+                "STT provider 'elevenlabs' configured but ELEVENLABS_API_KEY not set"
+            )
+            return "none"
+
         return provider  # Unknown — let it fail downstream
 
-    # --- Auto-detect (no explicit provider): local > groq > openai > mistral > xai -
+    # --- Auto-detect (no explicit provider): local > groq > openai > mistral > xai > elevenlabs -
 
     if _HAS_FASTER_WHISPER:
         return "local"
@@ -288,6 +299,9 @@ def _get_provider(stt_config: dict) -> str:
     if get_env_value("XAI_API_KEY"):
         logger.info("No local STT available, using xAI Grok STT API")
         return "xai"
+    if get_env_value("ELEVENLABS_API_KEY"):
+        logger.info("No local STT available, using ElevenLabs Scribe STT API")
+        return "elevenlabs"
     return "none"
 
 # ---------------------------------------------------------------------------
@@ -781,6 +795,92 @@ def _transcribe_xai(file_path: str, model_name: str) -> Dict[str, Any]:
         return {"success": False, "transcript": "", "error": f"xAI STT transcription failed: {e}"}
 
 
+# ---------------------------------------------------------------------------
+# Provider: ElevenLabs (Scribe STT API)
+# ---------------------------------------------------------------------------
+
+
+def _transcribe_elevenlabs(file_path: str, model_name: str) -> Dict[str, Any]:
+    """Transcribe an audio file with the ElevenLabs Scribe speech-to-text API.
+
+    Args:
+        file_path: Path of the audio file to upload.
+        model_name: Value sent as the ``model_id`` form field.
+
+    Returns:
+        ``{"success": True, "transcript": ..., "provider": "elevenlabs"}`` on
+        success, otherwise ``{"success": False, "transcript": "", "error": ...}``.
+        Failures during the request or while reading the reply are returned, not raised.
+    """
+    api_key = get_env_value("ELEVENLABS_API_KEY")
+    if not api_key:
+        return {"success": False, "transcript": "", "error": "ELEVENLABS_API_KEY not set"}
+
+    # A bare ``elevenlabs:`` key in YAML loads as None and a scalar is equally
+    # unusable; treat anything that is not a mapping as "no provider options".
+    section = _load_stt_config().get("elevenlabs")
+    elevenlabs_config = section if isinstance(section, dict) else {}
+    # Endpoint precedence: config ``base_url`` > ELEVENLABS_STT_BASE_URL env > module default.
+    base_url = str(
+        elevenlabs_config.get("base_url")
+        or get_env_value("ELEVENLABS_STT_BASE_URL")
+        or ELEVENLABS_STT_BASE_URL
+    ).strip().rstrip("/")
+    language_code = str(elevenlabs_config.get("language_code") or "").strip()
+    tag_audio_events = is_truthy_value(elevenlabs_config.get("tag_audio_events", False))
+    diarize = is_truthy_value(elevenlabs_config.get("diarize", False))
+
+    try:
+        import requests
+
+        data: Dict[str, str] = {
+            "model_id": model_name,
+            "tag_audio_events": "true" if tag_audio_events else "false",
+            "diarize": "true" if diarize else "false",
+        }
+        if language_code:  # left out of the form entirely when not configured
+            data["language_code"] = language_code
+
+        with open(file_path, "rb") as audio_file:
+            response = requests.post(
+                f"{base_url}/speech-to-text",
+                headers={"xi-api-key": api_key},
+                files={"file": (Path(file_path).name, audio_file)},
+                data=data,
+                timeout=120,
+            )
+
+        if response.status_code != 200:
+            # Prefer the API's own message; fall back to the raw body, truncated to 300 chars.
+            try:
+                err_body = response.json()
+                error_value = err_body.get("detail") or err_body.get("error")
+                if isinstance(error_value, dict):
+                    detail = str(error_value.get("message") or error_value)
+                elif error_value:
+                    detail = str(error_value)
+                else:
+                    detail = response.text[:300]
+            except Exception:
+                detail = response.text[:300]
+            error = f"ElevenLabs STT API error (HTTP {response.status_code}): {detail}"
+            return {"success": False, "transcript": "", "error": error}
+
+        transcript_text = _extract_transcript_text(response.json())
+        if not transcript_text:
+            return {"success": False, "transcript": "", "error": "ElevenLabs STT returned empty transcript"}
+
+        logger.info("Transcribed %s via ElevenLabs Scribe (%s, %d chars)",
+                    Path(file_path).name, model_name, len(transcript_text))
+        return {"success": True, "transcript": transcript_text, "provider": "elevenlabs"}
+
+    except PermissionError:
+        return {"success": False, "transcript": "", "error": f"Permission denied: {file_path}"}
+    except Exception as e:
+        logger.error("ElevenLabs STT transcription failed: %s", e, exc_info=True)
+        return {"success": False, "transcript": "", "error": f"ElevenLabs STT transcription failed: {e}"}
+
+
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
@@ -792,7 +892,7 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
 
     Provider priority:
       1. User config (``stt.provider`` in config.yaml)
-      2. Auto-detect: local faster-whisper (free) > Groq (free tier) > OpenAI (paid)
+      2. Auto-detect: local > Groq > OpenAI > Mistral > xAI > ElevenLabs
 
     Args:
         file_path: Absolute path to the audio file to transcribe.
@@ -854,6 +954,11 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
         model_name = model or "grok-stt"
         return _transcribe_xai(file_path, model_name)
 
+    if provider == "elevenlabs":
+        elevenlabs_cfg = stt_config.get("elevenlabs", {})
+        model_name = model or elevenlabs_cfg.get("model_id", DEFAULT_ELEVENLABS_STT_MODEL)
+        return _transcribe_elevenlabs(file_path, model_name)
+
     # No provider available
     return {
         "success": False,
@@ -862,8 +967,9 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A
             "No STT provider available. Install faster-whisper for free local "
             f"transcription, configure {LOCAL_STT_COMMAND_ENV} or install a local whisper CLI, "
             "set GROQ_API_KEY for free Groq Whisper, set MISTRAL_API_KEY for Mistral "
-            "Voxtral Transcribe, set XAI_API_KEY for xAI Grok STT, or set VOICE_TOOLS_OPENAI_KEY "
-            "or OPENAI_API_KEY for the OpenAI Whisper API."
+            "Voxtral Transcribe, set XAI_API_KEY for xAI Grok STT, set ELEVENLABS_API_KEY "
+            "for ElevenLabs Scribe, or set VOICE_TOOLS_OPENAI_KEY or OPENAI_API_KEY for "
+            "the OpenAI Whisper API."
         ),
     }
 
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index cf14660c19..e7a72b7d61 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -1409,6 +1409,7 @@ def _session_info(agent, session: dict | None = None) -> dict:
         "cwd": cwd,
         "branch": _git_branch_for_cwd(cwd),
         "personality": str(personality or ""),
+        "running": bool((session or {}).get("running")),
         "version": "",
         "release_date": "",
         "update_behind": None,