diff --git a/apps/desktop/electron/main.cjs b/apps/desktop/electron/main.cjs index f5f6a376d33..62af3d859a5 100644 --- a/apps/desktop/electron/main.cjs +++ b/apps/desktop/electron/main.cjs @@ -2611,15 +2611,54 @@ function installContextMenu(window) { }) } -function installMediaPermissions() { - session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback, details) => { - if (permission === 'media' && details?.mediaTypes?.includes('audio')) { - callback(true) +// Microphone capture for the voice composer. The renderer drives mic access +// through getUserMedia, which Chromium gates behind these two session hooks. +// +// The naive `details.mediaTypes.includes('audio')` check works on macOS but +// breaks on Windows: Chromium frequently fires the mic permission request with +// an empty/undefined `mediaTypes`, so the strict check denies it and +// getUserMedia throws NotAllowedError ("Microphone permission was denied"). +// We therefore treat an audio-capture request as allowed whenever it's the +// 'media'/'audioCapture' permission AND mediaTypes either includes 'audio' OR +// is empty/absent (the Windows case). Video is still denied. +function isAudioCapturePermission(permission, details) { + if (permission === 'audioCapture') { + return true + } + if (permission !== 'media') { + return false + } + const mediaTypes = details?.mediaTypes + if (!Array.isArray(mediaTypes) || mediaTypes.length === 0) { + // Windows: mediaTypes is often empty for a mic request. Don't deny on + // missing metadata. (A video request would carry mediaTypes:['video'].) + return true + } + return mediaTypes.includes('audio') && !mediaTypes.includes('video') +} - return +function installMediaPermissions() { + // Async request handler: the prompt-style path (most platforms). + session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback, details) => { + callback(isAudioCapturePermission(permission, details)) + }) + + // Synchronous check handler: Chromium consults this for getUserMedia on + // Windows in addition to (or instead of) the request handler. Without it, + // the check defaults to false and the mic is denied before the request + // handler ever runs. + session.defaultSession.setPermissionCheckHandler((_webContents, permission, _origin, details) => { + if (permission === 'media' || permission === 'audioCapture') { + // details.mediaType is a single string here (not the mediaTypes array). + const mediaType = details?.mediaType + if (mediaType === 'video') { + return false + } + + return true } - callback(false) + return false }) } diff --git a/apps/desktop/src/app/chat/composer/index.tsx b/apps/desktop/src/app/chat/composer/index.tsx index 10887fbb8d6..a0b1a370baa 100644 --- a/apps/desktop/src/app/chat/composer/index.tsx +++ b/apps/desktop/src/app/chat/composer/index.tsx @@ -331,6 +331,21 @@ export function ChatBar({ draftRef.current = nextDraft aui.composer().setText(nextDraft) + + // Push the new text into the contentEditable editor directly. Setting the + // assistant-ui composer state alone is not enough: the draft→editor sync + // effect only re-renders the editor when it is NOT focused + // (document.activeElement !== editor), and the dictation/insert paths + // typically run while the editor has (or immediately regains) focus — so + // the store would hold the text but the visible editor would stay empty + // and there'd be nothing to send. Mirror appendExternalText here. + const editor = editorRef.current + + if (editor) { + renderComposerContents(editor, nextDraft) + placeCaretEnd(editor) + } + requestMainFocus() }