diff --git a/apps/desktop/src/app/settings/constants.ts b/apps/desktop/src/app/settings/constants.ts
index 4d0e11b2822..1cf7cf3ce16 100644
--- a/apps/desktop/src/app/settings/constants.ts
+++ b/apps/desktop/src/app/settings/constants.ts
@@ -240,9 +240,37 @@ export const ENUM_OPTIONS: Record<string, string[]> = {
   'context.engine': ['compressor', 'default', 'custom'],
   'delegation.reasoning_effort': ['', 'minimal', 'low', 'medium', 'high', 'xhigh'],
   'memory.provider': ['', 'builtin', 'honcho'],
+  // Terminal execution backends — kept in sync with the dispatch ladder in
+  // tools/terminal_tool.py::_create_environment (local/docker/singularity/
+  // modal/daytona/ssh). Remote backends need extra env (image, tokens, host).
+  'terminal.backend': ['local', 'docker', 'singularity', 'modal', 'daytona', 'ssh'],
   'stt.elevenlabs.model_id': ['scribe_v2', 'scribe_v1'],
   'stt.local.model': ['tiny', 'base', 'small', 'medium', 'large-v3'],
+  // Speech-to-text backends — kept in sync with the stt block in
+  // hermes_cli/config.py (local/groq/openai/mistral/elevenlabs), plus 'xai' (Grok STT).
+  'stt.provider': ['local', 'groq', 'openai', 'mistral', 'xai', 'elevenlabs'],
   'tts.openai.voice': ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
+  // Text-to-speech backends — kept in sync with the built-in source of truth
+  // (agent/tts_registry.py::_BUILTIN_NAMES / tools/tts_tool.py::
+  // BUILTIN_TTS_PROVIDERS). 'xai' is Grok TTS.
+  'tts.provider': [
+    'edge',
+    'elevenlabs',
+    'openai',
+    'xai',
+    'minimax',
+    'mistral',
+    'gemini',
+    'neutts',
+    'kittentts',
+    'piper'
+  ],
+  'stt.openai.model': ['whisper-1', 'gpt-4o-mini-transcribe', 'gpt-4o-transcribe'],
+  'stt.mistral.model': ['voxtral-mini-latest', 'voxtral-mini-2602'],
+  'tts.openai.model': ['gpt-4o-mini-tts', 'tts-1', 'tts-1-hd'],
+  'tts.elevenlabs.model_id': ['eleven_multilingual_v2', 'eleven_turbo_v2_5', 'eleven_flash_v2_5'],
+  // NeuTTS local inference device.
+  'tts.neutts.device': ['cpu', 'cuda', 'mps'],
   'updates.non_interactive_local_changes': ['stash', 'discard']
 }
 
@@ -268,7 +296,11 @@ export const FIELD_LABELS: Record<string, string> = defineFieldCopy({
     backend: 'Execution Backend',
     timeout: 'Command Timeout',
     persistentShell: 'Persistent Shell',
-    envPassthrough: 'Environment Passthrough'
+    envPassthrough: 'Environment Passthrough',
+    dockerImage: 'Docker Image',
+    singularityImage: 'Singularity Image',
+    modalImage: 'Modal Image',
+    daytonaImage: 'Daytona Image'
   },
   fileReadMaxChars: 'File Read Limit',
   toolOutput: {
@@ -309,6 +341,15 @@ export const FIELD_LABELS: Record<string, string> = defineFieldCopy({
       model: 'Local Transcription Model',
       language: 'Transcription Language'
     },
+    openai: {
+      model: 'OpenAI STT Model'
+    },
+    groq: {
+      model: 'Groq STT Model'
+    },
+    mistral: {
+      model: 'Mistral STT Model'
+    },
     elevenlabs: {
       modelId: 'ElevenLabs STT Model',
       languageCode: 'ElevenLabs Language',
@@ -328,6 +369,33 @@ export const FIELD_LABELS: Record<string, string> = defineFieldCopy({
     elevenlabs: {
       voiceId: 'ElevenLabs Voice',
       modelId: 'ElevenLabs Model'
+    },
+    xai: {
+      voiceId: 'xAI (Grok) Voice',
+      language: 'xAI Language'
+    },
+    minimax: {
+      model: 'MiniMax TTS Model',
+      voiceId: 'MiniMax Voice'
+    },
+    mistral: {
+      model: 'Mistral TTS Model',
+      voiceId: 'Mistral Voice'
+    },
+    gemini: {
+      model: 'Gemini TTS Model',
+      voice: 'Gemini Voice'
+    },
+    neutts: {
+      model: 'NeuTTS Model',
+      device: 'NeuTTS Device'
+    },
+    kittentts: {
+      model: 'KittenTTS Model',
+      voice: 'KittenTTS Voice'
+    },
+    piper: {
+      voice: 'Piper Voice'
     }
   },
   memory: {
@@ -375,7 +443,11 @@ export const FIELD_DESCRIPTIONS: Record<string, string> = defineFieldCopy({
   terminal: {
     cwd: 'Default project folder for tool and terminal work.',
     persistentShell: 'Keep shell state between commands when the backend supports it.',
-    envPassthrough: 'Environment variables to pass into tool execution.'
+    envPassthrough: 'Environment variables to pass into tool execution.',
+    dockerImage: 'Container image used when the execution backend is Docker.',
+    singularityImage: 'Image used when the execution backend is Singularity.',
+    modalImage: 'Image used when the execution backend is Modal.',
+    daytonaImage: 'Image used when the execution backend is Daytona.'
   },
   codeExecution: {
     mode: 'How strictly code execution is scoped to the current project.'
@@ -404,6 +476,15 @@ export const FIELD_DESCRIPTIONS: Record<string, string> = defineFieldCopy({
   voice: {
     autoTts: 'Automatically speak assistant responses.'
   },
+  tts: {
+    xai: {
+      voiceId: 'xAI voice ID (e.g. eve) or a custom voice ID.',
+      language: 'Spoken language code, e.g. en.'
+    },
+    neutts: {
+      device: 'Local inference device for NeuTTS.'
+    }
+  },
   stt: {
     enabled: 'Enable local or provider-backed speech transcription.',
     elevenlabs: {
@@ -495,8 +576,24 @@ export const SECTIONS: DesktopConfigSection[] = [
       'tts.openai.voice',
       'tts.elevenlabs.voice_id',
       'tts.elevenlabs.model_id',
+      'tts.xai.voice_id',
+      'tts.xai.language',
+      'tts.minimax.model',
+      'tts.minimax.voice_id',
+      'tts.mistral.model',
+      'tts.mistral.voice_id',
+      'tts.gemini.model',
+      'tts.gemini.voice',
+      'tts.neutts.model',
+      'tts.neutts.device',
+      'tts.kittentts.model',
+      'tts.kittentts.voice',
+      'tts.piper.voice',
       'stt.local.model',
       'stt.local.language',
+      'stt.openai.model',
+      'stt.groq.model',
+      'stt.mistral.model',
       'stt.elevenlabs.model_id',
       'stt.elevenlabs.language_code',
       'stt.elevenlabs.tag_audio_events',
@@ -513,6 +610,10 @@ export const SECTIONS: DesktopConfigSection[] = [
       'toolsets',
       'terminal.backend',
       'terminal.timeout',
+      'terminal.docker_image',
+      'terminal.singularity_image',
+      'terminal.modal_image',
+      'terminal.daytona_image',
       'tool_output.max_bytes',
       'tool_output.max_lines',
       'tool_output.max_line_length',
diff --git a/apps/desktop/src/app/settings/helpers.test.ts b/apps/desktop/src/app/settings/helpers.test.ts
index ee2377a24b1..b65d63d3296 100644
--- a/apps/desktop/src/app/settings/helpers.test.ts
+++ b/apps/desktop/src/app/settings/helpers.test.ts
@@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest'
 import type { HermesConfigRecord } from '@/types/hermes'
 
 import { defineFieldCopy, fieldCopyForSchemaKey, schemaKeyToFieldCopyKey } from './field-copy'
-import { getNested, providerGroup, setNested, stripToolsetLabel, toolsetDisplayLabel } from './helpers'
+import { enumOptionsFor, getNested, providerGroup, setNested, stripToolsetLabel, toolsetDisplayLabel } from './helpers'
 
 describe('settings helpers', () => {
   describe('defineFieldCopy', () => {
@@ -135,4 +135,38 @@ describe('settings helpers', () => {
       expect(providerGroup('SOMETHING_RANDOM')).toBe('Other')
     })
   })
+
+  describe('enumOptionsFor — backend selector dropdowns', () => {
+    const config: HermesConfigRecord = {}
+
+    it('renders a dropdown for the TTS provider including xAI (Grok)', () => {
+      const opts = enumOptionsFor('tts.provider', 'edge', config)
+      expect(opts).toBeDefined()
+      expect(opts).toContain('xai')
+      expect(opts).toContain('edge')
+      expect(opts).toContain('elevenlabs')
+    })
+
+    it('renders a dropdown for the STT provider including xAI (Grok)', () => {
+      const opts = enumOptionsFor('stt.provider', 'local', config)
+      expect(opts).toEqual(['local', 'groq', 'openai', 'mistral', 'xai', 'elevenlabs'])
+    })
+
+    it('renders dropdowns for per-backend model/device sub-fields', () => {
+      expect(enumOptionsFor('stt.openai.model', 'whisper-1', config)).toContain('gpt-4o-transcribe')
+      expect(enumOptionsFor('tts.openai.model', 'gpt-4o-mini-tts', config)).toContain('tts-1-hd')
+      expect(enumOptionsFor('tts.neutts.device', 'cpu', config)).toEqual(['cpu', 'cuda', 'mps'])
+    })
+
+    it('renders a dropdown for the terminal execution backend', () => {
+      const opts = enumOptionsFor('terminal.backend', 'local', config)
+      expect(opts).toEqual(['local', 'docker', 'singularity', 'modal', 'daytona', 'ssh'])
+    })
+
+    it('appends a hand-typed value not in the known list so it stays selected', () => {
+      const opts = enumOptionsFor('tts.provider', 'my-custom-command-tts', config)
+      expect(opts).toContain('my-custom-command-tts')
+      expect(opts).toContain('xai')
+    })
+  })
 })
diff --git a/apps/desktop/src/i18n/ja.ts b/apps/desktop/src/i18n/ja.ts
index 65a2bf6591e..625a4abdec6 100644
--- a/apps/desktop/src/i18n/ja.ts
+++ b/apps/desktop/src/i18n/ja.ts
@@ -239,7 +239,11 @@ export const ja = defineLocale({
         backend: '実行バックエンド',
         timeout: 'コマンドタイムアウト',
         persistentShell: '永続シェル',
-        envPassthrough: '環境変数の引き継ぎ'
+        envPassthrough: '環境変数の引き継ぎ',
+        dockerImage: 'Docker イメージ',
+        singularityImage: 'Singularity イメージ',
+        modalImage: 'Modal イメージ',
+        daytonaImage: 'Daytona イメージ'
       },
       fileReadMaxChars: 'ファイル読み取り上限',
       toolOutput: {
@@ -280,6 +284,15 @@ export const ja = defineLocale({
           model: 'ローカル文字起こしモデル',
           language: '文字起こし言語'
         },
+        openai: {
+          model: 'OpenAI STT モデル'
+        },
+        groq: {
+          model: 'Groq STT モデル'
+        },
+        mistral: {
+          model: 'Mistral STT モデル'
+        },
         elevenlabs: {
           modelId: 'ElevenLabs STT モデル',
           languageCode: 'ElevenLabs 言語',
@@ -299,6 +312,33 @@ export const ja = defineLocale({
         elevenlabs: {
           voiceId: 'ElevenLabs 音声',
           modelId: 'ElevenLabs モデル'
+        },
+        xai: {
+          voiceId: 'xAI (Grok) 音声',
+          language: 'xAI 言語'
+        },
+        minimax: {
+          model: 'MiniMax TTS モデル',
+          voiceId: 'MiniMax 音声'
+        },
+        mistral: {
+          model: 'Mistral TTS モデル',
+          voiceId: 'Mistral 音声'
+        },
+        gemini: {
+          model: 'Gemini TTS モデル',
+          voice: 'Gemini 音声'
+        },
+        neutts: {
+          model: 'NeuTTS モデル',
+          device: 'NeuTTS デバイス'
+        },
+        kittentts: {
+          model: 'KittenTTS モデル',
+          voice: 'KittenTTS 音声'
+        },
+        piper: {
+          voice: 'Piper 音声'
         }
       },
       memory: {
diff --git a/apps/desktop/src/i18n/zh-hant.ts b/apps/desktop/src/i18n/zh-hant.ts
index 76a0ea69643..c09793ccf34 100644
--- a/apps/desktop/src/i18n/zh-hant.ts
+++ b/apps/desktop/src/i18n/zh-hant.ts
@@ -233,7 +233,11 @@ export const zhHant = defineLocale({
         backend: '執行後端',
         timeout: '指令逾時',
         persistentShell: '持久化 Shell',
-        envPassthrough: '環境變數傳遞'
+        envPassthrough: '環境變數傳遞',
+        dockerImage: 'Docker 映像',
+        singularityImage: 'Singularity 映像',
+        modalImage: 'Modal 映像',
+        daytonaImage: 'Daytona 映像'
       },
       fileReadMaxChars: '檔案讀取上限',
       toolOutput: {
@@ -274,6 +278,15 @@ export const zhHant = defineLocale({
           model: '本機轉寫模型',
           language: '轉寫語言'
         },
+        openai: {
+          model: 'OpenAI STT 模型'
+        },
+        groq: {
+          model: 'Groq STT 模型'
+        },
+        mistral: {
+          model: 'Mistral STT 模型'
+        },
         elevenlabs: {
           modelId: 'ElevenLabs STT 模型',
           languageCode: 'ElevenLabs 語言',
@@ -293,6 +306,33 @@ export const zhHant = defineLocale({
         elevenlabs: {
           voiceId: 'ElevenLabs 語音',
           modelId: 'ElevenLabs 模型'
+        },
+        xai: {
+          voiceId: 'xAI (Grok) 語音',
+          language: 'xAI 語言'
+        },
+        minimax: {
+          model: 'MiniMax TTS 模型',
+          voiceId: 'MiniMax 語音'
+        },
+        mistral: {
+          model: 'Mistral TTS 模型',
+          voiceId: 'Mistral 語音'
+        },
+        gemini: {
+          model: 'Gemini TTS 模型',
+          voice: 'Gemini 語音'
+        },
+        neutts: {
+          model: 'NeuTTS 模型',
+          device: 'NeuTTS 裝置'
+        },
+        kittentts: {
+          model: 'KittenTTS 模型',
+          voice: 'KittenTTS 語音'
+        },
+        piper: {
+          voice: 'Piper 語音'
         }
       },
       memory: {
diff --git a/apps/desktop/src/i18n/zh.ts b/apps/desktop/src/i18n/zh.ts
index d091e505586..7eac7b467b2 100644
--- a/apps/desktop/src/i18n/zh.ts
+++ b/apps/desktop/src/i18n/zh.ts
@@ -311,7 +311,11 @@ export const zh: Translations = {
         backend: '执行后端',
         timeout: '命令超时',
         persistentShell: '持久化 Shell',
-        envPassthrough: '环境变量透传'
+        envPassthrough: '环境变量透传',
+        dockerImage: 'Docker 镜像',
+        singularityImage: 'Singularity 镜像',
+        modalImage: 'Modal 镜像',
+        daytonaImage: 'Daytona 镜像'
       },
       fileReadMaxChars: '文件读取上限',
       toolOutput: {
@@ -352,6 +356,15 @@ export const zh: Translations = {
           model: '本地转写模型',
           language: '转写语言'
         },
+        openai: {
+          model: 'OpenAI STT 模型'
+        },
+        groq: {
+          model: 'Groq STT 模型'
+        },
+        mistral: {
+          model: 'Mistral STT 模型'
+        },
         elevenlabs: {
           modelId: 'ElevenLabs STT 模型',
           languageCode: 'ElevenLabs 语言',
@@ -371,6 +384,33 @@ export const zh: Translations = {
         elevenlabs: {
           voiceId: 'ElevenLabs 语音',
           modelId: 'ElevenLabs 模型'
+        },
+        xai: {
+          voiceId: 'xAI (Grok) 语音',
+          language: 'xAI 语言'
+        },
+        minimax: {
+          model: 'MiniMax TTS 模型',
+          voiceId: 'MiniMax 语音'
+        },
+        mistral: {
+          model: 'Mistral TTS 模型',
+          voiceId: 'Mistral 语音'
+        },
+        gemini: {
+          model: 'Gemini TTS 模型',
+          voice: 'Gemini 语音'
+        },
+        neutts: {
+          model: 'NeuTTS 模型',
+          device: 'NeuTTS 设备'
+        },
+        kittentts: {
+          model: 'KittenTTS 模型',
+          voice: 'KittenTTS 语音'
+        },
+        piper: {
+          voice: 'Piper 语音'
         }
       },
       memory: {