fix(tui): harden voice.record + TextInput paste + super-mod reserved list

Three round-7 Copilot follow-ups on #19835: - voice.record start handler used _load_cfg().get('voice', {}).get(...) without shape checks, so malformed YAML (bool/scalar/list) returned 5025 instead of using VAD defaults. Centralized _voice_cfg_dict() helper and type-guarded silence_threshold/silence_duration with numeric fallbacks. - TextInput pass-through check moved above paste/copy handling so configured voice chords (ctrl+v / alt+v / cmd+v) beat the composer's paste/copy defaults. - parser now also rejects super+{c,d,l,v} — on macOS those are copy/exit/clear/paste and would be advertised in /voice status but never actually toggle recording.
2026-06-19 10:02:16 +00:00 · 2026-05-04 15:15:54 -05:00 · 2026-05-04 15:15:54 -05:00 · be8223130b
commit be8223130b
parent ff4e478fcc
5 changed files with 100 additions and 22 deletions
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@ -156,6 +156,42 @@ def test_voice_toggle_handles_non_dict_voice_cfg(monkeypatch):
        )


+def test_voice_record_start_handles_non_dict_voice_cfg(monkeypatch):
+    """Round-7 Copilot review regression on #19835.
+
+    The ``voice.record`` start path previously read
+    ``_load_cfg().get("voice", {}).get(...)`` without any shape checks.
+    When ``voice`` is a non-dict (bool/scalar/list) ``get`` raises
+    AttributeError and the handler returns 5025 instead of falling
+    back to the VAD defaults. Now it uses ``_voice_cfg_dict()`` and
+    non-numeric silence values are coerced to the documented defaults.
+    """
+    captured: dict = {}
+
+    def fake_start_continuous(**kwargs):
+        captured.update(kwargs)
+
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.voice",
+        types.SimpleNamespace(start_continuous=fake_start_continuous, stop_continuous=lambda: None),
+    )
+    monkeypatch.setenv("HERMES_VOICE", "1")
+
+    for bad in (True, "cmd+b", None, 42, ["ctrl+b"], {"silence_threshold": "loud"}):
+        captured.clear()
+        monkeypatch.setattr(server, "_load_cfg", lambda b=bad: {"voice": b})
+
+        resp = server.dispatch(
+            {"id": "voice-record", "method": "voice.record", "params": {"action": "start"}}
+        )
+
+        assert "result" in resp, f"voice.record raised for voice={bad!r}: {resp.get('error')}"
+        assert resp["result"]["status"] == "recording"
+        assert captured["silence_threshold"] == 200
+        assert captured["silence_duration"] == 3.0
+
+
 def test_voice_toggle_tts_branch_also_carries_record_key(monkeypatch):
    """Round-2 Copilot review regression on #19835.

--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@ -5278,22 +5278,26 @@ def _voice_tts_enabled() -> bool:
    return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1"


-def _voice_record_key() -> str:
-    """Current ``voice.record_key`` value, documented default on error.
+def _voice_cfg_dict() -> dict:
+    """Shape-safe accessor for the ``voice:`` block in config.yaml.

    ``_load_cfg()`` returns raw ``yaml.safe_load()`` output, so both the
    root AND ``voice`` may be any YAML scalar / list / None. A hand-edit
-    like ``voice: true`` or ``voice: cmd+b`` (string where a dict is
-    expected) or even a malformed top-level config that parses to a
-    scalar would otherwise break ``.get("record_key")`` and take every
-    ``voice.toggle`` branch down with it (Copilot round-3/round-4
-    review on #19835). Coerce through ``isinstance`` at every level so
-    malformed config falls back to the documented default instead of
-    crashing /voice.
+    like ``voice: true`` or a malformed top-level config that parses to
+    a scalar would otherwise break ``.get("…")`` and take every
+    ``voice.*`` branch down with it (Copilot round-3..7 review on
+    #19835). Coerce through ``isinstance`` at every level so malformed
+    config falls back to an empty dict instead of crashing /voice.
    """
    cfg = _load_cfg()
    voice_cfg = cfg.get("voice") if isinstance(cfg, dict) else None
-    record_key = voice_cfg.get("record_key") if isinstance(voice_cfg, dict) else None
+
+    return voice_cfg if isinstance(voice_cfg, dict) else {}
+
+
+def _voice_record_key() -> str:
+    """Current ``voice.record_key`` value, documented default on error."""
+    record_key = _voice_cfg_dict().get("record_key")

    return str(record_key) if isinstance(record_key, str) and record_key else "ctrl+b"

@ -5419,15 +5423,19 @@ def _(rid, params: dict) -> dict:

            from hermes_cli.voice import start_continuous

-            voice_cfg = _load_cfg().get("voice", {})
+            # Shape-safe lookups: malformed ``voice:`` YAML (bool/scalar/list)
+            # must not crash /voice with a 5025 — fall back to VAD defaults.
+            voice_cfg = _voice_cfg_dict()
+            threshold = voice_cfg.get("silence_threshold")
+            duration = voice_cfg.get("silence_duration")
            start_continuous(
                on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
                on_status=lambda s: _voice_emit("voice.status", {"state": s}),
                on_silent_limit=lambda: _voice_emit(
                    "voice.transcript", {"no_speech_limit": True}
                ),
-                silence_threshold=voice_cfg.get("silence_threshold", 200),
-                silence_duration=voice_cfg.get("silence_duration", 3.0),
+                silence_threshold=threshold if isinstance(threshold, (int, float)) else 200,
+                silence_duration=duration if isinstance(duration, (int, float)) else 3.0,
            )
            return _ok(rid, {"status": "recording"})

--- a/ui-tui/src/tests/platform.test.ts
+++ b/ui-tui/src/tests/platform.test.ts
@ -207,10 +207,23 @@ describe('parseVoiceRecordKey (#18994)', () => {
    expect(parseVoiceRecordKey('ctrl+c')).toEqual(DEFAULT_VOICE_RECORD_KEY)
    expect(parseVoiceRecordKey('ctrl+d')).toEqual(DEFAULT_VOICE_RECORD_KEY)
    expect(parseVoiceRecordKey('ctrl+l')).toEqual(DEFAULT_VOICE_RECORD_KEY)
-    // Alt- and super-modifier versions of those letters are NOT
-    // intercepted, so they remain usable.
+    // Alt-modifier versions of those letters are NOT intercepted, so
+    // they remain usable.
    expect(parseVoiceRecordKey('alt+c').mod).toBe('alt')
-    expect(parseVoiceRecordKey('super+l').mod).toBe('super')
+  })
+
+  it('rejects super+{c,d,l,v} — mac action-mod chords are claimed before voice', async () => {
+    const { DEFAULT_VOICE_RECORD_KEY, parseVoiceRecordKey } = await importPlatform('linux')
+
+    // On macOS super+c/d/l/v are copy / exit / clear / paste. Reject at
+    // parse time so /voice status doesn't advertise dead bindings.
+    expect(parseVoiceRecordKey('super+c')).toEqual(DEFAULT_VOICE_RECORD_KEY)
+    expect(parseVoiceRecordKey('super+d')).toEqual(DEFAULT_VOICE_RECORD_KEY)
+    expect(parseVoiceRecordKey('super+l')).toEqual(DEFAULT_VOICE_RECORD_KEY)
+    expect(parseVoiceRecordKey('super+v')).toEqual(DEFAULT_VOICE_RECORD_KEY)
+    // Other super letters still work (no global chord claims them).
+    expect(parseVoiceRecordKey('super+b').mod).toBe('super')
+    expect(parseVoiceRecordKey('super+o').mod).toBe('super')
  })

  // Round-5 Copilot review regressions on #19835.
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@ -707,6 +707,15 @@ export function TextInput({
    (inp: string, k: Key, event: InputEvent) => {
      const eventRaw = event.keypress.raw

+      // Configured voice shortcut wins over composer-level defaults like
+      // paste/copy so users who bind voice to ctrl+v / alt+v / cmd+v
+      // actually get voice toggled instead of a paste (Copilot round-7
+      // follow-up on #19835). The pass-through predicate is a no-op for
+      // ordinary typing and plain paste when voice is unbound to 'v'.
+      if (shouldPassThroughToGlobalHandler(inp, k, voiceRecordKey)) {
+        return
+      }
+
      if (
        eventRaw === '\x1bv' ||
        eventRaw === '\x1bV' ||
@ -752,12 +761,6 @@ export function TextInput({
        return
      }

-      // Chords claimed by useInputHandlers — pass through instead of letting
-      // them fall into text-editing behavior here.
-      if (shouldPassThroughToGlobalHandler(inp, k, voiceRecordKey)) {
-        return
-      }
-
      if (k.return) {
        if (k.shift || k.ctrl || (isMac ? isActionMod(k) : k.meta)) {
          flushParentChange()
--- a/ui-tui/src/lib/platform.ts
+++ b/ui-tui/src/lib/platform.ts
@ -138,6 +138,16 @@ const _NAMED_KEY_ALIASES: Record<string, VoiceRecordKeyNamed> = {
 * round-4 review on #19835). */
 const _RESERVED_CTRL_CHARS = new Set(['c', 'd', 'l'])

+/** On macOS the action-modifier also intercepts standard editor chords
+ * via ``isCopyShortcut`` / ``isAction`` in ``useInputHandlers()``:
+ *  - super+c → copy
+ *  - super+d → exit
+ *  - super+l → clear screen
+ *  - super+v → paste (also claimed at the TextInput layer)
+ * Reject at parse time for the same reason as ``_RESERVED_CTRL_CHARS``
+ * (Copilot round-7 review on #19835). */
+const _RESERVED_SUPER_CHARS = new Set(['c', 'd', 'l', 'v'])
+
 interface RuntimeKeyEvent {
  alt?: boolean
  backspace?: boolean
@ -247,6 +257,14 @@ export const parseVoiceRecordKey = (raw: unknown): ParsedVoiceRecordKey => {
    return DEFAULT_VOICE_RECORD_KEY
  }

+  // Same for ``super+c`` / ``super+d`` / ``super+l`` / ``super+v`` —
+  // on macOS those are copy / exit / clear / paste and get claimed
+  // by ``isCopyShortcut`` / ``isAction`` / the TextInput paste layer
+  // before voice has a chance to toggle.
+  if (mod === 'super' && last.length === 1 && _RESERVED_SUPER_CHARS.has(last)) {
+    return DEFAULT_VOICE_RECORD_KEY
+  }
+
  if (last.length === 1) {
    return { ch: last, mod, raw: lower }
  }