From be8223130b6edf7e36fe4dbc22f4295972bf6c0a Mon Sep 17 00:00:00 2001
From: Brooklyn Nicholson <brooklyn.bb.nicholson@gmail.com>
Date: Mon, 4 May 2026 15:15:54 -0500
Subject: [PATCH] fix(tui): harden voice.record + TextInput paste + super-mod
 reserved list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three round-7 Copilot follow-ups on #19835:

- voice.record start handler used _load_cfg().get('voice', {}).get(...) without
  shape checks, so malformed YAML (bool/scalar/list) returned 5025 instead of
  using VAD defaults. Centralized _voice_cfg_dict() helper and type-guarded
  silence_threshold/silence_duration with numeric fallbacks.
- TextInput pass-through check moved above paste/copy handling so configured
  voice chords (ctrl+v / alt+v / cmd+v) beat the composer's paste/copy
  defaults.
- parser now also rejects super+{c,d,l,v} — on macOS those are
  copy/exit/clear/paste and would be advertised in /voice status but never
  actually toggle recording.
---
 tests/test_tui_gateway_server.py      | 36 +++++++++++++++++++++++++++
 tui_gateway/server.py                 | 34 +++++++++++++++----------
 ui-tui/src/__tests__/platform.test.ts | 19 +++++++++++---
 ui-tui/src/components/textInput.tsx   | 15 ++++++-----
 ui-tui/src/lib/platform.ts            | 18 ++++++++++++++
 5 files changed, 100 insertions(+), 22 deletions(-)

diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py
index 7953022e6c2..dbaf8750bc6 100644
--- a/tests/test_tui_gateway_server.py
+++ b/tests/test_tui_gateway_server.py
@@ -156,6 +156,42 @@ def test_voice_toggle_handles_non_dict_voice_cfg(monkeypatch):
         )
 
 
+def test_voice_record_start_handles_non_dict_voice_cfg(monkeypatch):
+    """Round-7 Copilot review regression on #19835.
+
+    The ``voice.record`` start path previously read
+    ``_load_cfg().get("voice", {}).get(...)`` without any shape checks.
+    When ``voice`` is a non-dict (bool/scalar/list) ``get`` raises
+    AttributeError and the handler returns 5025 instead of falling
+    back to the VAD defaults. Now it uses ``_voice_cfg_dict()`` and
+    non-numeric silence values are coerced to the documented defaults.
+    """
+    captured: dict = {}
+
+    def fake_start_continuous(**kwargs):
+        captured.update(kwargs)
+
+    monkeypatch.setitem(
+        sys.modules,
+        "hermes_cli.voice",
+        types.SimpleNamespace(start_continuous=fake_start_continuous, stop_continuous=lambda: None),
+    )
+    monkeypatch.setenv("HERMES_VOICE", "1")
+
+    for bad in (True, "cmd+b", None, 42, ["ctrl+b"], {"silence_threshold": "loud"}):
+        captured.clear()
+        monkeypatch.setattr(server, "_load_cfg", lambda b=bad: {"voice": b})
+
+        resp = server.dispatch(
+            {"id": "voice-record", "method": "voice.record", "params": {"action": "start"}}
+        )
+
+        assert "result" in resp, f"voice.record raised for voice={bad!r}: {resp.get('error')}"
+        assert resp["result"]["status"] == "recording"
+        assert captured["silence_threshold"] == 200
+        assert captured["silence_duration"] == 3.0
+
+
 def test_voice_toggle_tts_branch_also_carries_record_key(monkeypatch):
     """Round-2 Copilot review regression on #19835.
 
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 50de17d5c19..33749ffb884 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -5278,22 +5278,26 @@ def _voice_tts_enabled() -> bool:
     return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1"
 
 
-def _voice_record_key() -> str:
-    """Current ``voice.record_key`` value, documented default on error.
+def _voice_cfg_dict() -> dict:
+    """Shape-safe accessor for the ``voice:`` block in config.yaml.
 
     ``_load_cfg()`` returns raw ``yaml.safe_load()`` output, so both the
     root AND ``voice`` may be any YAML scalar / list / None. A hand-edit
-    like ``voice: true`` or ``voice: cmd+b`` (string where a dict is
-    expected) or even a malformed top-level config that parses to a
-    scalar would otherwise break ``.get("record_key")`` and take every
-    ``voice.toggle`` branch down with it (Copilot round-3/round-4
-    review on #19835). Coerce through ``isinstance`` at every level so
-    malformed config falls back to the documented default instead of
-    crashing /voice.
+    like ``voice: true`` or a malformed top-level config that parses to
+    a scalar would otherwise break ``.get("…")`` and take every
+    ``voice.*`` branch down with it (Copilot round-3..7 review on
+    #19835). Coerce through ``isinstance`` at every level so malformed
+    config falls back to an empty dict instead of crashing /voice.
     """
     cfg = _load_cfg()
     voice_cfg = cfg.get("voice") if isinstance(cfg, dict) else None
-    record_key = voice_cfg.get("record_key") if isinstance(voice_cfg, dict) else None
+
+    return voice_cfg if isinstance(voice_cfg, dict) else {}
+
+
+def _voice_record_key() -> str:
+    """Current ``voice.record_key`` value, documented default on error."""
+    record_key = _voice_cfg_dict().get("record_key")
 
     return str(record_key) if isinstance(record_key, str) and record_key else "ctrl+b"
 
@@ -5419,15 +5423,19 @@ def _(rid, params: dict) -> dict:
 
             from hermes_cli.voice import start_continuous
 
-            voice_cfg = _load_cfg().get("voice", {})
+            # Shape-safe lookups: malformed ``voice:`` YAML (bool/scalar/list)
+            # must not crash /voice with a 5025 — fall back to VAD defaults.
+            voice_cfg = _voice_cfg_dict()
+            threshold = voice_cfg.get("silence_threshold")
+            duration = voice_cfg.get("silence_duration")
             start_continuous(
                 on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
                 on_status=lambda s: _voice_emit("voice.status", {"state": s}),
                 on_silent_limit=lambda: _voice_emit(
                     "voice.transcript", {"no_speech_limit": True}
                 ),
-                silence_threshold=voice_cfg.get("silence_threshold", 200),
-                silence_duration=voice_cfg.get("silence_duration", 3.0),
+                silence_threshold=threshold if isinstance(threshold, (int, float)) else 200,
+                silence_duration=duration if isinstance(duration, (int, float)) else 3.0,
             )
             return _ok(rid, {"status": "recording"})
 
diff --git a/ui-tui/src/__tests__/platform.test.ts b/ui-tui/src/__tests__/platform.test.ts
index 6f77c619751..74fc9b5a4aa 100644
--- a/ui-tui/src/__tests__/platform.test.ts
+++ b/ui-tui/src/__tests__/platform.test.ts
@@ -207,10 +207,23 @@ describe('parseVoiceRecordKey (#18994)', () => {
     expect(parseVoiceRecordKey('ctrl+c')).toEqual(DEFAULT_VOICE_RECORD_KEY)
     expect(parseVoiceRecordKey('ctrl+d')).toEqual(DEFAULT_VOICE_RECORD_KEY)
     expect(parseVoiceRecordKey('ctrl+l')).toEqual(DEFAULT_VOICE_RECORD_KEY)
-    // Alt- and super-modifier versions of those letters are NOT
-    // intercepted, so they remain usable.
+    // Alt-modifier versions of those letters are NOT intercepted, so
+    // they remain usable.
     expect(parseVoiceRecordKey('alt+c').mod).toBe('alt')
-    expect(parseVoiceRecordKey('super+l').mod).toBe('super')
+  })
+
+  it('rejects super+{c,d,l,v} — mac action-mod chords are claimed before voice', async () => {
+    const { DEFAULT_VOICE_RECORD_KEY, parseVoiceRecordKey } = await importPlatform('linux')
+
+    // On macOS super+c/d/l/v are copy / exit / clear / paste. Reject at
+    // parse time so /voice status doesn't advertise dead bindings.
+    expect(parseVoiceRecordKey('super+c')).toEqual(DEFAULT_VOICE_RECORD_KEY)
+    expect(parseVoiceRecordKey('super+d')).toEqual(DEFAULT_VOICE_RECORD_KEY)
+    expect(parseVoiceRecordKey('super+l')).toEqual(DEFAULT_VOICE_RECORD_KEY)
+    expect(parseVoiceRecordKey('super+v')).toEqual(DEFAULT_VOICE_RECORD_KEY)
+    // Other super letters still work (no global chord claims them).
+    expect(parseVoiceRecordKey('super+b').mod).toBe('super')
+    expect(parseVoiceRecordKey('super+o').mod).toBe('super')
   })
 
   // Round-5 Copilot review regressions on #19835.
diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx
index c12ddeb3b1d..d8151e72b72 100644
--- a/ui-tui/src/components/textInput.tsx
+++ b/ui-tui/src/components/textInput.tsx
@@ -707,6 +707,15 @@ export function TextInput({
     (inp: string, k: Key, event: InputEvent) => {
       const eventRaw = event.keypress.raw
 
+      // Configured voice shortcut wins over composer-level defaults like
+      // paste/copy so users who bind voice to ctrl+v / alt+v / cmd+v
+      // actually get voice toggled instead of a paste (Copilot round-7
+      // follow-up on #19835). The pass-through predicate is a no-op for
+      // ordinary typing and plain paste when voice is unbound to 'v'.
+      if (shouldPassThroughToGlobalHandler(inp, k, voiceRecordKey)) {
+        return
+      }
+
       if (
         eventRaw === '\x1bv' ||
         eventRaw === '\x1bV' ||
@@ -752,12 +761,6 @@ export function TextInput({
         return
       }
 
-      // Chords claimed by useInputHandlers — pass through instead of letting
-      // them fall into text-editing behavior here.
-      if (shouldPassThroughToGlobalHandler(inp, k, voiceRecordKey)) {
-        return
-      }
-
       if (k.return) {
         if (k.shift || k.ctrl || (isMac ? isActionMod(k) : k.meta)) {
           flushParentChange()
diff --git a/ui-tui/src/lib/platform.ts b/ui-tui/src/lib/platform.ts
index 0eb372d345f..e8a786290aa 100644
--- a/ui-tui/src/lib/platform.ts
+++ b/ui-tui/src/lib/platform.ts
@@ -138,6 +138,16 @@ const _NAMED_KEY_ALIASES: Record<string, VoiceRecordKeyNamed> = {
  * round-4 review on #19835). */
 const _RESERVED_CTRL_CHARS = new Set(['c', 'd', 'l'])
 
+/** On macOS the action-modifier also intercepts standard editor chords
+ * via ``isCopyShortcut`` / ``isAction`` in ``useInputHandlers()``:
+ *  - super+c → copy
+ *  - super+d → exit
+ *  - super+l → clear screen
+ *  - super+v → paste (also claimed at the TextInput layer)
+ * Reject at parse time for the same reason as ``_RESERVED_CTRL_CHARS``
+ * (Copilot round-7 review on #19835). */
+const _RESERVED_SUPER_CHARS = new Set(['c', 'd', 'l', 'v'])
+
 interface RuntimeKeyEvent {
   alt?: boolean
   backspace?: boolean
@@ -247,6 +257,14 @@ export const parseVoiceRecordKey = (raw: unknown): ParsedVoiceRecordKey => {
     return DEFAULT_VOICE_RECORD_KEY
   }
 
+  // Same for ``super+c`` / ``super+d`` / ``super+l`` / ``super+v`` —
+  // on macOS those are copy / exit / clear / paste and get claimed
+  // by ``isCopyShortcut`` / ``isAction`` / the TextInput paste layer
+  // before voice has a chance to toggle.
+  if (mod === 'super' && last.length === 1 && _RESERVED_SUPER_CHARS.has(last)) {
+    return DEFAULT_VOICE_RECORD_KEY
+  }
+
   if (last.length === 1) {
     return { ch: last, mod, raw: lower }
   }