From be8223130b6edf7e36fe4dbc22f4295972bf6c0a Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Mon, 4 May 2026 15:15:54 -0500 Subject: [PATCH] fix(tui): harden voice.record + TextInput paste + super-mod reserved list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three round-7 Copilot follow-ups on #19835: - voice.record start handler used _load_cfg().get('voice', {}).get(...) without shape checks, so malformed YAML (bool/scalar/list) returned 5025 instead of using VAD defaults. Centralized _voice_cfg_dict() helper and type-guarded silence_threshold/silence_duration with numeric fallbacks. - TextInput pass-through check moved above paste/copy handling so configured voice chords (ctrl+v / alt+v / cmd+v) beat the composer's paste/copy defaults. - parser now also rejects super+{c,d,l,v} — on macOS those are copy/exit/clear/paste and would be advertised in /voice status but never actually toggle recording. --- tests/test_tui_gateway_server.py | 36 +++++++++++++++++++++++++++ tui_gateway/server.py | 34 +++++++++++++++---------- ui-tui/src/__tests__/platform.test.ts | 19 +++++++++++--- ui-tui/src/components/textInput.tsx | 15 ++++++----- ui-tui/src/lib/platform.ts | 18 ++++++++++++++ 5 files changed, 100 insertions(+), 22 deletions(-) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 7953022e6c2..dbaf8750bc6 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -156,6 +156,42 @@ def test_voice_toggle_handles_non_dict_voice_cfg(monkeypatch): ) +def test_voice_record_start_handles_non_dict_voice_cfg(monkeypatch): + """Round-7 Copilot review regression on #19835. + + The ``voice.record`` start path previously read + ``_load_cfg().get("voice", {}).get(...)`` without any shape checks. + When ``voice`` is a non-dict (bool/scalar/list) ``get`` raises + AttributeError and the handler returns 5025 instead of falling + back to the VAD defaults. Now it uses ``_voice_cfg_dict()`` and + non-numeric silence values are coerced to the documented defaults. + """ + captured: dict = {} + + def fake_start_continuous(**kwargs): + captured.update(kwargs) + + monkeypatch.setitem( + sys.modules, + "hermes_cli.voice", + types.SimpleNamespace(start_continuous=fake_start_continuous, stop_continuous=lambda: None), + ) + monkeypatch.setenv("HERMES_VOICE", "1") + + for bad in (True, "cmd+b", None, 42, ["ctrl+b"], {"silence_threshold": "loud"}): + captured.clear() + monkeypatch.setattr(server, "_load_cfg", lambda b=bad: {"voice": b}) + + resp = server.dispatch( + {"id": "voice-record", "method": "voice.record", "params": {"action": "start"}} + ) + + assert "result" in resp, f"voice.record raised for voice={bad!r}: {resp.get('error')}" + assert resp["result"]["status"] == "recording" + assert captured["silence_threshold"] == 200 + assert captured["silence_duration"] == 3.0 + + def test_voice_toggle_tts_branch_also_carries_record_key(monkeypatch): """Round-2 Copilot review regression on #19835. diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 50de17d5c19..33749ffb884 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -5278,22 +5278,26 @@ def _voice_tts_enabled() -> bool: return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1" -def _voice_record_key() -> str: - """Current ``voice.record_key`` value, documented default on error. +def _voice_cfg_dict() -> dict: + """Shape-safe accessor for the ``voice:`` block in config.yaml. ``_load_cfg()`` returns raw ``yaml.safe_load()`` output, so both the root AND ``voice`` may be any YAML scalar / list / None. A hand-edit - like ``voice: true`` or ``voice: cmd+b`` (string where a dict is - expected) or even a malformed top-level config that parses to a - scalar would otherwise break ``.get("record_key")`` and take every - ``voice.toggle`` branch down with it (Copilot round-3/round-4 - review on #19835). Coerce through ``isinstance`` at every level so - malformed config falls back to the documented default instead of - crashing /voice. + like ``voice: true`` or a malformed top-level config that parses to + a scalar would otherwise break ``.get("…")`` and take every + ``voice.*`` branch down with it (Copilot round-3..7 review on + #19835). Coerce through ``isinstance`` at every level so malformed + config falls back to an empty dict instead of crashing /voice. """ cfg = _load_cfg() voice_cfg = cfg.get("voice") if isinstance(cfg, dict) else None - record_key = voice_cfg.get("record_key") if isinstance(voice_cfg, dict) else None + + return voice_cfg if isinstance(voice_cfg, dict) else {} + + +def _voice_record_key() -> str: + """Current ``voice.record_key`` value, documented default on error.""" + record_key = _voice_cfg_dict().get("record_key") return str(record_key) if isinstance(record_key, str) and record_key else "ctrl+b" @@ -5419,15 +5423,19 @@ def _(rid, params: dict) -> dict: from hermes_cli.voice import start_continuous - voice_cfg = _load_cfg().get("voice", {}) + # Shape-safe lookups: malformed ``voice:`` YAML (bool/scalar/list) + # must not crash /voice with a 5025 — fall back to VAD defaults. + voice_cfg = _voice_cfg_dict() + threshold = voice_cfg.get("silence_threshold") + duration = voice_cfg.get("silence_duration") start_continuous( on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}), on_status=lambda s: _voice_emit("voice.status", {"state": s}), on_silent_limit=lambda: _voice_emit( "voice.transcript", {"no_speech_limit": True} ), - silence_threshold=voice_cfg.get("silence_threshold", 200), - silence_duration=voice_cfg.get("silence_duration", 3.0), + silence_threshold=threshold if isinstance(threshold, (int, float)) else 200, + silence_duration=duration if isinstance(duration, (int, float)) else 3.0, ) return _ok(rid, {"status": "recording"}) diff --git a/ui-tui/src/__tests__/platform.test.ts b/ui-tui/src/__tests__/platform.test.ts index 6f77c619751..74fc9b5a4aa 100644 --- a/ui-tui/src/__tests__/platform.test.ts +++ b/ui-tui/src/__tests__/platform.test.ts @@ -207,10 +207,23 @@ describe('parseVoiceRecordKey (#18994)', () => { expect(parseVoiceRecordKey('ctrl+c')).toEqual(DEFAULT_VOICE_RECORD_KEY) expect(parseVoiceRecordKey('ctrl+d')).toEqual(DEFAULT_VOICE_RECORD_KEY) expect(parseVoiceRecordKey('ctrl+l')).toEqual(DEFAULT_VOICE_RECORD_KEY) - // Alt- and super-modifier versions of those letters are NOT - // intercepted, so they remain usable. + // Alt-modifier versions of those letters are NOT intercepted, so + // they remain usable. expect(parseVoiceRecordKey('alt+c').mod).toBe('alt') - expect(parseVoiceRecordKey('super+l').mod).toBe('super') + }) + + it('rejects super+{c,d,l,v} — mac action-mod chords are claimed before voice', async () => { + const { DEFAULT_VOICE_RECORD_KEY, parseVoiceRecordKey } = await importPlatform('linux') + + // On macOS super+c/d/l/v are copy / exit / clear / paste. Reject at + // parse time so /voice status doesn't advertise dead bindings. + expect(parseVoiceRecordKey('super+c')).toEqual(DEFAULT_VOICE_RECORD_KEY) + expect(parseVoiceRecordKey('super+d')).toEqual(DEFAULT_VOICE_RECORD_KEY) + expect(parseVoiceRecordKey('super+l')).toEqual(DEFAULT_VOICE_RECORD_KEY) + expect(parseVoiceRecordKey('super+v')).toEqual(DEFAULT_VOICE_RECORD_KEY) + // Other super letters still work (no global chord claims them). + expect(parseVoiceRecordKey('super+b').mod).toBe('super') + expect(parseVoiceRecordKey('super+o').mod).toBe('super') }) // Round-5 Copilot review regressions on #19835. diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx index c12ddeb3b1d..d8151e72b72 100644 --- a/ui-tui/src/components/textInput.tsx +++ b/ui-tui/src/components/textInput.tsx @@ -707,6 +707,15 @@ export function TextInput({ (inp: string, k: Key, event: InputEvent) => { const eventRaw = event.keypress.raw + // Configured voice shortcut wins over composer-level defaults like + // paste/copy so users who bind voice to ctrl+v / alt+v / cmd+v + // actually get voice toggled instead of a paste (Copilot round-7 + // follow-up on #19835). The pass-through predicate is a no-op for + // ordinary typing and plain paste when voice is unbound to 'v'. + if (shouldPassThroughToGlobalHandler(inp, k, voiceRecordKey)) { + return + } + if ( eventRaw === '\x1bv' || eventRaw === '\x1bV' || @@ -752,12 +761,6 @@ export function TextInput({ return } - // Chords claimed by useInputHandlers — pass through instead of letting - // them fall into text-editing behavior here. - if (shouldPassThroughToGlobalHandler(inp, k, voiceRecordKey)) { - return - } - if (k.return) { if (k.shift || k.ctrl || (isMac ? isActionMod(k) : k.meta)) { flushParentChange() diff --git a/ui-tui/src/lib/platform.ts b/ui-tui/src/lib/platform.ts index 0eb372d345f..e8a786290aa 100644 --- a/ui-tui/src/lib/platform.ts +++ b/ui-tui/src/lib/platform.ts @@ -138,6 +138,16 @@ const _NAMED_KEY_ALIASES: Record = { * round-4 review on #19835). */ const _RESERVED_CTRL_CHARS = new Set(['c', 'd', 'l']) +/** On macOS the action-modifier also intercepts standard editor chords + * via ``isCopyShortcut`` / ``isAction`` in ``useInputHandlers()``: + * - super+c → copy + * - super+d → exit + * - super+l → clear screen + * - super+v → paste (also claimed at the TextInput layer) + * Reject at parse time for the same reason as ``_RESERVED_CTRL_CHARS`` + * (Copilot round-7 review on #19835). */ +const _RESERVED_SUPER_CHARS = new Set(['c', 'd', 'l', 'v']) + interface RuntimeKeyEvent { alt?: boolean backspace?: boolean @@ -247,6 +257,14 @@ export const parseVoiceRecordKey = (raw: unknown): ParsedVoiceRecordKey => { return DEFAULT_VOICE_RECORD_KEY } + // Same for ``super+c`` / ``super+d`` / ``super+l`` / ``super+v`` — + // on macOS those are copy / exit / clear / paste and get claimed + // by ``isCopyShortcut`` / ``isAction`` / the TextInput paste layer + // before voice has a chance to toggle. + if (mod === 'super' && last.length === 1 && _RESERVED_SUPER_CHARS.has(last)) { + return DEFAULT_VOICE_RECORD_KEY + } + if (last.length === 1) { return { ch: last, mod, raw: lower } }