mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-19 10:02:16 +00:00
fix(tui): harden voice.record + TextInput paste + super-mod reserved list
Three round-7 Copilot follow-ups on #19835: - voice.record start handler used _load_cfg().get('voice', {}).get(...) without shape checks, so malformed YAML (bool/scalar/list) returned 5025 instead of using VAD defaults. Centralized _voice_cfg_dict() helper and type-guarded silence_threshold/silence_duration with numeric fallbacks. - TextInput pass-through check moved above paste/copy handling so configured voice chords (ctrl+v / alt+v / cmd+v) beat the composer's paste/copy defaults. - parser now also rejects super+{c,d,l,v} — on macOS those are copy/exit/clear/paste and would be advertised in /voice status but never actually toggle recording.
This commit is contained in:
parent
ff4e478fcc
commit
be8223130b
5 changed files with 100 additions and 22 deletions
|
|
@ -156,6 +156,42 @@ def test_voice_toggle_handles_non_dict_voice_cfg(monkeypatch):
|
|||
)
|
||||
|
||||
|
||||
def test_voice_record_start_handles_non_dict_voice_cfg(monkeypatch):
|
||||
"""Round-7 Copilot review regression on #19835.
|
||||
|
||||
The ``voice.record`` start path previously read
|
||||
``_load_cfg().get("voice", {}).get(...)`` without any shape checks.
|
||||
When ``voice`` is a non-dict (bool/scalar/list) ``get`` raises
|
||||
AttributeError and the handler returns 5025 instead of falling
|
||||
back to the VAD defaults. Now it uses ``_voice_cfg_dict()`` and
|
||||
non-numeric silence values are coerced to the documented defaults.
|
||||
"""
|
||||
captured: dict = {}
|
||||
|
||||
def fake_start_continuous(**kwargs):
|
||||
captured.update(kwargs)
|
||||
|
||||
monkeypatch.setitem(
|
||||
sys.modules,
|
||||
"hermes_cli.voice",
|
||||
types.SimpleNamespace(start_continuous=fake_start_continuous, stop_continuous=lambda: None),
|
||||
)
|
||||
monkeypatch.setenv("HERMES_VOICE", "1")
|
||||
|
||||
for bad in (True, "cmd+b", None, 42, ["ctrl+b"], {"silence_threshold": "loud"}):
|
||||
captured.clear()
|
||||
monkeypatch.setattr(server, "_load_cfg", lambda b=bad: {"voice": b})
|
||||
|
||||
resp = server.dispatch(
|
||||
{"id": "voice-record", "method": "voice.record", "params": {"action": "start"}}
|
||||
)
|
||||
|
||||
assert "result" in resp, f"voice.record raised for voice={bad!r}: {resp.get('error')}"
|
||||
assert resp["result"]["status"] == "recording"
|
||||
assert captured["silence_threshold"] == 200
|
||||
assert captured["silence_duration"] == 3.0
|
||||
|
||||
|
||||
def test_voice_toggle_tts_branch_also_carries_record_key(monkeypatch):
|
||||
"""Round-2 Copilot review regression on #19835.
|
||||
|
||||
|
|
|
|||
|
|
@ -5278,22 +5278,26 @@ def _voice_tts_enabled() -> bool:
|
|||
return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1"
|
||||
|
||||
|
||||
def _voice_record_key() -> str:
|
||||
"""Current ``voice.record_key`` value, documented default on error.
|
||||
def _voice_cfg_dict() -> dict:
|
||||
"""Shape-safe accessor for the ``voice:`` block in config.yaml.
|
||||
|
||||
``_load_cfg()`` returns raw ``yaml.safe_load()`` output, so both the
|
||||
root AND ``voice`` may be any YAML scalar / list / None. A hand-edit
|
||||
like ``voice: true`` or ``voice: cmd+b`` (string where a dict is
|
||||
expected) or even a malformed top-level config that parses to a
|
||||
scalar would otherwise break ``.get("record_key")`` and take every
|
||||
``voice.toggle`` branch down with it (Copilot round-3/round-4
|
||||
review on #19835). Coerce through ``isinstance`` at every level so
|
||||
malformed config falls back to the documented default instead of
|
||||
crashing /voice.
|
||||
like ``voice: true`` or a malformed top-level config that parses to
|
||||
a scalar would otherwise break ``.get("…")`` and take every
|
||||
``voice.*`` branch down with it (Copilot round-3..7 review on
|
||||
#19835). Coerce through ``isinstance`` at every level so malformed
|
||||
config falls back to an empty dict instead of crashing /voice.
|
||||
"""
|
||||
cfg = _load_cfg()
|
||||
voice_cfg = cfg.get("voice") if isinstance(cfg, dict) else None
|
||||
record_key = voice_cfg.get("record_key") if isinstance(voice_cfg, dict) else None
|
||||
|
||||
return voice_cfg if isinstance(voice_cfg, dict) else {}
|
||||
|
||||
|
||||
def _voice_record_key() -> str:
|
||||
"""Current ``voice.record_key`` value, documented default on error."""
|
||||
record_key = _voice_cfg_dict().get("record_key")
|
||||
|
||||
return str(record_key) if isinstance(record_key, str) and record_key else "ctrl+b"
|
||||
|
||||
|
|
@ -5419,15 +5423,19 @@ def _(rid, params: dict) -> dict:
|
|||
|
||||
from hermes_cli.voice import start_continuous
|
||||
|
||||
voice_cfg = _load_cfg().get("voice", {})
|
||||
# Shape-safe lookups: malformed ``voice:`` YAML (bool/scalar/list)
|
||||
# must not crash /voice with a 5025 — fall back to VAD defaults.
|
||||
voice_cfg = _voice_cfg_dict()
|
||||
threshold = voice_cfg.get("silence_threshold")
|
||||
duration = voice_cfg.get("silence_duration")
|
||||
start_continuous(
|
||||
on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
|
||||
on_status=lambda s: _voice_emit("voice.status", {"state": s}),
|
||||
on_silent_limit=lambda: _voice_emit(
|
||||
"voice.transcript", {"no_speech_limit": True}
|
||||
),
|
||||
silence_threshold=voice_cfg.get("silence_threshold", 200),
|
||||
silence_duration=voice_cfg.get("silence_duration", 3.0),
|
||||
silence_threshold=threshold if isinstance(threshold, (int, float)) else 200,
|
||||
silence_duration=duration if isinstance(duration, (int, float)) else 3.0,
|
||||
)
|
||||
return _ok(rid, {"status": "recording"})
|
||||
|
||||
|
|
|
|||
|
|
@ -207,10 +207,23 @@ describe('parseVoiceRecordKey (#18994)', () => {
|
|||
expect(parseVoiceRecordKey('ctrl+c')).toEqual(DEFAULT_VOICE_RECORD_KEY)
|
||||
expect(parseVoiceRecordKey('ctrl+d')).toEqual(DEFAULT_VOICE_RECORD_KEY)
|
||||
expect(parseVoiceRecordKey('ctrl+l')).toEqual(DEFAULT_VOICE_RECORD_KEY)
|
||||
// Alt- and super-modifier versions of those letters are NOT
|
||||
// intercepted, so they remain usable.
|
||||
// Alt-modifier versions of those letters are NOT intercepted, so
|
||||
// they remain usable.
|
||||
expect(parseVoiceRecordKey('alt+c').mod).toBe('alt')
|
||||
expect(parseVoiceRecordKey('super+l').mod).toBe('super')
|
||||
})
|
||||
|
||||
it('rejects super+{c,d,l,v} — mac action-mod chords are claimed before voice', async () => {
|
||||
const { DEFAULT_VOICE_RECORD_KEY, parseVoiceRecordKey } = await importPlatform('linux')
|
||||
|
||||
// On macOS super+c/d/l/v are copy / exit / clear / paste. Reject at
|
||||
// parse time so /voice status doesn't advertise dead bindings.
|
||||
expect(parseVoiceRecordKey('super+c')).toEqual(DEFAULT_VOICE_RECORD_KEY)
|
||||
expect(parseVoiceRecordKey('super+d')).toEqual(DEFAULT_VOICE_RECORD_KEY)
|
||||
expect(parseVoiceRecordKey('super+l')).toEqual(DEFAULT_VOICE_RECORD_KEY)
|
||||
expect(parseVoiceRecordKey('super+v')).toEqual(DEFAULT_VOICE_RECORD_KEY)
|
||||
// Other super letters still work (no global chord claims them).
|
||||
expect(parseVoiceRecordKey('super+b').mod).toBe('super')
|
||||
expect(parseVoiceRecordKey('super+o').mod).toBe('super')
|
||||
})
|
||||
|
||||
// Round-5 Copilot review regressions on #19835.
|
||||
|
|
|
|||
|
|
@ -707,6 +707,15 @@ export function TextInput({
|
|||
(inp: string, k: Key, event: InputEvent) => {
|
||||
const eventRaw = event.keypress.raw
|
||||
|
||||
// Configured voice shortcut wins over composer-level defaults like
|
||||
// paste/copy so users who bind voice to ctrl+v / alt+v / cmd+v
|
||||
// actually get voice toggled instead of a paste (Copilot round-7
|
||||
// follow-up on #19835). The pass-through predicate is a no-op for
|
||||
// ordinary typing and plain paste when voice is unbound to 'v'.
|
||||
if (shouldPassThroughToGlobalHandler(inp, k, voiceRecordKey)) {
|
||||
return
|
||||
}
|
||||
|
||||
if (
|
||||
eventRaw === '\x1bv' ||
|
||||
eventRaw === '\x1bV' ||
|
||||
|
|
@ -752,12 +761,6 @@ export function TextInput({
|
|||
return
|
||||
}
|
||||
|
||||
// Chords claimed by useInputHandlers — pass through instead of letting
|
||||
// them fall into text-editing behavior here.
|
||||
if (shouldPassThroughToGlobalHandler(inp, k, voiceRecordKey)) {
|
||||
return
|
||||
}
|
||||
|
||||
if (k.return) {
|
||||
if (k.shift || k.ctrl || (isMac ? isActionMod(k) : k.meta)) {
|
||||
flushParentChange()
|
||||
|
|
|
|||
|
|
@ -138,6 +138,16 @@ const _NAMED_KEY_ALIASES: Record<string, VoiceRecordKeyNamed> = {
|
|||
* round-4 review on #19835). */
|
||||
const _RESERVED_CTRL_CHARS = new Set(['c', 'd', 'l'])
|
||||
|
||||
/** On macOS the action-modifier also intercepts standard editor chords
|
||||
* via ``isCopyShortcut`` / ``isAction`` in ``useInputHandlers()``:
|
||||
* - super+c → copy
|
||||
* - super+d → exit
|
||||
* - super+l → clear screen
|
||||
* - super+v → paste (also claimed at the TextInput layer)
|
||||
* Reject at parse time for the same reason as ``_RESERVED_CTRL_CHARS``
|
||||
* (Copilot round-7 review on #19835). */
|
||||
const _RESERVED_SUPER_CHARS = new Set(['c', 'd', 'l', 'v'])
|
||||
|
||||
interface RuntimeKeyEvent {
|
||||
alt?: boolean
|
||||
backspace?: boolean
|
||||
|
|
@ -247,6 +257,14 @@ export const parseVoiceRecordKey = (raw: unknown): ParsedVoiceRecordKey => {
|
|||
return DEFAULT_VOICE_RECORD_KEY
|
||||
}
|
||||
|
||||
// Same for ``super+c`` / ``super+d`` / ``super+l`` / ``super+v`` —
|
||||
// on macOS those are copy / exit / clear / paste and get claimed
|
||||
// by ``isCopyShortcut`` / ``isAction`` / the TextInput paste layer
|
||||
// before voice has a chance to toggle.
|
||||
if (mod === 'super' && last.length === 1 && _RESERVED_SUPER_CHARS.has(last)) {
|
||||
return DEFAULT_VOICE_RECORD_KEY
|
||||
}
|
||||
|
||||
if (last.length === 1) {
|
||||
return { ch: last, mod, raw: lower }
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue