From 04c489b5873dae86caa4c99757e004c767e1303f Mon Sep 17 00:00:00 2001
From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
Date: Fri, 24 Apr 2026 00:55:17 +0300
Subject: [PATCH] feat(tui): match CLI's voice slash + VAD-continuous recording
 model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TUI had drifted from the CLI's voice model in two ways:

- /voice on was lighting up the microphone immediately and Ctrl+B was
  interpreted as a mode toggle.  The CLI separates the two: /voice on
  just flips the umbrella bit, recording only starts once the user
  presses Ctrl+B, which also sets _voice_continuous so the VAD loop
  auto-restarts until the user presses Ctrl+B again or three silent
  cycles pass.
- /voice tts was missing entirely, so users couldn't turn agent reply
  speech on/off from inside the TUI.

This commit brings the TUI to parity.

Python

- hermes_cli/voice.py: continuous-mode API (start_continuous,
  stop_continuous, is_continuous_active) layered on the existing PTT
  wrappers. The silence callback transcribes, fires on_transcript,
  tracks consecutive no-speech cycles, and auto-restarts — mirroring
  cli.py:_voice_stop_and_transcribe + _restart_recording.
- tui_gateway/server.py:
  - voice.toggle now supports on / off / tts / status.  The umbrella
    bit lives in HERMES_VOICE + display.voice_enabled; tts lives in
    HERMES_VOICE_TTS + display.voice_tts.  /voice off also tears down
    any active continuous loop so a toggle-off really releases the
    microphone.
  - voice.record start/stop now drives start_continuous/stop_continuous.
    start is refused with a clear error when the mode is off, matching
    cli.py:handle_voice_record's early return on `not _voice_mode`.
  - New voice.transcript / voice.status events emit through
    _voice_emit (remembers the sid that last enabled the mode so
    events land in the right session).

TypeScript

- gatewayTypes.ts: voice.status + voice.transcript event
  discriminants; VoiceToggleResponse gains tts; VoiceRecordResponse
  gains status for the new "started/stopped" responses.
- interfaces.ts: GatewayEventHandlerContext gains composer.setInput +
  submission.submitRef + voice.{setRecording, setProcessing,
  setVoiceEnabled}; InputHandlerContext.voice gains enabled +
  setVoiceEnabled for the mode-aware Ctrl+B handler.
- createGatewayEventHandler.ts: voice.status drives REC/STT badges;
  voice.transcript auto-submits when the composer is empty (CLI
  _pending_input.put parity) and appends when a draft is in flight.
  no_speech_limit flips voice off + sys line.
- useInputHandlers.ts: Ctrl+B now calls voice.record (start/stop),
  not voice.toggle, and nudges the user with a sys line when the
  mode is off instead of silently flipping it on.
- useMainApp.ts: wires the new event-handler context fields.
- slash/commands/session.ts: /voice handles on / off / tts / status
  with CLI-matching output ("voice: mode on · tts off").

Backward compat preserved for voice.record (was always PTT shape;
gateway still honours start/stop with mode-gating added).
---
 hermes_cli/voice.py                           | 360 +++++++++++++++++-
 tests/hermes_cli/test_voice_wrapper.py        | 202 ++++++++++
 tui_gateway/server.py                         | 147 ++++++-
 .../createGatewayEventHandler.test.ts         |  11 +-
 ui-tui/src/app/createGatewayEventHandler.ts   |  57 +++
 ui-tui/src/app/interfaces.ts                  |  13 +
 ui-tui/src/app/slash/commands/session.ts      |  55 ++-
 ui-tui/src/app/useInputHandlers.ts            |  64 ++--
 ui-tui/src/app/useMainApp.ts                  |  22 +-
 ui-tui/src/gatewayTypes.ts                    |   8 +
 10 files changed, 861 insertions(+), 78 deletions(-)

diff --git a/hermes_cli/voice.py b/hermes_cli/voice.py
index 71f180563..70e097e77 100644
--- a/hermes_cli/voice.py
+++ b/hermes_cli/voice.py
@@ -2,18 +2,31 @@
 
 Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
 (text-to-speech) behind idempotent, stateful entry points that the gateway's
-``voice.record`` and ``voice.tts`` JSON-RPC handlers can call from a
-dedicated thread. The gateway imports this module lazily so missing optional
-audio deps (sounddevice, faster-whisper, numpy) surface as an ``ImportError``
-at call time, not at startup.
+``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
+call from a dedicated thread. The gateway imports this module lazily so that
+missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
+an ``ImportError`` at call time, not at startup.
+
+Two usage modes are exposed:
+
+* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
+  manually-bounded capture used when the caller drives the start/stop pair
+  explicitly.
+* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
+  the classic CLI voice mode: recording auto-stops on silence, transcribes,
+  hands the result to a callback, and then auto-restarts for the next turn.
+  Three consecutive no-speech cycles stop the loop and fire
+  ``on_silent_limit`` so the UI can turn the mode off.
 """
 
 from __future__ import annotations
 
 import json
 import logging
+import os
+import sys
 import threading
-from typing import Optional
+from typing import Any, Callable, Optional
 
 from tools.voice_mode import (
     create_audio_recorder,
@@ -24,15 +37,71 @@ from tools.voice_mode import (
 
 logger = logging.getLogger(__name__)
 
+
+def _debug(msg: str) -> None:
+    """Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
+
+    Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
+    which createGatewayEventHandler shows as an Activity line — exactly
+    what we need to diagnose "why didn't the loop auto-restart?" in the
+    user's real terminal without shipping a separate debug RPC.
+    """
+    if os.environ.get("HERMES_VOICE_DEBUG", "").strip() == "1":
+        print(f"[voice] {msg}", file=sys.stderr, flush=True)
+
+
+def _beeps_enabled() -> bool:
+    """CLI parity: voice.beep_enabled in config.yaml (default True)."""
+    try:
+        from hermes_cli.config import load_config
+
+        voice_cfg = load_config().get("voice", {})
+        if isinstance(voice_cfg, dict):
+            return bool(voice_cfg.get("beep_enabled", True))
+    except Exception:
+        pass
+    return True
+
+
+def _play_beep(frequency: int, count: int = 1) -> None:
+    """Audible cue matching cli.py's record/stop beeps.
+
+    880 Hz single-beep on start (cli.py:_voice_start_recording line 7532),
+    660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585).
+    Best-effort — sounddevice failures are silently swallowed so the
+    voice loop never breaks because a speaker was unavailable.
+    """
+    if not _beeps_enabled():
+        return
+    try:
+        from tools.voice_mode import play_beep
+
+        play_beep(frequency=frequency, count=count)
+    except Exception as e:
+        _debug(f"beep {frequency}Hz failed: {e}")
+
+# ── Push-to-talk state ───────────────────────────────────────────────
 _recorder = None
 _recorder_lock = threading.Lock()
 
+# ── Continuous (VAD) state ───────────────────────────────────────────
+_continuous_lock = threading.Lock()
+_continuous_active = False
+_continuous_recorder: Any = None
+_continuous_on_transcript: Optional[Callable[[str], None]] = None
+_continuous_on_status: Optional[Callable[[str], None]] = None
+_continuous_on_silent_limit: Optional[Callable[[], None]] = None
+_continuous_no_speech_count = 0
+_CONTINUOUS_NO_SPEECH_LIMIT = 3
+
+
+# ── Push-to-talk API ─────────────────────────────────────────────────
+
 
 def start_recording() -> None:
-    """Begin capturing from the default input device.
+    """Begin capturing from the default input device (push-to-talk).
 
-    Idempotent — calling again while a recording is in progress is a no-op,
-    which matches the TUI's toggle semantics (Ctrl+B starts, Ctrl+B stops).
+    Idempotent — calling again while a recording is in progress is a no-op.
     """
     global _recorder
 
@@ -40,20 +109,15 @@ def start_recording() -> None:
         if _recorder is not None and getattr(_recorder, "is_recording", False):
             return
         rec = create_audio_recorder()
-        # No silence callback: the TUI drives start/stop explicitly via
-        # the voice.record RPC. VAD auto-stop is a CLI-mode feature.
         rec.start()
         _recorder = rec
 
 
 def stop_and_transcribe() -> Optional[str]:
-    """Stop the active recording, transcribe it, and return the text.
+    """Stop the active push-to-talk recording, transcribe, return text.
 
     Returns ``None`` when no recording is active, when the microphone
-    captured no speech, or when Whisper returned a known hallucination
-    token (silence artefacts like "Thanks for watching!"). The caller
-    treats ``None`` as "no speech detected" and leaves the composer
-    untouched.
+    captured no speech, or when Whisper returned a known hallucination.
     """
     global _recorder
 
@@ -73,27 +137,281 @@ def stop_and_transcribe() -> Optional[str]:
     except Exception as e:
         logger.warning("voice transcription failed: %s", e)
         return None
+    finally:
+        try:
+            if os.path.isfile(wav_path):
+                os.unlink(wav_path)
+        except Exception:
+            pass
 
-    text = (result.get("text") or "").strip()
+    # transcribe_recording returns {"success": bool, "transcript": str, ...}
+    # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
+    if not result.get("success"):
+        return None
+    text = (result.get("transcript") or "").strip()
     if not text or is_whisper_hallucination(text):
         return None
 
     return text
 
 
+# ── Continuous (VAD) API ─────────────────────────────────────────────
+
+
+def start_continuous(
+    on_transcript: Callable[[str], None],
+    on_status: Optional[Callable[[str], None]] = None,
+    on_silent_limit: Optional[Callable[[], None]] = None,
+    silence_threshold: int = 200,
+    silence_duration: float = 3.0,
+) -> None:
+    """Start a VAD-driven continuous recording loop.
+
+    The loop calls ``on_transcript(text)`` each time speech is detected and
+    transcribed successfully, then auto-restarts. After
+    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
+    picked up at all) the loop stops itself and calls ``on_silent_limit``
+    so the UI can reflect "voice off". Idempotent — calling while already
+    active is a no-op.
+
+    ``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
+    ``"idle"`` so the UI can show a live indicator.
+    """
+    global _continuous_active, _continuous_recorder
+    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
+    global _continuous_no_speech_count
+
+    with _continuous_lock:
+        if _continuous_active:
+            _debug("start_continuous: already active — no-op")
+            return
+        _continuous_active = True
+        _continuous_on_transcript = on_transcript
+        _continuous_on_status = on_status
+        _continuous_on_silent_limit = on_silent_limit
+        _continuous_no_speech_count = 0
+
+        if _continuous_recorder is None:
+            _continuous_recorder = create_audio_recorder()
+
+        _continuous_recorder._silence_threshold = silence_threshold
+        _continuous_recorder._silence_duration = silence_duration
+        rec = _continuous_recorder
+
+    _debug(
+        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
+    )
+
+    # CLI parity: single 880 Hz beep *before* opening the stream — placing
+    # the beep after stream.start() on macOS triggers a CoreAudio conflict
+    # (cli.py:7528 comment).
+    _play_beep(frequency=880, count=1)
+
+    try:
+        rec.start(on_silence_stop=_continuous_on_silence)
+    except Exception as e:
+        logger.error("failed to start continuous recording: %s", e)
+        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
+        with _continuous_lock:
+            _continuous_active = False
+        raise
+
+    if on_status:
+        try:
+            on_status("listening")
+        except Exception:
+            pass
+
+
+def stop_continuous() -> None:
+    """Stop the active continuous loop and release the microphone.
+
+    Idempotent — calling while not active is a no-op. Any in-flight
+    transcription completes but its result is discarded (the callback
+    checks ``_continuous_active`` before firing).
+    """
+    global _continuous_active, _continuous_on_transcript
+    global _continuous_on_status, _continuous_on_silent_limit
+    global _continuous_recorder, _continuous_no_speech_count
+
+    with _continuous_lock:
+        if not _continuous_active:
+            return
+        _continuous_active = False
+        rec = _continuous_recorder
+        on_status = _continuous_on_status
+        _continuous_on_transcript = None
+        _continuous_on_status = None
+        _continuous_on_silent_limit = None
+        _continuous_no_speech_count = 0
+
+    if rec is not None:
+        try:
+            # cancel() (not stop()) discards buffered frames — the loop
+            # is over, we don't want to transcribe a half-captured turn.
+            rec.cancel()
+        except Exception as e:
+            logger.warning("failed to cancel recorder: %s", e)
+
+    # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
+    # silence-auto-stop path plays).
+    _play_beep(frequency=660, count=2)
+
+    if on_status:
+        try:
+            on_status("idle")
+        except Exception:
+            pass
+
+
+def is_continuous_active() -> bool:
+    """Whether a continuous voice loop is currently running."""
+    with _continuous_lock:
+        return _continuous_active
+
+
+def _continuous_on_silence() -> None:
+    """AudioRecorder silence callback — runs in a daemon thread.
+
+    Stops the current capture, transcribes, delivers the text via
+    ``on_transcript``, and — if the loop is still active — starts the
+    next capture. Three consecutive silent cycles end the loop.
+    """
+    global _continuous_active, _continuous_no_speech_count
+
+    _debug("_continuous_on_silence: fired")
+
+    with _continuous_lock:
+        if not _continuous_active:
+            _debug("_continuous_on_silence: loop inactive — abort")
+            return
+        rec = _continuous_recorder
+        on_transcript = _continuous_on_transcript
+        on_status = _continuous_on_status
+        on_silent_limit = _continuous_on_silent_limit
+
+    if rec is None:
+        _debug("_continuous_on_silence: no recorder — abort")
+        return
+
+    if on_status:
+        try:
+            on_status("transcribing")
+        except Exception:
+            pass
+
+    wav_path = rec.stop()
+    # Peak RMS is the critical diagnostic when stop() returns None despite
+    # the VAD firing — tells us at a glance whether the mic was too quiet
+    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
+    peak_rms = getattr(rec, "_peak_rms", -1)
+    _debug(
+        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
+    )
+
+    # CLI parity: double 660 Hz beep after the stream stops (safe from the
+    # CoreAudio conflict that blocks pre-start beeps).
+    _play_beep(frequency=660, count=2)
+
+    transcript: Optional[str] = None
+
+    if wav_path:
+        try:
+            result = transcribe_recording(wav_path)
+            # transcribe_recording returns {"success": bool, "transcript": str,
+            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
+            # produced empty transcripts even when Groq/local STT returned fine,
+            # which masqueraded as "not hearing the user" to the caller.
+            success = bool(result.get("success"))
+            text = (result.get("transcript") or "").strip()
+            err = result.get("error")
+            _debug(
+                f"_continuous_on_silence: transcribe -> success={success} "
+                f"text={text!r} err={err!r}"
+            )
+            if success and text and not is_whisper_hallucination(text):
+                transcript = text
+        except Exception as e:
+            logger.warning("continuous transcription failed: %s", e)
+            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
+        finally:
+            try:
+                if os.path.isfile(wav_path):
+                    os.unlink(wav_path)
+            except Exception:
+                pass
+
+    with _continuous_lock:
+        if not _continuous_active:
+            # User stopped us while we were transcribing — discard.
+            _debug("_continuous_on_silence: stopped during transcribe — no restart")
+            return
+        if transcript:
+            _continuous_no_speech_count = 0
+        else:
+            _continuous_no_speech_count += 1
+        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
+        no_speech = _continuous_no_speech_count
+
+    if transcript and on_transcript:
+        try:
+            on_transcript(transcript)
+        except Exception as e:
+            logger.warning("on_transcript callback raised: %s", e)
+
+    if should_halt:
+        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
+        with _continuous_lock:
+            _continuous_active = False
+            _continuous_no_speech_count = 0
+        if on_silent_limit:
+            try:
+                on_silent_limit()
+            except Exception:
+                pass
+        try:
+            rec.cancel()
+        except Exception:
+            pass
+        if on_status:
+            try:
+                on_status("idle")
+            except Exception:
+                pass
+        return
+
+    # Restart for the next turn.
+    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
+    _play_beep(frequency=880, count=1)
+    try:
+        rec.start(on_silence_stop=_continuous_on_silence)
+    except Exception as e:
+        logger.error("failed to restart continuous recording: %s", e)
+        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
+        with _continuous_lock:
+            _continuous_active = False
+        return
+
+    if on_status:
+        try:
+            on_status("listening")
+        except Exception:
+            pass
+
+
+# ── TTS API ──────────────────────────────────────────────────────────
+
+
 def speak_text(text: str) -> None:
     """Synthesize ``text`` with the configured TTS provider and play it.
 
     The gateway spawns a daemon thread to call this so the RPC returns
-    immediately. Failures are logged and swallowed — the UI already
-    acknowledged "speaking" by the time we get here.
+    immediately. Failures are logged and swallowed.
     """
     if not text or not text.strip():
         return
 
-    # Lazy import — tts_tool pulls optional provider SDKs (OpenAI,
-    # ElevenLabs, etc.) and config-reading machinery that we don't
-    # want to load at module import time.
+    # Lazy import — tts_tool pulls optional provider SDKs.
     from tools.tts_tool import text_to_speech_tool
 
     try:
diff --git a/tests/hermes_cli/test_voice_wrapper.py b/tests/hermes_cli/test_voice_wrapper.py
index f711ec356..a372c1194 100644
--- a/tests/hermes_cli/test_voice_wrapper.py
+++ b/tests/hermes_cli/test_voice_wrapper.py
@@ -51,3 +51,205 @@ class TestSpeakTextGuards:
 
         # Should simply return None without raising.
         assert speak_text(text) is None
+
+
+class TestContinuousAPI:
+    """Continuous (VAD) mode API — CLI-parity loop entry points."""
+
+    def test_continuous_exports(self):
+        from hermes_cli.voice import (
+            is_continuous_active,
+            start_continuous,
+            stop_continuous,
+        )
+
+        assert callable(start_continuous)
+        assert callable(stop_continuous)
+        assert callable(is_continuous_active)
+
+    def test_not_active_by_default(self, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Isolate from any state left behind by other tests in the session.
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+
+        assert voice.is_continuous_active() is False
+
+    def test_stop_continuous_idempotent_when_inactive(self, monkeypatch):
+        """stop_continuous must not raise when no loop is active — the
+        gateway's voice.toggle off path calls it unconditionally."""
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+
+        # Should return cleanly without exceptions
+        assert voice.stop_continuous() is None
+        assert voice.is_continuous_active() is False
+
+    def test_double_start_is_idempotent(self, monkeypatch):
+        """A second start_continuous while already active is a no-op — prevents
+        two overlapping capture threads fighting over the microphone when the
+        UI double-fires (e.g. both /voice on and Ctrl+B within the same tick)."""
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_continuous_active", True)
+        called = {"n": 0}
+
+        class FakeRecorder:
+            def start(self, on_silence_stop=None):
+                called["n"] += 1
+
+            def cancel(self):
+                pass
+
+        monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())
+
+        voice.start_continuous(on_transcript=lambda _t: None)
+
+        # The guard inside start_continuous short-circuits before rec.start()
+        assert called["n"] == 0
+
+
+class TestContinuousLoopSimulation:
+    """End-to-end simulation of the VAD loop with a fake recorder.
+
+    Proves auto-restart works: the silence callback must trigger transcribe →
+    on_transcript → re-call rec.start(on_silence_stop=same_cb). Also covers
+    the 3-strikes no-speech halt.
+    """
+
+    @pytest.fixture
+    def fake_recorder(self, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Reset module state between tests.
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+        monkeypatch.setattr(voice, "_continuous_no_speech_count", 0)
+        monkeypatch.setattr(voice, "_continuous_on_transcript", None)
+        monkeypatch.setattr(voice, "_continuous_on_status", None)
+        monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
+
+        class FakeRecorder:
+            _silence_threshold = 200
+            _silence_duration = 3.0
+            is_recording = False
+
+            def __init__(self):
+                self.start_calls = 0
+                self.last_callback = None
+                self.stopped = 0
+                self.cancelled = 0
+                # Preset WAV path returned by stop()
+                self.next_stop_wav = "/tmp/fake.wav"
+
+            def start(self, on_silence_stop=None):
+                self.start_calls += 1
+                self.last_callback = on_silence_stop
+                self.is_recording = True
+
+            def stop(self):
+                self.stopped += 1
+                self.is_recording = False
+                return self.next_stop_wav
+
+            def cancel(self):
+                self.cancelled += 1
+                self.is_recording = False
+
+        rec = FakeRecorder()
+        monkeypatch.setattr(voice, "create_audio_recorder", lambda: rec)
+        # Skip real file ops in the silence callback.
+        monkeypatch.setattr(voice.os.path, "isfile", lambda _p: False)
+        return rec
+
+    def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "hello world"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        statuses = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_status=lambda s: statuses.append(s),
+        )
+
+        assert fake_recorder.start_calls == 1
+        assert statuses == ["listening"]
+
+        # Simulate AudioRecorder's silence detector firing.
+        fake_recorder.last_callback()
+
+        assert transcripts == ["hello world"]
+        assert fake_recorder.start_calls == 2  # auto-restarted
+        assert statuses == ["listening", "transcribing", "listening"]
+        assert voice.is_continuous_active() is True
+
+        voice.stop_continuous()
+
+    def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Transcription returns no speech — fake_recorder.stop() returns the
+        # path, but transcribe returns empty text, counting as silence.
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": ""},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        silent_limit_fired = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_silent_limit=lambda: silent_limit_fired.append(True),
+        )
+
+        # Fire silence callback 3 times
+        for _ in range(3):
+            fake_recorder.last_callback()
+
+        assert transcripts == []
+        assert silent_limit_fired == [True]
+        assert voice.is_continuous_active() is False
+        assert fake_recorder.cancelled >= 1
+
+    def test_stop_during_transcription_discards_restart(self, fake_recorder, monkeypatch):
+        """User hits Ctrl+B mid-transcription: the in-flight transcript must
+        still fire (it's a real utterance), but the loop must NOT restart."""
+        import hermes_cli.voice as voice
+
+        stop_triggered = {"flag": False}
+
+        def late_transcribe(_p):
+            # Simulate stop_continuous arriving while we're inside transcribe
+            voice.stop_continuous()
+            stop_triggered["flag"] = True
+            return {"success": True, "transcript": "final word"}
+
+        monkeypatch.setattr(voice, "transcribe_recording", late_transcribe)
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        voice.start_continuous(on_transcript=lambda t: transcripts.append(t))
+
+        initial_starts = fake_recorder.start_calls  # 1
+        fake_recorder.last_callback()
+
+        assert stop_triggered["flag"] is True
+        # Loop is stopped — no auto-restart
+        assert fake_recorder.start_calls == initial_starts
+        # The in-flight transcript was suppressed because we stopped mid-flight
+        assert transcripts == []
+        assert voice.is_continuous_active() is False
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 165b47bf9..130b60576 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -3455,43 +3455,154 @@ def _(rid, params: dict) -> dict:
 # ── Methods: voice ───────────────────────────────────────────────────
 
 
+_voice_sid_lock = threading.Lock()
+_voice_event_sid: str = ""
+
+
+def _voice_emit(event: str, payload: dict | None = None) -> None:
+    """Emit a voice event toward the session that most recently turned the
+    mode on. Voice is process-global (one microphone), so there's only ever
+    one sid to target; the TUI handler treats an empty sid as "active
+    session". Kept separate from _emit to make the lack of per-call sid
+    argument explicit."""
+    with _voice_sid_lock:
+        sid = _voice_event_sid
+    _emit(event, sid, payload)
+
+
+def _voice_mode_enabled() -> bool:
+    """Current voice-mode flag. HERMES_VOICE env var wins over config so
+    the gateway and CLI agree when one of them was launched with an
+    explicit override."""
+    env = os.environ.get("HERMES_VOICE", "").strip()
+    if env in {"0", "1"}:
+        return env == "1"
+    return bool(_load_cfg().get("display", {}).get("voice_enabled", False))
+
+
+def _voice_tts_enabled() -> bool:
+    """Whether agent replies should be spoken back via TTS."""
+    env = os.environ.get("HERMES_VOICE_TTS", "").strip()
+    if env in {"0", "1"}:
+        return env == "1"
+    return bool(_load_cfg().get("display", {}).get("voice_tts", False))
+
+
 @method("voice.toggle")
 def _(rid, params: dict) -> dict:
+    """CLI parity for the ``/voice`` slash command.
+
+    Subcommands:
+
+    * ``status`` — report mode + TTS flags (default when action is unknown).
+    * ``on`` / ``off`` — flip voice *mode* (the umbrella bit). Turning it
+      off also tears down any active continuous recording loop. Does NOT
+      start recording on its own; recording is driven by ``voice.record``
+      (Ctrl+B) after mode is on, matching cli.py's enable/Ctrl+B split.
+    * ``tts`` — toggle speech-output of agent replies. Requires mode on
+      (mirrors CLI's _toggle_voice_tts guard).
+    """
     action = params.get("action", "status")
+
     if action == "status":
-        env = os.environ.get("HERMES_VOICE", "").strip()
-        if env in {"0", "1"}:
-            return _ok(rid, {"enabled": env == "1"})
-        return _ok(
-            rid,
-            {
-                "enabled": bool(
-                    _load_cfg().get("display", {}).get("voice_enabled", False)
-                )
-            },
-        )
+        # Mirror CLI's _show_voice_status: include STT/TTS provider
+        # availability so the user can tell at a glance *why* voice mode
+        # isn't working ("STT provider: MISSING ..." is the common case).
+        payload: dict = {
+            "enabled": _voice_mode_enabled(),
+            "tts": _voice_tts_enabled(),
+        }
+        try:
+            from tools.voice_mode import check_voice_requirements
+
+            reqs = check_voice_requirements()
+            payload["available"] = bool(reqs.get("available"))
+            payload["audio_available"] = bool(reqs.get("audio_available"))
+            payload["stt_available"] = bool(reqs.get("stt_available"))
+            payload["details"] = reqs.get("details") or ""
+        except Exception as e:
+            # check_voice_requirements pulls optional transcription deps —
+            # swallow so /voice status always returns something useful.
+            logger.warning("voice.toggle status: requirements probe failed: %s", e)
+
+        return _ok(rid, payload)
+
     if action in ("on", "off"):
         enabled = action == "on"
         os.environ["HERMES_VOICE"] = "1" if enabled else "0"
         _write_config_key("display.voice_enabled", enabled)
-        return _ok(rid, {"enabled": action == "on"})
+
+        if not enabled:
+            # Disabling the mode must tear the continuous loop down; the
+            # loop holds the microphone and would otherwise keep running.
+            try:
+                from hermes_cli.voice import stop_continuous
+
+                stop_continuous()
+            except ImportError:
+                pass
+            except Exception as e:
+                logger.warning("voice: stop_continuous failed during toggle off: %s", e)
+
+        return _ok(rid, {"enabled": enabled, "tts": _voice_tts_enabled()})
+
+    if action == "tts":
+        if not _voice_mode_enabled():
+            return _err(rid, 4014, "enable voice mode first: /voice on")
+        new_value = not _voice_tts_enabled()
+        os.environ["HERMES_VOICE_TTS"] = "1" if new_value else "0"
+        _write_config_key("display.voice_tts", new_value)
+        return _ok(rid, {"enabled": True, "tts": new_value})
+
     return _err(rid, 4013, f"unknown voice action: {action}")
 
 
 @method("voice.record")
 def _(rid, params: dict) -> dict:
+    """VAD-driven continuous record loop, CLI-parity.
+
+    ``start`` turns on a VAD loop that emits ``voice.transcript`` events
+    for each detected utterance and auto-restarts for the next turn.
+    ``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while-
+    recording branch clearing ``_voice_continuous``). Three consecutive
+    silent cycles stop the loop automatically and emit a
+    ``voice.transcript`` with ``no_speech_limit=True``.
+    """
     action = params.get("action", "start")
+
+    if action not in {"start", "stop"}:
+        return _err(rid, 4019, f"unknown voice action: {action}")
+
     try:
         if action == "start":
-            from hermes_cli.voice import start_recording
+            if not _voice_mode_enabled():
+                return _err(rid, 4015, "voice mode is off — enable with /voice on")
 
-            start_recording()
+            with _voice_sid_lock:
+                global _voice_event_sid
+                _voice_event_sid = params.get("session_id") or _voice_event_sid
+
+            from hermes_cli.voice import start_continuous
+
+            voice_cfg = _load_cfg().get("voice", {})
+            start_continuous(
+                on_transcript=lambda t: _voice_emit(
+                    "voice.transcript", {"text": t}
+                ),
+                on_status=lambda s: _voice_emit("voice.status", {"state": s}),
+                on_silent_limit=lambda: _voice_emit(
+                    "voice.transcript", {"no_speech_limit": True}
+                ),
+                silence_threshold=voice_cfg.get("silence_threshold", 200),
+                silence_duration=voice_cfg.get("silence_duration", 3.0),
+            )
             return _ok(rid, {"status": "recording"})
-        if action == "stop":
-            from hermes_cli.voice import stop_and_transcribe
 
-            return _ok(rid, {"text": stop_and_transcribe() or ""})
-        return _err(rid, 4019, f"unknown voice action: {action}")
+        # action == "stop"
+        from hermes_cli.voice import stop_continuous
+
+        stop_continuous()
+        return _ok(rid, {"status": "stopped"})
     except ImportError:
         return _err(
             rid, 5025, "voice module not available — install audio dependencies"
diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
index 23f7c4646..ef55d807c 100644
--- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
+++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts
@@ -15,7 +15,8 @@ const buildCtx = (appended: Msg[]) =>
     composer: {
       dequeue: () => undefined,
       queueEditRef: ref<null | number>(null),
-      sendQueued: vi.fn()
+      sendQueued: vi.fn(),
+      setInput: vi.fn()
     },
     gateway: {
       gw: { request: vi.fn() },
@@ -29,6 +30,9 @@ const buildCtx = (appended: Msg[]) =>
       resumeById: vi.fn(),
       setCatalog: vi.fn()
     },
+    submission: {
+      submitRef: { current: vi.fn() }
+    },
     system: {
       bellOnComplete: false,
       sys: vi.fn()
@@ -38,6 +42,11 @@ const buildCtx = (appended: Msg[]) =>
       panel: (title: string, sections: any[]) =>
         appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }),
       setHistoryItems: vi.fn()
+    },
+    voice: {
+      setProcessing: vi.fn(),
+      setRecording: vi.fn(),
+      setVoiceEnabled: vi.fn()
     }
   }) as any
 
diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts
index 1ec123f11..377735ca9 100644
--- a/ui-tui/src/app/createGatewayEventHandler.ts
+++ b/ui-tui/src/app/createGatewayEventHandler.ts
@@ -51,6 +51,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
   const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session
   const { bellOnComplete, stdout, sys } = ctx.system
   const { appendMessage, panel, setHistoryItems } = ctx.transcript
+  const { setInput } = ctx.composer
+  const { submitRef } = ctx.submission
+  const { setProcessing: setVoiceProcessing, setRecording: setVoiceRecording, setVoiceEnabled } = ctx.voice
 
   let pendingThinkingStatus = ''
   let thinkingStatusTimer: null | ReturnType<typeof setTimeout> = null
@@ -261,6 +264,60 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
         return
       }
 
+      case 'voice.status': {
+        // Continuous VAD loop reports its internal state so the status bar
+        // can show listening / transcribing / idle without polling.
+        const state = String(ev.payload?.state ?? '')
+
+        if (state === 'listening') {
+          setVoiceRecording(true)
+          setVoiceProcessing(false)
+        } else if (state === 'transcribing') {
+          setVoiceRecording(false)
+          setVoiceProcessing(true)
+        } else {
+          setVoiceRecording(false)
+          setVoiceProcessing(false)
+        }
+
+        return
+      }
+
+      case 'voice.transcript': {
+        // CLI parity: the 3-strikes silence detector flipped off automatically.
+        // Mirror that on the UI side and tell the user why the mode is off.
+        if (ev.payload?.no_speech_limit) {
+          setVoiceEnabled(false)
+          setVoiceRecording(false)
+          setVoiceProcessing(false)
+          sys('voice: no speech detected 3 times, continuous mode stopped')
+
+          return
+        }
+
+        const text = String(ev.payload?.text ?? '').trim()
+
+        if (!text) {
+          return
+        }
+
+        // Match CLI's _pending_input.put(transcript): auto-submit when the
+        // composer is empty, otherwise append so the user can keep editing
+        // a partial draft they were working on.
+        setInput(prev => {
+          if (!prev) {
+            // defer submit so React commits the state change first
+            setTimeout(() => submitRef.current(text), 0)
+
+            return ''
+          }
+
+          return `${prev}${/\s$/.test(prev) ? '' : ' '}${text}`
+        })
+
+        return
+      }
+
       case 'gateway.start_timeout': {
         const { cwd, python } = ev.payload ?? {}
         const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : ''
diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index c1c427739..81def036c 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -189,9 +189,11 @@ export interface InputHandlerContext {
     stdout?: NodeJS.WriteStream
   }
   voice: {
+    enabled: boolean
     recording: boolean
     setProcessing: StateSetter<boolean>
     setRecording: StateSetter<boolean>
+    setVoiceEnabled: StateSetter<boolean>
   }
   wheelStep: number
 }
@@ -201,6 +203,9 @@ export interface InputHandlerResult {
 }
 
 export interface GatewayEventHandlerContext {
+  composer: {
+    setInput: StateSetter<string>
+  }
   gateway: GatewayServices
   session: {
     STARTUP_RESUME_ID: string
@@ -210,6 +215,9 @@ export interface GatewayEventHandlerContext {
     resumeById: (id: string) => void
     setCatalog: StateSetter<null | SlashCatalog>
   }
+  submission: {
+    submitRef: MutableRefObject<(value: string) => void>
+  }
   system: {
     bellOnComplete: boolean
     stdout?: NodeJS.WriteStream
@@ -220,6 +228,11 @@ export interface GatewayEventHandlerContext {
     panel: (title: string, sections: PanelSection[]) => void
     setHistoryItems: StateSetter<Msg[]>
   }
+  voice: {
+    setProcessing: StateSetter<boolean>
+    setRecording: StateSetter<boolean>
+    setVoiceEnabled: StateSetter<boolean>
+  }
 }
 
 export interface SlashHandlerContext {
diff --git a/ui-tui/src/app/slash/commands/session.ts b/ui-tui/src/app/slash/commands/session.ts
index 90a1beb3f..cf36fee6c 100644
--- a/ui-tui/src/app/slash/commands/session.ts
+++ b/ui-tui/src/app/slash/commands/session.ts
@@ -184,15 +184,64 @@ export const sessionCommands: SlashCommand[] = [
   },
 
   {
-    help: 'toggle voice input',
+    help: 'voice mode: [on|off|tts|status]',
     name: 'voice',
     run: (arg, ctx) => {
-      const action = arg === 'on' || arg === 'off' ? arg : 'status'
+      const normalized = (arg ?? '').trim().toLowerCase()
+
+      const action =
+        normalized === 'on' || normalized === 'off' || normalized === 'tts' || normalized === 'status'
+          ? normalized
+          : 'status'
 
       ctx.gateway.rpc<VoiceToggleResponse>('voice.toggle', { action }).then(
         ctx.guarded<VoiceToggleResponse>(r => {
           ctx.voice.setVoiceEnabled(!!r.enabled)
-          ctx.transcript.sys(`voice: ${r.enabled ? 'on — press Ctrl+B to record' : 'off'}`)
+
+          // Match CLI's _show_voice_status / _enable_voice_mode /
+          // _toggle_voice_tts output shape so users don't have to learn
+          // two vocabularies.
+          if (action === 'status') {
+            const mode = r.enabled ? 'ON' : 'OFF'
+            const tts = r.tts ? 'ON' : 'OFF'
+            ctx.transcript.sys('Voice Mode Status')
+            ctx.transcript.sys(`  Mode:       ${mode}`)
+            ctx.transcript.sys(`  TTS:        ${tts}`)
+            ctx.transcript.sys('  Record key: Ctrl+B')
+
+            // CLI's "Requirements:" block — surfaces STT/audio setup issues
+            // so the user sees "STT provider: MISSING ..." instead of
+            // silently failing on every Ctrl+B press.
+            if (r.details) {
+              ctx.transcript.sys('')
+              ctx.transcript.sys('  Requirements:')
+
+              for (const line of r.details.split('\n')) {
+                if (line.trim()) {
+                  ctx.transcript.sys(`    ${line}`)
+                }
+              }
+            }
+
+            return
+          }
+
+          if (action === 'tts') {
+            ctx.transcript.sys(`Voice TTS ${r.tts ? 'enabled' : 'disabled'}.`)
+
+            return
+          }
+
+          // on/off — mirror cli.py:_enable_voice_mode's 3-line output
+          if (r.enabled) {
+            const tts = r.tts ? ' (TTS enabled)' : ''
+            ctx.transcript.sys(`Voice mode enabled${tts}`)
+            ctx.transcript.sys('  Ctrl+B to start/stop recording')
+            ctx.transcript.sys('  /voice tts  to toggle speech output')
+            ctx.transcript.sys('  /voice off  to disable voice mode')
+          } else {
+            ctx.transcript.sys('Voice mode disabled.')
+          }
         })
       )
     }
diff --git a/ui-tui/src/app/useInputHandlers.ts b/ui-tui/src/app/useInputHandlers.ts
index cfc3eed7c..47fe8a216 100644
--- a/ui-tui/src/app/useInputHandlers.ts
+++ b/ui-tui/src/app/useInputHandlers.ts
@@ -134,45 +134,43 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
     }
   }
 
-  const voiceStop = () => {
-    voice.setRecording(false)
-    voice.setProcessing(true)
+  // CLI parity: Ctrl+B toggles the VAD-driven continuous recording loop
+  // (NOT the voice-mode umbrella bit). The mode is enabled via /voice on;
+  // Ctrl+B while the mode is off sys-nudges the user. While the mode is
+  // on, the first press starts a continuous loop (gateway → start_continuous,
+  // VAD auto-stop → transcribe → auto-restart), a subsequent press stops it.
+  // The gateway publishes voice.status + voice.transcript events that
+  // createGatewayEventHandler turns into UI badges and composer injection.
+  const voiceRecordToggle = () => {
+    if (!voice.enabled) {
+      return actions.sys('voice: mode is off — enable with /voice on')
+    }
+
+    const starting = !voice.recording
+    const action = starting ? 'start' : 'stop'
+
+    // Optimistic UI — flip the REC badge immediately so the user gets
+    // feedback while the RPC round-trips; the voice.status event is the
+    // authoritative source and may correct us.
+    if (starting) {
+      voice.setRecording(true)
+    } else {
+      voice.setRecording(false)
+      voice.setProcessing(false)
+    }
 
     gateway
-      .rpc<VoiceRecordResponse>('voice.record', { action: 'stop' })
-      .then(r => {
-        if (!r) {
-          return
+      .rpc<VoiceRecordResponse>('voice.record', { action })
+      .catch((e: Error) => {
+        // Revert optimistic UI on failure.
+        if (starting) {
+          voice.setRecording(false)
         }
 
-        const transcript = String(r.text || '').trim()
-
-        if (!transcript) {
-          return actions.sys('voice: no speech detected')
-        }
-
-        cActions.setInput(prev => (prev ? `${prev}${/\s$/.test(prev) ? '' : ' '}${transcript}` : transcript))
-      })
-      .catch((e: Error) => actions.sys(`voice error: ${e.message}`))
-      .finally(() => {
-        voice.setProcessing(false)
-        patchUiState({ status: 'ready' })
+        actions.sys(`voice error: ${e.message}`)
       })
   }
 
-  const voiceStart = () =>
-    gateway
-      .rpc<VoiceRecordResponse>('voice.record', { action: 'start' })
-      .then(r => {
-        if (!r) {
-          return
-        }
-
-        voice.setRecording(true)
-        patchUiState({ status: 'recording…' })
-      })
-      .catch((e: Error) => actions.sys(`voice error: ${e.message}`))
-
   useInput((ch, key) => {
     const live = getUiState()
 
@@ -371,7 +369,7 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
     }
 
     if (isVoiceToggleKey(key, ch)) {
-      return voice.recording ? voiceStop() : voiceStart()
+      return voiceRecordToggle()
     }
 
     if (isAction(key, ch, 'g')) {
diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts
index 41edcc828..c061aa5dd 100644
--- a/ui-tui/src/app/useMainApp.ts
+++ b/ui-tui/src/app/useMainApp.ts
@@ -454,13 +454,20 @@ export function useMainApp(gw: GatewayClient) {
     composer: { actions: composerActions, refs: composerRefs, state: composerState },
     gateway,
     terminal: { hasSelection, scrollRef, scrollWithSelection, selection, stdout },
-    voice: { recording: voiceRecording, setProcessing: setVoiceProcessing, setRecording: setVoiceRecording },
+    voice: {
+      enabled: voiceEnabled,
+      recording: voiceRecording,
+      setProcessing: setVoiceProcessing,
+      setRecording: setVoiceRecording,
+      setVoiceEnabled
+    },
     wheelStep: WHEEL_SCROLL_STEP
   })
 
   const onEvent = useMemo(
     () =>
       createGatewayEventHandler({
+        composer: { setInput: composerActions.setInput },
         gateway,
         session: {
           STARTUP_RESUME_ID,
@@ -470,18 +477,29 @@ export function useMainApp(gw: GatewayClient) {
           resumeById: session.resumeById,
           setCatalog
         },
+        submission: { submitRef },
         system: { bellOnComplete, stdout, sys },
-        transcript: { appendMessage, panel, setHistoryItems }
+        transcript: { appendMessage, panel, setHistoryItems },
+        voice: {
+          setProcessing: setVoiceProcessing,
+          setRecording: setVoiceRecording,
+          setVoiceEnabled
+        }
       }),
     [
       appendMessage,
       bellOnComplete,
+      composerActions.setInput,
       gateway,
       panel,
       session.newSession,
       session.resetSession,
       session.resumeById,
+      setVoiceEnabled,
+      setVoiceProcessing,
+      setVoiceRecording,
       stdout,
+      submitRef,
       sys
     ]
   )
diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts
index 1dc8ea5be..05f8d9a41 100644
--- a/ui-tui/src/gatewayTypes.ts
+++ b/ui-tui/src/gatewayTypes.ts
@@ -236,10 +236,16 @@ export interface ImageAttachResponse {
 // ── Voice ────────────────────────────────────────────────────────────
 
 export interface VoiceToggleResponse {
+  audio_available?: boolean
+  available?: boolean
+  details?: string
   enabled?: boolean
+  stt_available?: boolean
+  tts?: boolean
 }
 
 export interface VoiceRecordResponse {
+  status?: string
   text?: string
 }
 
@@ -368,6 +374,8 @@ export type GatewayEvent =
   | { payload?: { text?: string }; session_id?: string; type: 'thinking.delta' }
   | { payload?: undefined; session_id?: string; type: 'message.start' }
   | { payload?: { kind?: string; text?: string }; session_id?: string; type: 'status.update' }
+  | { payload?: { state?: 'idle' | 'listening' | 'transcribing' }; session_id?: string; type: 'voice.status' }
+  | { payload?: { no_speech_limit?: boolean; text?: string }; session_id?: string; type: 'voice.transcript' }
   | { payload: { line: string }; session_id?: string; type: 'gateway.stderr' }
   | { payload?: { cwd?: string; python?: string }; session_id?: string; type: 'gateway.start_timeout' }
   | { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' }