mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix: voice mode race conditions, temp file leak, think tag parsing
- Atomic check-and-set for _voice_recording flag with _voice_lock
- Guard _voice_stop_and_transcribe against concurrent invocation
- Remove premature flag clearing from Ctrl+R handler
- Clean up temp WAV files in finally block (_play_via_tempfile)
- Use buffer-level regex for <think> block filtering (handles chunked tags)
- Prevent /voice on prompt accumulation on repeated calls
- Include Groq in STT key error message
This commit is contained in:
parent
7d4b4e95f1
commit
3a1b35ed92
2 changed files with 46 additions and 41 deletions
|
|
@ -519,10 +519,11 @@ def stream_tts_to_speaker(
|
|||
output_stream = None
|
||||
|
||||
sentence_buf = ""
|
||||
in_think = False # track <think>...</think> blocks
|
||||
min_sentence_len = 20
|
||||
long_flush_len = 100
|
||||
queue_timeout = 0.5
|
||||
# Regex to strip complete <think>...</think> blocks from buffer
|
||||
_think_block_re = re.compile(r'<think[\s>].*?</think>', flags=re.DOTALL)
|
||||
|
||||
def _speak_sentence(sentence: str):
|
||||
"""Display sentence and optionally generate + play audio."""
|
||||
|
|
@ -562,6 +563,7 @@ def stream_tts_to_speaker(
|
|||
|
||||
def _play_via_tempfile(audio_iter, stop_evt):
|
||||
"""Write PCM chunks to a temp WAV file and play it."""
|
||||
tmp_path = None
|
||||
try:
|
||||
import wave
|
||||
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||
|
|
@ -576,9 +578,14 @@ def stream_tts_to_speaker(
|
|||
wf.writeframes(chunk)
|
||||
from tools.voice_mode import play_audio_file
|
||||
play_audio_file(tmp_path)
|
||||
os.unlink(tmp_path)
|
||||
except Exception as exc:
|
||||
logger.warning("Temp-file TTS fallback failed: %s", exc)
|
||||
finally:
|
||||
if tmp_path:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
while not stop_event.is_set():
|
||||
# Read next delta from queue
|
||||
|
|
@ -592,41 +599,24 @@ def stream_tts_to_speaker(
|
|||
continue
|
||||
|
||||
if delta is None:
|
||||
# End-of-text sentinel: flush remaining buffer
|
||||
# End-of-text sentinel: strip any remaining think blocks, flush
|
||||
sentence_buf = _think_block_re.sub('', sentence_buf)
|
||||
if sentence_buf.strip():
|
||||
_speak_sentence(sentence_buf)
|
||||
break
|
||||
|
||||
sentence_buf += delta
|
||||
|
||||
# --- Think block filtering ---
|
||||
# Process delta character by character for think tags
|
||||
i = 0
|
||||
filtered_delta = []
|
||||
while i < len(delta):
|
||||
# Check for opening <think tag
|
||||
if delta[i:].startswith("<think"):
|
||||
in_think = True
|
||||
# Skip past the tag
|
||||
end = delta.find(">", i)
|
||||
if end != -1:
|
||||
i = end + 1
|
||||
else:
|
||||
i = len(delta)
|
||||
continue
|
||||
# Check for closing </think> tag
|
||||
if delta[i:].startswith("</think>"):
|
||||
in_think = False
|
||||
i += len("</think>")
|
||||
continue
|
||||
if not in_think:
|
||||
filtered_delta.append(delta[i])
|
||||
i += 1
|
||||
# Strip complete <think>...</think> blocks from buffer.
|
||||
# Works correctly even when tags span multiple deltas.
|
||||
sentence_buf = _think_block_re.sub('', sentence_buf)
|
||||
|
||||
text = "".join(filtered_delta)
|
||||
if not text:
|
||||
# If an incomplete <think tag is at the end, wait for more data
|
||||
# before extracting sentences (the closing tag may arrive next).
|
||||
if '<think' in sentence_buf and '</think>' not in sentence_buf:
|
||||
continue
|
||||
|
||||
sentence_buf += text
|
||||
|
||||
# Check for sentence boundaries
|
||||
while True:
|
||||
m = _SENTENCE_BOUNDARY_RE.search(sentence_buf)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue