mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 01:21:43 +00:00
Merge branch 'main' into hermes/delegation-readiness-doctor-clean
This commit is contained in:
commit
cb855b84a3
56 changed files with 4915 additions and 218 deletions
|
|
@ -11,5 +11,5 @@ Provides subcommands for:
|
|||
- hermes cron - Manage cron jobs
|
||||
"""
|
||||
|
||||
__version__ = "0.10.0"
|
||||
__release_date__ = "2026.4.16"
|
||||
__version__ = "0.11.0"
|
||||
__release_date__ = "2026.4.23"
|
||||
|
|
|
|||
|
|
@ -739,6 +739,10 @@ DEFAULT_CONFIG = {
|
|||
"inherit_mcp_toolsets": True,
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s,
|
||||
# no ceiling). High-reasoning models on large tasks
|
||||
# (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
|
||||
# raise if children time out before producing output.
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
|||
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
|
||||
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
|
||||
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
|
||||
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
|
||||
])
|
||||
|
||||
|
||||
|
|
|
|||
548
hermes_cli/voice.py
Normal file
548
hermes_cli/voice.py
Normal file
|
|
@ -0,0 +1,548 @@
|
|||
"""Process-wide voice recording + TTS API for the TUI gateway.
|
||||
|
||||
Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
|
||||
(text-to-speech) behind idempotent, stateful entry points that the gateway's
|
||||
``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
|
||||
call from a dedicated thread. The gateway imports this module lazily so that
|
||||
missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
|
||||
an ``ImportError`` at call time, not at startup.
|
||||
|
||||
Two usage modes are exposed:
|
||||
|
||||
* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
|
||||
manually-bounded capture used when the caller drives the start/stop pair
|
||||
explicitly.
|
||||
* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
|
||||
the classic CLI voice mode: recording auto-stops on silence, transcribes,
|
||||
hands the result to a callback, and then auto-restarts for the next turn.
|
||||
Three consecutive no-speech cycles stop the loop and fire
|
||||
``on_silent_limit`` so the UI can turn the mode off.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from tools.voice_mode import (
|
||||
create_audio_recorder,
|
||||
is_whisper_hallucination,
|
||||
play_audio_file,
|
||||
transcribe_recording,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _debug(msg: str) -> None:
|
||||
"""Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
|
||||
|
||||
Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
|
||||
which createGatewayEventHandler shows as an Activity line — exactly
|
||||
what we need to diagnose "why didn't the loop auto-restart?" in the
|
||||
user's real terminal without shipping a separate debug RPC.
|
||||
|
||||
Any OSError / BrokenPipeError is swallowed because this fires from
|
||||
background threads (silence callback, TTS daemon, beep) where a
|
||||
broken stderr pipe must not kill the whole gateway — the main
|
||||
command pipe (stdin+stdout) is what actually matters.
|
||||
"""
|
||||
if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
|
||||
return
|
||||
try:
|
||||
print(f"[voice] {msg}", file=sys.stderr, flush=True)
|
||||
except (BrokenPipeError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
def _beeps_enabled() -> bool:
|
||||
"""CLI parity: voice.beep_enabled in config.yaml (default True)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
voice_cfg = load_config().get("voice", {})
|
||||
if isinstance(voice_cfg, dict):
|
||||
return bool(voice_cfg.get("beep_enabled", True))
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def _play_beep(frequency: int, count: int = 1) -> None:
|
||||
"""Audible cue matching cli.py's record/stop beeps.
|
||||
|
||||
880 Hz single-beep on start (cli.py:_voice_start_recording line 7532),
|
||||
660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585).
|
||||
Best-effort — sounddevice failures are silently swallowed so the
|
||||
voice loop never breaks because a speaker was unavailable.
|
||||
"""
|
||||
if not _beeps_enabled():
|
||||
return
|
||||
try:
|
||||
from tools.voice_mode import play_beep
|
||||
|
||||
play_beep(frequency=frequency, count=count)
|
||||
except Exception as e:
|
||||
_debug(f"beep {frequency}Hz failed: {e}")
|
||||
|
||||
# ── Push-to-talk state ───────────────────────────────────────────────
|
||||
_recorder = None
|
||||
_recorder_lock = threading.Lock()
|
||||
|
||||
# ── Continuous (VAD) state ───────────────────────────────────────────
|
||||
_continuous_lock = threading.Lock()
|
||||
_continuous_active = False
|
||||
_continuous_recorder: Any = None
|
||||
|
||||
# ── TTS-vs-STT feedback guard ────────────────────────────────────────
|
||||
# When TTS plays the agent reply over the speakers, the live microphone
|
||||
# picks it up and transcribes the agent's own voice as user input — an
|
||||
# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
|
||||
# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
|
||||
# playing, set while silent. _continuous_on_silence waits on it before
|
||||
# re-arming the recorder, and speak_text itself cancels any live capture
|
||||
# before starting playback so the tail of the previous utterance doesn't
|
||||
# leak into the mic.
|
||||
_tts_playing = threading.Event()
|
||||
_tts_playing.set() # initially "not playing"
|
||||
_continuous_on_transcript: Optional[Callable[[str], None]] = None
|
||||
_continuous_on_status: Optional[Callable[[str], None]] = None
|
||||
_continuous_on_silent_limit: Optional[Callable[[], None]] = None
|
||||
_continuous_no_speech_count = 0
|
||||
_CONTINUOUS_NO_SPEECH_LIMIT = 3
|
||||
|
||||
|
||||
# ── Push-to-talk API ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_recording() -> None:
|
||||
"""Begin capturing from the default input device (push-to-talk).
|
||||
|
||||
Idempotent — calling again while a recording is in progress is a no-op.
|
||||
"""
|
||||
global _recorder
|
||||
|
||||
with _recorder_lock:
|
||||
if _recorder is not None and getattr(_recorder, "is_recording", False):
|
||||
return
|
||||
rec = create_audio_recorder()
|
||||
rec.start()
|
||||
_recorder = rec
|
||||
|
||||
|
||||
def stop_and_transcribe() -> Optional[str]:
|
||||
"""Stop the active push-to-talk recording, transcribe, return text.
|
||||
|
||||
Returns ``None`` when no recording is active, when the microphone
|
||||
captured no speech, or when Whisper returned a known hallucination.
|
||||
"""
|
||||
global _recorder
|
||||
|
||||
with _recorder_lock:
|
||||
rec = _recorder
|
||||
_recorder = None
|
||||
|
||||
if rec is None:
|
||||
return None
|
||||
|
||||
wav_path = rec.stop()
|
||||
if not wav_path:
|
||||
return None
|
||||
|
||||
try:
|
||||
result = transcribe_recording(wav_path)
|
||||
except Exception as e:
|
||||
logger.warning("voice transcription failed: %s", e)
|
||||
return None
|
||||
finally:
|
||||
try:
|
||||
if os.path.isfile(wav_path):
|
||||
os.unlink(wav_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# transcribe_recording returns {"success": bool, "transcript": str, ...}
|
||||
# — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
|
||||
if not result.get("success"):
|
||||
return None
|
||||
text = (result.get("transcript") or "").strip()
|
||||
if not text or is_whisper_hallucination(text):
|
||||
return None
|
||||
|
||||
return text
|
||||
|
||||
|
||||
# ── Continuous (VAD) API ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_continuous(
|
||||
on_transcript: Callable[[str], None],
|
||||
on_status: Optional[Callable[[str], None]] = None,
|
||||
on_silent_limit: Optional[Callable[[], None]] = None,
|
||||
silence_threshold: int = 200,
|
||||
silence_duration: float = 3.0,
|
||||
) -> None:
|
||||
"""Start a VAD-driven continuous recording loop.
|
||||
|
||||
The loop calls ``on_transcript(text)`` each time speech is detected and
|
||||
transcribed successfully, then auto-restarts. After
|
||||
``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
|
||||
picked up at all) the loop stops itself and calls ``on_silent_limit``
|
||||
so the UI can reflect "voice off". Idempotent — calling while already
|
||||
active is a no-op.
|
||||
|
||||
``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
|
||||
``"idle"`` so the UI can show a live indicator.
|
||||
"""
|
||||
global _continuous_active, _continuous_recorder
|
||||
global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
|
||||
global _continuous_no_speech_count
|
||||
|
||||
with _continuous_lock:
|
||||
if _continuous_active:
|
||||
_debug("start_continuous: already active — no-op")
|
||||
return
|
||||
_continuous_active = True
|
||||
_continuous_on_transcript = on_transcript
|
||||
_continuous_on_status = on_status
|
||||
_continuous_on_silent_limit = on_silent_limit
|
||||
_continuous_no_speech_count = 0
|
||||
|
||||
if _continuous_recorder is None:
|
||||
_continuous_recorder = create_audio_recorder()
|
||||
|
||||
_continuous_recorder._silence_threshold = silence_threshold
|
||||
_continuous_recorder._silence_duration = silence_duration
|
||||
rec = _continuous_recorder
|
||||
|
||||
_debug(
|
||||
f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
|
||||
)
|
||||
|
||||
# CLI parity: single 880 Hz beep *before* opening the stream — placing
|
||||
# the beep after stream.start() on macOS triggers a CoreAudio conflict
|
||||
# (cli.py:7528 comment).
|
||||
_play_beep(frequency=880, count=1)
|
||||
|
||||
try:
|
||||
rec.start(on_silence_stop=_continuous_on_silence)
|
||||
except Exception as e:
|
||||
logger.error("failed to start continuous recording: %s", e)
|
||||
_debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
|
||||
with _continuous_lock:
|
||||
_continuous_active = False
|
||||
raise
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("listening")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def stop_continuous() -> None:
|
||||
"""Stop the active continuous loop and release the microphone.
|
||||
|
||||
Idempotent — calling while not active is a no-op. Any in-flight
|
||||
transcription completes but its result is discarded (the callback
|
||||
checks ``_continuous_active`` before firing).
|
||||
"""
|
||||
global _continuous_active, _continuous_on_transcript
|
||||
global _continuous_on_status, _continuous_on_silent_limit
|
||||
global _continuous_recorder, _continuous_no_speech_count
|
||||
|
||||
with _continuous_lock:
|
||||
if not _continuous_active:
|
||||
return
|
||||
_continuous_active = False
|
||||
rec = _continuous_recorder
|
||||
on_status = _continuous_on_status
|
||||
_continuous_on_transcript = None
|
||||
_continuous_on_status = None
|
||||
_continuous_on_silent_limit = None
|
||||
_continuous_no_speech_count = 0
|
||||
|
||||
if rec is not None:
|
||||
try:
|
||||
# cancel() (not stop()) discards buffered frames — the loop
|
||||
# is over, we don't want to transcribe a half-captured turn.
|
||||
rec.cancel()
|
||||
except Exception as e:
|
||||
logger.warning("failed to cancel recorder: %s", e)
|
||||
|
||||
# Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
|
||||
# silence-auto-stop path plays).
|
||||
_play_beep(frequency=660, count=2)
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("idle")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def is_continuous_active() -> bool:
|
||||
"""Whether a continuous voice loop is currently running."""
|
||||
with _continuous_lock:
|
||||
return _continuous_active
|
||||
|
||||
|
||||
def _continuous_on_silence() -> None:
|
||||
"""AudioRecorder silence callback — runs in a daemon thread.
|
||||
|
||||
Stops the current capture, transcribes, delivers the text via
|
||||
``on_transcript``, and — if the loop is still active — starts the
|
||||
next capture. Three consecutive silent cycles end the loop.
|
||||
"""
|
||||
global _continuous_active, _continuous_no_speech_count
|
||||
|
||||
_debug("_continuous_on_silence: fired")
|
||||
|
||||
with _continuous_lock:
|
||||
if not _continuous_active:
|
||||
_debug("_continuous_on_silence: loop inactive — abort")
|
||||
return
|
||||
rec = _continuous_recorder
|
||||
on_transcript = _continuous_on_transcript
|
||||
on_status = _continuous_on_status
|
||||
on_silent_limit = _continuous_on_silent_limit
|
||||
|
||||
if rec is None:
|
||||
_debug("_continuous_on_silence: no recorder — abort")
|
||||
return
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("transcribing")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
wav_path = rec.stop()
|
||||
# Peak RMS is the critical diagnostic when stop() returns None despite
|
||||
# the VAD firing — tells us at a glance whether the mic was too quiet
|
||||
# for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
|
||||
peak_rms = getattr(rec, "_peak_rms", -1)
|
||||
_debug(
|
||||
f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
|
||||
)
|
||||
|
||||
# CLI parity: double 660 Hz beep after the stream stops (safe from the
|
||||
# CoreAudio conflict that blocks pre-start beeps).
|
||||
_play_beep(frequency=660, count=2)
|
||||
|
||||
transcript: Optional[str] = None
|
||||
|
||||
if wav_path:
|
||||
try:
|
||||
result = transcribe_recording(wav_path)
|
||||
# transcribe_recording returns {"success": bool, "transcript": str,
|
||||
# "error": str?} — NOT {"text": str}. Using the wrong key silently
|
||||
# produced empty transcripts even when Groq/local STT returned fine,
|
||||
# which masqueraded as "not hearing the user" to the caller.
|
||||
success = bool(result.get("success"))
|
||||
text = (result.get("transcript") or "").strip()
|
||||
err = result.get("error")
|
||||
_debug(
|
||||
f"_continuous_on_silence: transcribe -> success={success} "
|
||||
f"text={text!r} err={err!r}"
|
||||
)
|
||||
if success and text and not is_whisper_hallucination(text):
|
||||
transcript = text
|
||||
except Exception as e:
|
||||
logger.warning("continuous transcription failed: %s", e)
|
||||
_debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
|
||||
finally:
|
||||
try:
|
||||
if os.path.isfile(wav_path):
|
||||
os.unlink(wav_path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
with _continuous_lock:
|
||||
if not _continuous_active:
|
||||
# User stopped us while we were transcribing — discard.
|
||||
_debug("_continuous_on_silence: stopped during transcribe — no restart")
|
||||
return
|
||||
if transcript:
|
||||
_continuous_no_speech_count = 0
|
||||
else:
|
||||
_continuous_no_speech_count += 1
|
||||
should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
|
||||
no_speech = _continuous_no_speech_count
|
||||
|
||||
if transcript and on_transcript:
|
||||
try:
|
||||
on_transcript(transcript)
|
||||
except Exception as e:
|
||||
logger.warning("on_transcript callback raised: %s", e)
|
||||
|
||||
if should_halt:
|
||||
_debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
|
||||
with _continuous_lock:
|
||||
_continuous_active = False
|
||||
_continuous_no_speech_count = 0
|
||||
if on_silent_limit:
|
||||
try:
|
||||
on_silent_limit()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
rec.cancel()
|
||||
except Exception:
|
||||
pass
|
||||
if on_status:
|
||||
try:
|
||||
on_status("idle")
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
|
||||
# CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
|
||||
# finish before re-arming the mic, then leave a small gap to avoid
|
||||
# catching the tail of the speaker output. Without this the voice
|
||||
# loop becomes a feedback loop — the agent's spoken reply lands
|
||||
# back in the mic and gets re-submitted.
|
||||
if not _tts_playing.is_set():
|
||||
_debug("_continuous_on_silence: waiting for TTS to finish")
|
||||
_tts_playing.wait(timeout=60)
|
||||
import time as _time
|
||||
_time.sleep(0.3)
|
||||
|
||||
# User may have stopped the loop during the wait.
|
||||
with _continuous_lock:
|
||||
if not _continuous_active:
|
||||
_debug("_continuous_on_silence: stopped while waiting for TTS")
|
||||
return
|
||||
|
||||
# Restart for the next turn.
|
||||
_debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
|
||||
_play_beep(frequency=880, count=1)
|
||||
try:
|
||||
rec.start(on_silence_stop=_continuous_on_silence)
|
||||
except Exception as e:
|
||||
logger.error("failed to restart continuous recording: %s", e)
|
||||
_debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
|
||||
with _continuous_lock:
|
||||
_continuous_active = False
|
||||
return
|
||||
|
||||
if on_status:
|
||||
try:
|
||||
on_status("listening")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# ── TTS API ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def speak_text(text: str) -> None:
|
||||
"""Synthesize ``text`` with the configured TTS provider and play it.
|
||||
|
||||
Mirrors cli.py:_voice_speak_response exactly — same markdown strip
|
||||
pipeline, same 4000-char cap, same explicit mp3 output path, same
|
||||
MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup
|
||||
of both extensions. Keeping these in sync means a voice-mode TTS
|
||||
session in the TUI sounds identical to one in the classic CLI.
|
||||
|
||||
While playback is in flight the module-level _tts_playing Event is
|
||||
cleared so the continuous-recording loop knows to wait before
|
||||
re-arming the mic (otherwise the agent's spoken reply feedback-loops
|
||||
through the microphone and the agent ends up replying to itself).
|
||||
"""
|
||||
if not text or not text.strip():
|
||||
return
|
||||
|
||||
import re
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
# Cancel any live capture before we open the speakers — otherwise the
|
||||
# last ~200ms of the user's turn tail + the first syllables of our TTS
|
||||
# both end up in the next recording window. The continuous loop will
|
||||
# re-arm itself after _tts_playing flips back (see _continuous_on_silence).
|
||||
paused_recording = False
|
||||
with _continuous_lock:
|
||||
if (
|
||||
_continuous_active
|
||||
and _continuous_recorder is not None
|
||||
and getattr(_continuous_recorder, "is_recording", False)
|
||||
):
|
||||
try:
|
||||
_continuous_recorder.cancel()
|
||||
paused_recording = True
|
||||
except Exception as e:
|
||||
logger.warning("failed to pause recorder for TTS: %s", e)
|
||||
|
||||
_tts_playing.clear()
|
||||
_debug(f"speak_text: TTS begin (paused_recording={paused_recording})")
|
||||
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool
|
||||
|
||||
tts_text = text[:4000] if len(text) > 4000 else text
|
||||
tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text) # fenced code blocks
|
||||
tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text) # [text](url) → text
|
||||
tts_text = re.sub(r'https?://\S+', '', tts_text) # bare URLs
|
||||
tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text) # bold
|
||||
tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text) # italic
|
||||
tts_text = re.sub(r'`(.+?)`', r'\1', tts_text) # inline code
|
||||
tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE) # headers
|
||||
tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE) # list bullets
|
||||
tts_text = re.sub(r'---+', '', tts_text) # horizontal rules
|
||||
tts_text = re.sub(r'\n{3,}', '\n\n', tts_text) # excess newlines
|
||||
tts_text = tts_text.strip()
|
||||
if not tts_text:
|
||||
return
|
||||
|
||||
# MP3 output path, pre-chosen so we can play the MP3 directly even
|
||||
# when text_to_speech_tool auto-converts to OGG for messaging
|
||||
# platforms. afplay's OGG support is flaky, MP3 always works.
|
||||
os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
|
||||
mp3_path = os.path.join(
|
||||
tempfile.gettempdir(),
|
||||
"hermes_voice",
|
||||
f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
|
||||
)
|
||||
|
||||
_debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
|
||||
text_to_speech_tool(text=tts_text, output_path=mp3_path)
|
||||
|
||||
if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
|
||||
_debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
|
||||
play_audio_file(mp3_path)
|
||||
try:
|
||||
os.unlink(mp3_path)
|
||||
ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
|
||||
if os.path.isfile(ogg_path):
|
||||
os.unlink(ogg_path)
|
||||
except OSError:
|
||||
pass
|
||||
else:
|
||||
_debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
|
||||
except Exception as e:
|
||||
logger.warning("Voice TTS playback failed: %s", e)
|
||||
_debug(f"speak_text raised {type(e).__name__}: {e}")
|
||||
finally:
|
||||
_tts_playing.set()
|
||||
_debug("speak_text: TTS done")
|
||||
|
||||
# Re-arm the mic so the user can answer without pressing Ctrl+B.
|
||||
# Small delay lets the OS flush speaker output and afplay fully
|
||||
# release the audio device before sounddevice re-opens the input.
|
||||
if paused_recording:
|
||||
time.sleep(0.3)
|
||||
with _continuous_lock:
|
||||
if _continuous_active and _continuous_recorder is not None:
|
||||
try:
|
||||
_continuous_recorder.start(
|
||||
on_silence_stop=_continuous_on_silence
|
||||
)
|
||||
_debug("speak_text: recording resumed after TTS")
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"failed to resume recorder after TTS: %s", e
|
||||
)
|
||||
|
|
@ -2370,6 +2370,29 @@ _THEME_OVERRIDE_KEYS = {
|
|||
"border", "input", "ring",
|
||||
}
|
||||
|
||||
# Well-known named asset slots themes can populate. Any other keys under
|
||||
# ``assets.custom`` are exposed as ``--theme-asset-custom-<key>`` CSS vars
|
||||
# for plugin/shell use.
|
||||
_THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"}
|
||||
|
||||
# Component-style buckets themes can override. The value under each bucket
|
||||
# is a mapping from camelCase property name to CSS string; each pair emits
|
||||
# ``--component-<bucket>-<kebab-property>`` on :root. The frontend's shell
|
||||
# components (Card, App header, Backdrop, etc.) consume these vars so themes
|
||||
# can restyle chrome (clip-path, border-image, segmented progress, etc.)
|
||||
# without shipping their own CSS.
|
||||
_THEME_COMPONENT_BUCKETS = {
|
||||
"card", "header", "footer", "sidebar", "tab",
|
||||
"progress", "badge", "backdrop", "page",
|
||||
}
|
||||
|
||||
_THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"}
|
||||
|
||||
# Cap on customCSS length so a malformed/oversized theme YAML can't blow up
|
||||
# the response payload or the <style> tag. 32 KiB is plenty for every
|
||||
# practical reskin (the Strike Freedom demo is ~2 KiB).
|
||||
_THEME_CUSTOM_CSS_MAX = 32 * 1024
|
||||
|
||||
|
||||
def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Normalise a user theme YAML into the wire format `ThemeProvider`
|
||||
|
|
@ -2433,6 +2456,69 @@ def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]
|
|||
if key in _THEME_OVERRIDE_KEYS and isinstance(val, str) and val.strip():
|
||||
color_overrides[key] = val
|
||||
|
||||
# Assets — named slots + arbitrary user-defined keys. Values must be
|
||||
# strings (URLs or CSS ``url(...)``/``linear-gradient(...)`` expressions).
|
||||
# We don't fetch remote assets here; the frontend just injects them as
|
||||
# CSS vars. Empty values are dropped so a theme can explicitly clear a
|
||||
# slot by setting ``hero: ""``.
|
||||
assets_out: Dict[str, Any] = {}
|
||||
assets_src = data.get("assets", {}) if isinstance(data.get("assets"), dict) else {}
|
||||
for key in _THEME_NAMED_ASSET_KEYS:
|
||||
val = assets_src.get(key)
|
||||
if isinstance(val, str) and val.strip():
|
||||
assets_out[key] = val
|
||||
custom_assets_src = assets_src.get("custom")
|
||||
if isinstance(custom_assets_src, dict):
|
||||
custom_assets: Dict[str, str] = {}
|
||||
for key, val in custom_assets_src.items():
|
||||
if (
|
||||
isinstance(key, str)
|
||||
and key.replace("-", "").replace("_", "").isalnum()
|
||||
and isinstance(val, str)
|
||||
and val.strip()
|
||||
):
|
||||
custom_assets[key] = val
|
||||
if custom_assets:
|
||||
assets_out["custom"] = custom_assets
|
||||
|
||||
# Custom CSS — raw CSS text the frontend injects as a scoped <style>
|
||||
# tag on theme apply. Clipped to _THEME_CUSTOM_CSS_MAX to keep the
|
||||
# payload bounded. We intentionally do NOT parse/sanitise the CSS
|
||||
# here — the dashboard is localhost-only and themes are user-authored
|
||||
# YAML in ~/.hermes/, same trust level as the config file itself.
|
||||
custom_css_val = data.get("customCSS")
|
||||
custom_css: Optional[str] = None
|
||||
if isinstance(custom_css_val, str) and custom_css_val.strip():
|
||||
custom_css = custom_css_val[:_THEME_CUSTOM_CSS_MAX]
|
||||
|
||||
# Component style overrides — per-bucket dicts of camelCase CSS
|
||||
# property -> CSS string. The frontend converts these into CSS vars
|
||||
# that shell components (Card, App header, Backdrop) consume.
|
||||
component_styles_src = data.get("componentStyles", {})
|
||||
component_styles: Dict[str, Dict[str, str]] = {}
|
||||
if isinstance(component_styles_src, dict):
|
||||
for bucket, props in component_styles_src.items():
|
||||
if bucket not in _THEME_COMPONENT_BUCKETS or not isinstance(props, dict):
|
||||
continue
|
||||
clean: Dict[str, str] = {}
|
||||
for prop, value in props.items():
|
||||
if (
|
||||
isinstance(prop, str)
|
||||
and prop.replace("-", "").replace("_", "").isalnum()
|
||||
and isinstance(value, (str, int, float))
|
||||
and str(value).strip()
|
||||
):
|
||||
clean[prop] = str(value)
|
||||
if clean:
|
||||
component_styles[bucket] = clean
|
||||
|
||||
layout_variant_src = data.get("layoutVariant")
|
||||
layout_variant = (
|
||||
layout_variant_src
|
||||
if isinstance(layout_variant_src, str) and layout_variant_src in _THEME_LAYOUT_VARIANTS
|
||||
else "standard"
|
||||
)
|
||||
|
||||
result: Dict[str, Any] = {
|
||||
"name": name,
|
||||
"label": data.get("label") or name,
|
||||
|
|
@ -2440,9 +2526,16 @@ def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]
|
|||
"palette": palette,
|
||||
"typography": typography,
|
||||
"layout": layout,
|
||||
"layoutVariant": layout_variant,
|
||||
}
|
||||
if color_overrides:
|
||||
result["colorOverrides"] = color_overrides
|
||||
if assets_out:
|
||||
result["assets"] = assets_out
|
||||
if custom_css is not None:
|
||||
result["customCSS"] = custom_css
|
||||
if component_styles:
|
||||
result["componentStyles"] = component_styles
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -2552,13 +2645,35 @@ def _discover_dashboard_plugins() -> list:
|
|||
if name in seen_names:
|
||||
continue
|
||||
seen_names.add(name)
|
||||
# Tab options: ``path`` + ``position`` for a new tab, optional
|
||||
# ``override`` to replace a built-in route, and ``hidden`` to
|
||||
# register the plugin component/slots without adding a tab
|
||||
# (useful for slot-only plugins like a header-crest injector).
|
||||
raw_tab = data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
|
||||
tab_info = {
|
||||
"path": raw_tab.get("path", f"/{name}"),
|
||||
"position": raw_tab.get("position", "end"),
|
||||
}
|
||||
override_path = raw_tab.get("override")
|
||||
if isinstance(override_path, str) and override_path.startswith("/"):
|
||||
tab_info["override"] = override_path
|
||||
if bool(raw_tab.get("hidden")):
|
||||
tab_info["hidden"] = True
|
||||
# Slots: list of named slot locations this plugin populates.
|
||||
# The frontend exposes ``registerSlot(pluginName, slotName, Component)``
|
||||
# on window; plugins with non-empty slots call it from their JS bundle.
|
||||
slots_src = data.get("slots")
|
||||
slots: List[str] = []
|
||||
if isinstance(slots_src, list):
|
||||
slots = [s for s in slots_src if isinstance(s, str) and s]
|
||||
plugins.append({
|
||||
"name": name,
|
||||
"label": data.get("label", name),
|
||||
"description": data.get("description", ""),
|
||||
"icon": data.get("icon", "Puzzle"),
|
||||
"version": data.get("version", "0.0.0"),
|
||||
"tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
|
||||
"tab": tab_info,
|
||||
"slots": slots,
|
||||
"entry": data.get("entry", "dist/index.js"),
|
||||
"css": data.get("css"),
|
||||
"has_api": bool(data.get("api")),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue