test(tts): cover xAI auto speech-tags auxiliary rewrite path

The previous xAI auto-speech-tag tests asserted on the local
pause-only fallback and only passed because call_llm silently
returns None in the test environment. They gave zero coverage of
the new auxiliary-rewrite path added in the previous commit.

Add tests that:
- mock agent.auxiliary_client.call_llm and pin down the new contract
  (auxiliary rewriter output wins over the local fallback)
- verify the system prompt lists every documented inline + wrapping
  tag and uses BBCode-style [/tag] closing syntax
- cover markdown-fence stripping (with and without language hint)
- exercise the local fallback on rewriter exception, empty response,
  None response, and missing-choices response
- confirm call_llm is NOT invoked when the input already has
  explicit speech tags, or is empty / whitespace-only
- replace the end-to-end test that asserted on the silent-fallback
  output with one that mocks the rewriter and asserts the
  rewriter's tagged text is what reaches the xAI TTS API
This commit is contained in:
Carlos Diosdado 2026-06-17 23:40:11 -06:00 committed by Teknium
parent 5a506da3d8
commit 8ae6bd0823

View file

@ -1,8 +1,16 @@
"""Tests for xAI TTS speech-tag handling."""
from unittest.mock import Mock
from types import SimpleNamespace
from unittest.mock import Mock, patch
from tools.tts_tool import _apply_xai_auto_speech_tags, _generate_xai_tts
import pytest
from tools.tts_tool import (
_XAI_INLINE_SPEECH_TAGS,
_XAI_WRAPPING_SPEECH_TAGS,
_apply_xai_auto_speech_tags,
_generate_xai_tts,
)
def test_apply_xai_auto_speech_tags_adds_light_pause_after_first_sentence():
@ -72,8 +80,20 @@ def test_apply_xai_auto_speech_tags_single_newline_still_gets_first_sentence_pau
)
def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypatch):
def test_generate_xai_tts_sends_auxiliary_rewriter_output_to_api(
tmp_path, monkeypatch
):
"""auto_speech_tags=True should send the auxiliary rewriter's tagged
output (not the conservative local pause fallback) to the xAI TTS API.
The previous version of this test asserted on the local pause-tagged
text which only happened to match because ``call_llm`` returns
``None`` in the test environment and the function silently fell
back. With the new auxiliary-rewrite path the user-visible contract
is "what the LLM said wins", so this test pins that down.
"""
captured = {}
rewriter_output = "Bonjour Monsieur Talbot. [warmly] Ceci est un test. [soft laugh]"
class FakeResponse:
content = b"mp3"
@ -88,8 +108,15 @@ def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypa
captured["timeout"] = timeout
return FakeResponse()
fake_response = SimpleNamespace(
choices=[SimpleNamespace(message=SimpleNamespace(content=rewriter_output))]
)
monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
monkeypatch.setattr("requests.post", fake_post)
monkeypatch.setattr(
"agent.auxiliary_client.call_llm", lambda *a, **kw: fake_response
)
out = tmp_path / "out.mp3"
_generate_xai_tts(
@ -102,7 +129,178 @@ def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypa
assert captured["url"] == "https://api.x.ai/v1/tts"
assert captured["json"]["voice_id"] == "ara"
assert captured["json"]["language"] == "fr"
assert captured["json"]["text"] == "Bonjour Monsieur Talbot. [pause] Ceci est un test."
assert captured["json"]["text"] == rewriter_output
def test_auto_speech_tags_calls_auxiliary_rewriter_with_tts_audio_tags_task():
    """Pin the call contract between the tagger and the auxiliary rewriter.

    For input without explicit speech tags the test expects exactly one
    ``call_llm`` invocation with ``task='tts_audio_tags'``, a
    ``temperature`` of 0.7, and a system + user message pair. The system
    prompt must name every inline and wrapping tag and show the
    ``[/tag]`` closing syntax; the user message must carry the locally
    pause-tagged transcript.
    """
    import re

    response = SimpleNamespace(
        choices=[SimpleNamespace(message=SimpleNamespace(content="[warmly] Hi."))]
    )
    with patch("agent.auxiliary_client.call_llm", return_value=response) as mock_call:
        result = _apply_xai_auto_speech_tags(
            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
        )

    # The rewriter's text is returned as-is.
    assert result == "[warmly] Hi."

    mock_call.assert_called_once()
    call_kwargs = mock_call.call_args.kwargs
    assert call_kwargs["task"] == "tts_audio_tags"
    assert call_kwargs["temperature"] == 0.7

    messages = call_kwargs["messages"]
    assert messages[0]["role"] == "system"
    assert messages[1]["role"] == "user"
    system_prompt = messages[0]["content"]

    def prompt_lists(tag):
        # Match the tag as a whole token. A plain substring check would let
        # e.g. "pause" be satisfied by "long-pause" and so hide a tag that
        # is really missing from the prompt.
        pattern = rf"(?<![\w-]){re.escape(tag)}(?![\w-])"
        return re.search(pattern, system_prompt) is not None

    # All documented inline + wrapping tag names must appear in the prompt
    # so the auxiliary model knows what's valid.
    for tag in _XAI_INLINE_SPEECH_TAGS:
        assert prompt_lists(tag), (
            f"inline tag {tag!r} missing from system prompt"
        )
    for tag in _XAI_WRAPPING_SPEECH_TAGS:
        assert prompt_lists(tag), (
            f"wrapping tag {tag!r} missing from system prompt"
        )

    # The prompt must explicitly show the BBCode-style closing syntax so
    # the rewriter uses [/tag] and not <tag>...</tag>.
    assert "[/tag]" in system_prompt

    # The user message carries the locally pause-tagged transcript (the
    # conservative fallback the rewriter is asked to enrich).
    assert "TRANSCRIPT TO TAG" in messages[1]["content"]
    assert "[pause]" in messages[1]["content"]
def test_auto_speech_tags_strips_markdown_fences_from_rewriter_output():
    """A rewriter reply wrapped in a bare ``` fence must come back unfenced."""
    reply_message = SimpleNamespace(
        content="```\n[warmly] Bonjour. [soft laugh]\n```"
    )
    llm_reply = SimpleNamespace(choices=[SimpleNamespace(message=reply_message)])
    transcript = "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."

    with patch("agent.auxiliary_client.call_llm", return_value=llm_reply):
        tagged = _apply_xai_auto_speech_tags(transcript)

    # Only the text between the fences should remain.
    assert tagged == "[warmly] Bonjour. [soft laugh]"
def test_auto_speech_tags_strips_markdown_fence_with_language_hint():
    """A fence opened with a language hint (```text) must be stripped too."""
    reply_message = SimpleNamespace(content="```text\n[warmly] Bonjour.\n```")
    llm_reply = SimpleNamespace(choices=[SimpleNamespace(message=reply_message)])
    transcript = "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."

    with patch("agent.auxiliary_client.call_llm", return_value=llm_reply):
        tagged = _apply_xai_auto_speech_tags(transcript)

    # Neither the fence markers nor the "text" hint may leak into the result.
    assert tagged == "[warmly] Bonjour."
def test_auto_speech_tags_falls_back_to_local_on_auxiliary_exception(caplog):
    """A raising rewriter must not propagate: the local pause pass wins.

    ``call_llm`` is patched to raise ``RuntimeError``. The test expects the
    locally pause-tagged text as the result and a log record containing
    "xAI TTS audio tag rewrite failed" on the ``tools.tts_tool`` logger.
    """
    import logging

    transcript = "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
    failing_rewriter = patch(
        "agent.auxiliary_client.call_llm",
        side_effect=RuntimeError("upstream provider timed out"),
    )

    # Capture at DEBUG so the failure message is recorded whatever level
    # it is logged at.
    with caplog.at_level(logging.DEBUG, logger="tools.tts_tool"):
        with failing_rewriter:
            tagged = _apply_xai_auto_speech_tags(transcript)

    # Local fallback: a [pause] after the first sentence and nothing else.
    expected = (
        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
    )
    assert tagged == expected
    assert "xAI TTS audio tag rewrite failed" in caplog.text
def test_auto_speech_tags_falls_back_to_local_when_rewriter_returns_empty():
    """A rewriter reply whose content is an empty string must fall back
    to the local pause-tagged text.
    """
    blank_message = SimpleNamespace(content="")
    llm_reply = SimpleNamespace(choices=[SimpleNamespace(message=blank_message)])
    transcript = "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."

    with patch("agent.auxiliary_client.call_llm", return_value=llm_reply):
        tagged = _apply_xai_auto_speech_tags(transcript)

    expected = (
        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
    )
    assert tagged == expected
def test_auto_speech_tags_skips_auxiliary_when_input_has_explicit_tags():
    """Text that already carries explicit speech tags is trusted as-is.

    The rewriter must never be called for such input, so it cannot
    overwrite intentional markup, and the text must come back unchanged.
    """
    already_tagged = "Bonjour. [pause] <whisper>Déjà balisé.</whisper>"

    with patch("agent.auxiliary_client.call_llm") as rewriter:
        output = _apply_xai_auto_speech_tags(already_tagged)

    rewriter.assert_not_called()
    # The local pass must also leave the text alone: no paragraph
    # normalization and no first-sentence pause injection.
    assert output == already_tagged
def test_auto_speech_tags_skips_auxiliary_for_empty_input():
    """Empty and blank inputs come back unchanged without a rewriter call."""
    with patch("agent.auxiliary_client.call_llm") as rewriter:
        for blank in ("", " \n "):
            # Each blank input is expected back exactly as given.
            assert _apply_xai_auto_speech_tags(blank) == blank

    rewriter.assert_not_called()
def test_auto_speech_tags_skips_auxiliary_for_whitespace_only_input():
    """Whitespace-only input is returned untouched and never reaches the
    rewriter.
    """
    only_space = " "

    with patch("agent.auxiliary_client.call_llm") as rewriter:
        output = _apply_xai_auto_speech_tags(only_space)
        assert output == " "

    rewriter.assert_not_called()
@pytest.mark.parametrize("bad_response", [None, SimpleNamespace(choices=[])])
def test_auto_speech_tags_falls_back_to_local_on_malformed_rewriter_response(
    bad_response,
):
    """Malformed rewriter replies degrade to the local pause pass.

    Parametrized over a ``None`` reply and a reply with an empty
    ``choices`` list; both must produce the locally pause-tagged text
    rather than raise.
    """
    transcript = "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
    rewriter = patch("agent.auxiliary_client.call_llm", return_value=bad_response)

    with rewriter:
        tagged = _apply_xai_auto_speech_tags(transcript)

    expected = (
        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
    )
    assert tagged == expected
def test_generate_xai_tts_leaves_text_plain_by_default(tmp_path, monkeypatch):