diff --git a/tests/tools/test_tts_xai_speech_tags.py b/tests/tools/test_tts_xai_speech_tags.py
index 37bde1c710a..d54fe7a5c92 100644
--- a/tests/tools/test_tts_xai_speech_tags.py
+++ b/tests/tools/test_tts_xai_speech_tags.py
@@ -1,8 +1,16 @@
"""Tests for xAI TTS speech-tag handling."""
-from unittest.mock import Mock
+from types import SimpleNamespace
+from unittest.mock import Mock, patch
-from tools.tts_tool import _apply_xai_auto_speech_tags, _generate_xai_tts
+import pytest
+
+from tools.tts_tool import (
+ _XAI_INLINE_SPEECH_TAGS,
+ _XAI_WRAPPING_SPEECH_TAGS,
+ _apply_xai_auto_speech_tags,
+ _generate_xai_tts,
+)
def test_apply_xai_auto_speech_tags_adds_light_pause_after_first_sentence():
@@ -72,8 +80,20 @@ def test_apply_xai_auto_speech_tags_single_newline_still_gets_first_sentence_pau
)
-def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypatch):
+def test_generate_xai_tts_sends_auxiliary_rewriter_output_to_api(
+ tmp_path, monkeypatch
+):
+ """auto_speech_tags=True should send the auxiliary rewriter's tagged
+ output (not the conservative local pause fallback) to the xAI TTS API.
+
+ The previous version of this test asserted on the local pause-tagged
+ text — which only happened to match because ``call_llm`` returns
+ ``None`` in the test environment and the function silently fell
+ back. With the new auxiliary-rewrite path the user-visible contract
+ is "what the LLM said wins", so this test pins that down.
+ """
captured = {}
+ rewriter_output = "Bonjour Monsieur Talbot. [warmly] Ceci est un test. [soft laugh]"
class FakeResponse:
content = b"mp3"
@@ -88,8 +108,15 @@ def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypa
captured["timeout"] = timeout
return FakeResponse()
+ fake_response = SimpleNamespace(
+ choices=[SimpleNamespace(message=SimpleNamespace(content=rewriter_output))]
+ )
+
monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
monkeypatch.setattr("requests.post", fake_post)
+ monkeypatch.setattr(
+ "agent.auxiliary_client.call_llm", lambda *a, **kw: fake_response
+ )
out = tmp_path / "out.mp3"
_generate_xai_tts(
@@ -102,7 +129,178 @@ def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypa
assert captured["url"] == "https://api.x.ai/v1/tts"
assert captured["json"]["voice_id"] == "ara"
assert captured["json"]["language"] == "fr"
- assert captured["json"]["text"] == "Bonjour Monsieur Talbot. [pause] Ceci est un test."
+ assert captured["json"]["text"] == rewriter_output
+
+
+def test_auto_speech_tags_calls_auxiliary_rewriter_with_tts_audio_tags_task():
+ """When input has no explicit speech tags, the function must call the
+ auxiliary rewriter with task='tts_audio_tags' and a system prompt
+ that documents the xAI inline + wrapping tag vocabulary.
+ """
+ response = SimpleNamespace(
+ choices=[SimpleNamespace(message=SimpleNamespace(content="[warmly] Hi."))]
+ )
+
+ with patch("agent.auxiliary_client.call_llm", return_value=response) as mock_call:
+ result = _apply_xai_auto_speech_tags(
+ "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+ )
+
+ assert result == "[warmly] Hi."
+ mock_call.assert_called_once()
+ call_kwargs = mock_call.call_args.kwargs
+ assert call_kwargs["task"] == "tts_audio_tags"
+ assert call_kwargs["temperature"] == 0.7
+
+ messages = call_kwargs["messages"]
+ assert messages[0]["role"] == "system"
+ assert messages[1]["role"] == "user"
+
+ system_prompt = messages[0]["content"]
+ # All documented inline + wrapping tag names must appear in the prompt
+ # so the auxiliary model knows what's valid. The prompt lists them
+ # comma-separated in two example lines ("Valid inline tags (use as
+ # `[tag]`): pause, long-pause, ..." and a similar line for wrapping).
+ for tag in _XAI_INLINE_SPEECH_TAGS:
+ assert tag in system_prompt, (
+ f"inline tag {tag!r} missing from system prompt"
+ )
+ for tag in _XAI_WRAPPING_SPEECH_TAGS:
+ assert tag in system_prompt, (
+ f"wrapping tag {tag!r} missing from system prompt"
+ )
+ # The prompt must explicitly show the BBCode-style closing syntax so
+ # the rewriter uses [/tag] and not ....
+ assert "[/tag]" in system_prompt
+
+ # The user message carries the locally pause-tagged transcript (the
+ # conservative fallback the rewriter is asked to enrich).
+ assert "TRANSCRIPT TO TAG" in messages[1]["content"]
+ assert "[pause]" in messages[1]["content"]
+
+
+def test_auto_speech_tags_strips_markdown_fences_from_rewriter_output():
+ """If the auxiliary model wraps its reply in ```...``` fences the
+ function must strip them before returning.
+ """
+ fenced = "```\n[warmly] Bonjour. [soft laugh]\n```"
+ response = SimpleNamespace(
+ choices=[SimpleNamespace(message=SimpleNamespace(content=fenced))]
+ )
+
+ with patch("agent.auxiliary_client.call_llm", return_value=response):
+ result = _apply_xai_auto_speech_tags(
+ "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+ )
+
+ assert result == "[warmly] Bonjour. [soft laugh]"
+
+
+def test_auto_speech_tags_strips_markdown_fence_with_language_hint():
+ """The fence regex accepts an optional language tag like ```text ...```."""
+ fenced = "```text\n[warmly] Bonjour.\n```"
+ response = SimpleNamespace(
+ choices=[SimpleNamespace(message=SimpleNamespace(content=fenced))]
+ )
+
+ with patch("agent.auxiliary_client.call_llm", return_value=response):
+ result = _apply_xai_auto_speech_tags(
+ "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+ )
+
+ assert result == "[warmly] Bonjour."
+
+
+def test_auto_speech_tags_falls_back_to_local_on_auxiliary_exception(caplog):
+ """If the auxiliary rewriter raises (timeout, network, provider error,
+ anything) the function must silently fall back to the local
+ pause-tagged text so the user still gets audio.
+ """
+ import logging
+
+ with caplog.at_level(logging.DEBUG, logger="tools.tts_tool"), patch(
+ "agent.auxiliary_client.call_llm",
+ side_effect=RuntimeError("upstream provider timed out"),
+ ):
+ result = _apply_xai_auto_speech_tags(
+ "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+ )
+
+ # Local fallback: first sentence gets a [pause] inserted, single
+ # paragraph, no other rewriter activity.
+ assert result == (
+ "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+ )
+ assert "xAI TTS audio tag rewrite failed" in caplog.text
+
+
+def test_auto_speech_tags_falls_back_to_local_when_rewriter_returns_empty():
+ """An empty / None rewriter response must also fall back to local."""
+ empty_response = SimpleNamespace(
+ choices=[SimpleNamespace(message=SimpleNamespace(content=""))]
+ )
+
+ with patch(
+ "agent.auxiliary_client.call_llm", return_value=empty_response
+ ):
+ result = _apply_xai_auto_speech_tags(
+ "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+ )
+
+ assert result == (
+ "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+ )
+
+
+def test_auto_speech_tags_skips_auxiliary_when_input_has_explicit_tags():
+ """If the user/model already supplied explicit speech tags we trust
+ them and never call the rewriter — that would risk the rewriter
+ overwriting intentional markup.
+ """
+ tagged = "Bonjour. [pause] Déjà balisé."
+
+ with patch("agent.auxiliary_client.call_llm") as mock_call:
+ result = _apply_xai_auto_speech_tags(tagged)
+
+ mock_call.assert_not_called()
+ # The local pass is a no-op for already-tagged text (no double
+ # paragraph normalization, no first-sentence pause injection).
+ assert result == tagged
+
+
+def test_auto_speech_tags_skips_auxiliary_for_empty_input():
+ with patch("agent.auxiliary_client.call_llm") as mock_call:
+ assert _apply_xai_auto_speech_tags("") == ""
+ assert _apply_xai_auto_speech_tags(" \n ") == " \n "
+
+ mock_call.assert_not_called()
+
+
+def test_auto_speech_tags_skips_auxiliary_for_whitespace_only_input():
+ """Whitespace-only input short-circuits before the rewriter runs."""
+ with patch("agent.auxiliary_client.call_llm") as mock_call:
+ assert _apply_xai_auto_speech_tags(" ") == " "
+
+ mock_call.assert_not_called()
+
+
+@pytest.mark.parametrize("bad_response", [None, SimpleNamespace(choices=[])])
+def test_auto_speech_tags_falls_back_to_local_on_malformed_rewriter_response(
+ bad_response,
+):
+ """Both ``None`` and a response with no choices must fall back to the
+ conservative local pass rather than crash.
+ """
+ with patch(
+ "agent.auxiliary_client.call_llm", return_value=bad_response
+ ):
+ result = _apply_xai_auto_speech_tags(
+ "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+ )
+
+ assert result == (
+ "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+ )
def test_generate_xai_tts_leaves_text_plain_by_default(tmp_path, monkeypatch):