test(tts): cover xAI auto speech-tags auxiliary rewrite path

The previous xAI auto-speech-tag tests asserted on the local pause-only fallback and passed only because call_llm silently returns None in the test environment, so they gave zero coverage of the auxiliary-rewrite path added in the previous commit. Add tests that: (1) mock agent.auxiliary_client.call_llm and pin down the new contract (auxiliary rewriter output wins over the local fallback); (2) verify the system prompt lists every documented inline + wrapping tag and uses BBCode-style [/tag] closing syntax; (3) cover markdown-fence stripping (with and without a language hint); (4) exercise the local fallback on rewriter exception, empty response, None response, and missing-choices response; (5) confirm call_llm is NOT invoked when the input already has explicit speech tags, or is empty / whitespace-only. Also replace the end-to-end test that asserted on the silent-fallback output with one that mocks the rewriter and asserts that the rewriter's tagged text is what reaches the xAI TTS API.
2026-06-23 10:42:00 +00:00 · 2026-06-17 23:40:11 -06:00 · 2026-06-17 23:40:11 -06:00 · 8ae6bd0823
commit 8ae6bd0823
parent 5a506da3d8
1 changed files with 202 additions and 4 deletions
--- a/tests/tools/test_tts_xai_speech_tags.py
+++ b/tests/tools/test_tts_xai_speech_tags.py
@ -1,8 +1,16 @@
 """Tests for xAI TTS speech-tag handling."""

-from unittest.mock import Mock
+from types import SimpleNamespace
+from unittest.mock import Mock, patch

-from tools.tts_tool import _apply_xai_auto_speech_tags, _generate_xai_tts
+import pytest
+
+from tools.tts_tool import (
+    _XAI_INLINE_SPEECH_TAGS,
+    _XAI_WRAPPING_SPEECH_TAGS,
+    _apply_xai_auto_speech_tags,
+    _generate_xai_tts,
+)


 def test_apply_xai_auto_speech_tags_adds_light_pause_after_first_sentence():
@ -72,8 +80,20 @@ def test_apply_xai_auto_speech_tags_single_newline_still_gets_first_sentence_pau
    )


-def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypatch):
+def test_generate_xai_tts_sends_auxiliary_rewriter_output_to_api(
+    tmp_path, monkeypatch
+):
+    """auto_speech_tags=True should send the auxiliary rewriter's tagged
+    output (not the conservative local pause fallback) to the xAI TTS API.
+
+    The previous version of this test asserted on the local pause-tagged
+    text — which only happened to match because ``call_llm`` returns
+    ``None`` in the test environment and the function silently fell
+    back. With the new auxiliary-rewrite path the user-visible contract
+    is "what the LLM said wins", so this test pins that down.
+    """
    captured = {}
+    rewriter_output = "Bonjour Monsieur Talbot. [warmly] Ceci est un test. [soft laugh]"

    class FakeResponse:
        content = b"mp3"
@ -88,8 +108,15 @@ def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypa
        captured["timeout"] = timeout
        return FakeResponse()

+    fake_response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content=rewriter_output))]
+    )
+
    monkeypatch.setenv("XAI_API_KEY", "test-xai-key")
    monkeypatch.setattr("requests.post", fake_post)
+    monkeypatch.setattr(
+        "agent.auxiliary_client.call_llm", lambda *a, **kw: fake_response
+    )

    out = tmp_path / "out.mp3"
    _generate_xai_tts(
@ -102,7 +129,178 @@ def test_generate_xai_tts_sends_auto_speech_tags_when_enabled(tmp_path, monkeypa
    assert captured["url"] == "https://api.x.ai/v1/tts"
    assert captured["json"]["voice_id"] == "ara"
    assert captured["json"]["language"] == "fr"
-    assert captured["json"]["text"] == "Bonjour Monsieur Talbot. [pause] Ceci est un test."
+    assert captured["json"]["text"] == rewriter_output
+
+
+def test_auto_speech_tags_calls_auxiliary_rewriter_with_tts_audio_tags_task():
+    """When input has no explicit speech tags, the function must call the
+    auxiliary rewriter with task='tts_audio_tags' and a system prompt
+    that documents the xAI inline + wrapping tag vocabulary.
+    """
+    response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content="[warmly] Hi."))]
+    )
+
+    with patch("agent.auxiliary_client.call_llm", return_value=response) as mock_call:
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == "[warmly] Hi."
+    mock_call.assert_called_once()
+    call_kwargs = mock_call.call_args.kwargs
+    assert call_kwargs["task"] == "tts_audio_tags"
+    assert call_kwargs["temperature"] == 0.7
+
+    messages = call_kwargs["messages"]
+    assert messages[0]["role"] == "system"
+    assert messages[1]["role"] == "user"
+
+    system_prompt = messages[0]["content"]
+    # All documented inline + wrapping tag names must appear in the prompt
+    # so the auxiliary model knows what's valid. The prompt lists them
+    # comma-separated in two example lines ("Valid inline tags (use as
+    # `[tag]`): pause, long-pause, ..." and a similar line for wrapping).
+    for tag in _XAI_INLINE_SPEECH_TAGS:
+        assert tag in system_prompt, (
+            f"inline tag {tag!r} missing from system prompt"
+        )
+    for tag in _XAI_WRAPPING_SPEECH_TAGS:
+        assert tag in system_prompt, (
+            f"wrapping tag {tag!r} missing from system prompt"
+        )
+    # The prompt must explicitly show the BBCode-style closing syntax so
+    # the rewriter uses [/tag] and not <tag>...</tag>.
+    assert "[/tag]" in system_prompt
+
+    # The user message carries the locally pause-tagged transcript (the
+    # conservative fallback the rewriter is asked to enrich).
+    assert "TRANSCRIPT TO TAG" in messages[1]["content"]
+    assert "[pause]" in messages[1]["content"]
+
+
+def test_auto_speech_tags_strips_markdown_fences_from_rewriter_output():
+    """If the auxiliary model wraps its reply in ```...``` fences the
+    function must strip them before returning.
+    """
+    fenced = "```\n[warmly] Bonjour. [soft laugh]\n```"
+    response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content=fenced))]
+    )
+
+    with patch("agent.auxiliary_client.call_llm", return_value=response):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == "[warmly] Bonjour. [soft laugh]"
+
+
+def test_auto_speech_tags_strips_markdown_fence_with_language_hint():
+    """The fence regex accepts an optional language tag like ```text ...```."""
+    fenced = "```text\n[warmly] Bonjour.\n```"
+    response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content=fenced))]
+    )
+
+    with patch("agent.auxiliary_client.call_llm", return_value=response):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == "[warmly] Bonjour."
+
+
+def test_auto_speech_tags_falls_back_to_local_on_auxiliary_exception(caplog):
+    """If the auxiliary rewriter raises (timeout, network, provider error,
+    anything) the function must silently fall back to the local
+    pause-tagged text so the user still gets audio.
+    """
+    import logging
+
+    with caplog.at_level(logging.DEBUG, logger="tools.tts_tool"), patch(
+        "agent.auxiliary_client.call_llm",
+        side_effect=RuntimeError("upstream provider timed out"),
+    ):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    # Local fallback: first sentence gets a [pause] inserted, single
+    # paragraph, no other rewriter activity.
+    assert result == (
+        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+    )
+    assert "xAI TTS audio tag rewrite failed" in caplog.text
+
+
+def test_auto_speech_tags_falls_back_to_local_when_rewriter_returns_empty():
+    """An empty-string rewriter reply must also fall back to local."""
+    empty_response = SimpleNamespace(
+        choices=[SimpleNamespace(message=SimpleNamespace(content=""))]
+    )
+
+    with patch(
+        "agent.auxiliary_client.call_llm", return_value=empty_response
+    ):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == (
+        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+    )
+
+
+def test_auto_speech_tags_skips_auxiliary_when_input_has_explicit_tags():
+    """If the user/model already supplied explicit speech tags we trust
+    them and never call the rewriter — that would risk the rewriter
+    overwriting intentional markup.
+    """
+    tagged = "Bonjour. [pause] <whisper>Déjà balisé.</whisper>"
+
+    with patch("agent.auxiliary_client.call_llm") as mock_call:
+        result = _apply_xai_auto_speech_tags(tagged)
+
+    mock_call.assert_not_called()
+    # The local pass is a no-op for already-tagged text (no double
+    # paragraph normalization, no first-sentence pause injection).
+    assert result == tagged
+
+
+def test_auto_speech_tags_skips_auxiliary_for_empty_input():
+    with patch("agent.auxiliary_client.call_llm") as mock_call:
+        assert _apply_xai_auto_speech_tags("") == ""
+        assert _apply_xai_auto_speech_tags("   \n  ") == "   \n  "
+
+    mock_call.assert_not_called()
+
+
+def test_auto_speech_tags_skips_auxiliary_for_whitespace_only_input():
+    """Whitespace-only input short-circuits before the rewriter runs."""
+    with patch("agent.auxiliary_client.call_llm") as mock_call:
+        assert _apply_xai_auto_speech_tags("   ") == "   "
+
+    mock_call.assert_not_called()
+
+
+@pytest.mark.parametrize("bad_response", [None, SimpleNamespace(choices=[])])
+def test_auto_speech_tags_falls_back_to_local_on_malformed_rewriter_response(
+    bad_response,
+):
+    """Both ``None`` and a response with no choices must fall back to the
+    conservative local pass rather than crash.
+    """
+    with patch(
+        "agent.auxiliary_client.call_llm", return_value=bad_response
+    ):
+        result = _apply_xai_auto_speech_tags(
+            "Bonjour Monsieur Talbot. Ceci est un test de réponse vocale."
+        )
+
+    assert result == (
+        "Bonjour Monsieur Talbot. [pause] Ceci est un test de réponse vocale."
+    )


 def test_generate_xai_tts_leaves_text_plain_by_default(tmp_path, monkeypatch):