diff --git a/cli-config.yaml.example b/cli-config.yaml.example
index 41a424a7d18..8ce9ad8e19a 100644
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -415,7 +415,8 @@ prompt_caching:
 # Auxiliary Models (Advanced — Experimental)
 # =============================================================================
 # Hermes uses lightweight "auxiliary" models for side tasks: image analysis,
-# browser screenshot analysis, web page summarization, and context compression.
+# browser screenshot analysis, web page summarization, TTS audio-tag insertion,
+# and context compression.
 #
 # By default these use Gemini Flash via OpenRouter or Nous Portal and are
 # auto-detected from your credentials.  You do NOT need to change anything
@@ -460,6 +461,12 @@ prompt_caching:
 #     provider: "auto"
 #     model: ""
 #
+#   # Gemini 3.1 TTS hidden audio-tag insertion
+#   tts_audio_tags:
+#     provider: "auto"
+#     model: ""              # empty = your main chat model
+#     timeout: 30
+#
 #   # Session search — summarizes matching past sessions
 #   session_search:
 #     provider: "auto"
@@ -835,6 +842,22 @@ platform_toolsets:
 #       max_tool_rounds: 5      # tool loop limit (0 = disable)
 #       log_level: "info"       # audit verbosity
 
+# =============================================================================
+# Text-to-Speech
+# =============================================================================
+# TTS defaults to Edge TTS unless changed in ~/.hermes/config.yaml.
+# Gemini TTS supports persona/director prompt files, and Gemini 3.1 Flash TTS
+# can use a hidden auxiliary rewrite pass to insert expressive square-bracket
+# audio tags into the TTS script without showing tags in chat.
+#
+# tts:
+#   provider: "gemini"
+#   gemini:
+#     model: "gemini-3.1-flash-tts-preview"
+#     voice: "Kore"
+#     audio_tags: false        # true = hidden audio-tag rewrite (Gemini 3.1 TTS models only)
+#     persona_prompt_file: ""  # e.g. ~/.hermes/tts/radio-host.md
+
 # =============================================================================
 # Voice Transcription (Speech-to-Text)
 # =============================================================================
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index c4017373681..494c5ddfe3a 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1290,6 +1290,14 @@ DEFAULT_CONFIG = {
             "timeout": 30,
             "extra_body": {},
         },
+        "tts_audio_tags": {
+            "provider": "auto",
+            "model": "",
+            "base_url": "",
+            "api_key": "",
+            "timeout": 30,
+            "extra_body": {},
+        },
         # Triage specifier — flesh out a rough one-liner in the Kanban
         # Triage column into a concrete spec, then promote it to ``todo``.
         # Invoked by ``hermes kanban specify`` (single id or --all). Set a
@@ -1575,6 +1583,10 @@ DEFAULT_CONFIG = {
         "gemini": {
             "model": "gemini-2.5-flash-preview-tts",
             "voice": "Kore",
+            # When true, Gemini 3.1 TTS uses a hidden auxiliary-model rewrite
+            # pass to insert freeform square-bracket audio tags into the TTS
+            # script. Visible chat replies are unchanged.
+            "audio_tags": False,
             # Optional local Markdown/text file with Gemini TTS performance
             # direction. It may include AUDIO PROFILE, SCENE, DIRECTOR'S NOTES,
             # SAMPLE CONTEXT, and either a `{transcript}` placeholder or no
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 34d563a6696..20728c4f336 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -2980,6 +2980,7 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
     ("approval", "Approval", "smart command approval"),
     ("mcp", "MCP", "MCP tool reasoning"),
     ("title_generation", "Title generation", "session titles"),
+    ("tts_audio_tags", "TTS audio tags", "Gemini TTS tag insertion"),
     ("skills_hub", "Skills hub", "skills search/install"),
     ("triage_specifier", "Triage specifier", "kanban spec fleshing"),
     ("kanban_decomposer", "Kanban decomposer", "task decomposition"),
diff --git a/tests/tools/test_tts_gemini.py b/tests/tools/test_tts_gemini.py
index 6a52a48c02c..85254649d53 100644
--- a/tests/tools/test_tts_gemini.py
+++ b/tests/tools/test_tts_gemini.py
@@ -2,6 +2,7 @@
 
 import base64
 import struct
+from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -312,6 +313,112 @@ class TestGenerateGeminiTts:
         assert prompt_text == "Hi"
         assert "persona prompt file unavailable" in caplog.text
 
+    def test_audio_tags_disabled_does_not_call_rewriter(
+        self, tmp_path, monkeypatch, mock_gemini_response
+    ):
+        from tools.tts_tool import _generate_gemini_tts
+
+        config = {
+            "gemini": {
+                "model": "gemini-3.1-flash-tts-preview",  # passes the tag-capable model check
+                "audio_tags": False,  # feature explicitly switched off
+            }
+        }
+        monkeypatch.setenv("GEMINI_API_KEY", "test-key")
+
+        with patch("agent.auxiliary_client.call_llm") as mock_call_llm, \
+             patch("requests.post", return_value=mock_gemini_response) as mock_post:
+            _generate_gemini_tts("Hi there.", str(tmp_path / "test.wav"), config)
+
+        mock_call_llm.assert_not_called()  # flag off: the auxiliary rewrite must not run
+        prompt_text = mock_post.call_args[1]["json"]["contents"][0]["parts"][0]["text"]
+        assert prompt_text == "Hi there."  # text is posted to Gemini unchanged
+
+    def test_audio_tags_enabled_rewrites_hidden_tts_script(
+        self, tmp_path, monkeypatch, mock_gemini_response
+    ):
+        from tools.tts_tool import _generate_gemini_tts
+
+        persona_file = tmp_path / "voice-persona.md"
+        persona_file.write_text(
+            "### DIRECTOR'S NOTES\nStyle: Warm and amused.",
+            encoding="utf-8",
+        )
+        response = SimpleNamespace(  # stand-in for an auxiliary reply: .choices[0].message.content
+            choices=[
+                SimpleNamespace(
+                    message=SimpleNamespace(content="[warmly] Hi there. [soft laugh]")
+                )
+            ]
+        )
+        config = {
+            "gemini": {
+                "model": "gemini-3.1-flash-tts-preview",
+                "audio_tags": True,
+                "persona_prompt_file": str(persona_file),  # persona text is handed to the rewriter
+            }
+        }
+        monkeypatch.setenv("GEMINI_API_KEY", "test-key")
+
+        with patch("agent.auxiliary_client.call_llm", return_value=response) as mock_call_llm, \
+             patch("requests.post", return_value=mock_gemini_response) as mock_post:
+            _generate_gemini_tts("Hi there.", str(tmp_path / "test.wav"), config)
+
+        mock_call_llm.assert_called_once()  # exactly one hidden rewrite call
+        call_kwargs = mock_call_llm.call_args.kwargs  # messages[0] = system, messages[1] = user
+        assert call_kwargs["task"] == "tts_audio_tags"
+        assert "Audio tags are inline square-bracket modifiers" in call_kwargs["messages"][0]["content"]
+        assert "Style: Warm and amused." in call_kwargs["messages"][1]["content"]
+        assert "Hi there." in call_kwargs["messages"][1]["content"]
+
+        prompt_text = mock_post.call_args[1]["json"]["contents"][0]["parts"][0]["text"]
+        assert "Synthesize speech from the TRANSCRIPT only" in prompt_text
+        assert "### DIRECTOR'S NOTES\nStyle: Warm and amused." in prompt_text
+        assert "#### TRANSCRIPT\n[warmly] Hi there. [soft laugh]" in prompt_text  # tagged script is sent
+
+    def test_audio_tags_enabled_skips_non_tag_capable_model(
+        self, tmp_path, monkeypatch, mock_gemini_response, caplog
+    ):
+        from tools.tts_tool import _generate_gemini_tts
+
+        config = {
+            "gemini": {
+                "model": "gemini-2.5-flash-preview-tts",  # no "gemini-3.1" marker: fails the model check
+                "audio_tags": True,
+            }
+        }
+        monkeypatch.setenv("GEMINI_API_KEY", "test-key")
+
+        with patch("agent.auxiliary_client.call_llm") as mock_call_llm, \
+             patch("requests.post", return_value=mock_gemini_response) as mock_post:
+            _generate_gemini_tts("Hi there.", str(tmp_path / "test.wav"), config)
+
+        mock_call_llm.assert_not_called()  # unsupported model: rewrite is skipped
+        prompt_text = mock_post.call_args[1]["json"]["contents"][0]["parts"][0]["text"]
+        assert prompt_text == "Hi there."
+        assert "not known to support Gemini audio tags" in caplog.text  # a warning is logged instead
+
+    def test_audio_tag_rewrite_failure_falls_back_to_original_text(
+        self, tmp_path, monkeypatch, mock_gemini_response, caplog
+    ):
+        from tools.tts_tool import _generate_gemini_tts
+
+        config = {
+            "gemini": {
+                "model": "gemini-3.1-flash-tts-preview",
+                "audio_tags": True,
+            }
+        }
+        monkeypatch.setenv("GEMINI_API_KEY", "test-key")
+
+        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("boom")), \
+             patch("requests.post", return_value=mock_gemini_response) as mock_post:
+            _generate_gemini_tts("Hi there.", str(tmp_path / "test.wav"), config)
+
+        prompt_text = mock_post.call_args[1]["json"]["contents"][0]["parts"][0]["text"]
+        assert prompt_text == "Hi there."  # rewrite error falls back to the untagged text
+        assert "audio tag rewrite failed" in caplog.text  # failure is logged rather than raised
+
 
 class TestGeminiInCheckRequirements:
     def test_gemini_api_key_satisfies_requirements(self, monkeypatch):
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 8b223da60bd..c6e7c22de0f 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -190,6 +190,8 @@ DEFAULT_XAI_BASE_URL = "https://api.x.ai/v1"
 DEFAULT_GEMINI_TTS_MODEL = "gemini-2.5-flash-preview-tts"
 DEFAULT_GEMINI_TTS_VOICE = "Kore"
 DEFAULT_GEMINI_TTS_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
+DEFAULT_GEMINI_AUDIO_TAGS = False
+GEMINI_AUDIO_TAG_REWRITE_TASK = "tts_audio_tags"
 # PCM output specs for Gemini TTS (fixed by the API)
 GEMINI_TTS_SAMPLE_RATE = 24000
 GEMINI_TTS_CHANNELS = 1
@@ -233,6 +235,23 @@ ELEVENLABS_MODEL_MAX_TEXT_LENGTH: Dict[str, int] = {
     "eleven_flash_v2_5": 40000,
 }
 
+
+def _config_bool(value: Any, default: bool = False) -> bool:
+    """Coerce common YAML/env bool spellings without treating random strings as true."""
+    if isinstance(value, bool):
+        return value
+    if value is None:  # unset: use the caller's default
+        return default
+    if isinstance(value, (int, float)):  # non-bool numbers use ordinary truthiness
+        return bool(value)
+    if isinstance(value, str):
+        normalized = value.strip().lower()  # match ignores case and surrounding whitespace
+        if normalized in {"1", "true", "yes", "on", "enabled"}:
+            return True
+        if normalized in {"0", "false", "no", "off", "disabled"}:
+            return False
+    return default  # unrecognised strings and any other type fall back to the default
+
 # Final fallback when provider isn't recognised at all.
 FALLBACK_MAX_TEXT_LENGTH = 4000
 
@@ -1069,20 +1088,7 @@ _XAI_FIRST_SENTENCE_RE = re.compile(r"^(.{12,120}?[.!?…])\s+(?=\S)", flags=re.
 
 
 def _xai_bool_config(value: Any, default: bool = False) -> bool:
-    """Coerce common YAML/env bool spellings without treating random strings as true."""
-    if isinstance(value, bool):
-        return value
-    if value is None:
-        return default
-    if isinstance(value, (int, float)):
-        return bool(value)
-    if isinstance(value, str):
-        normalized = value.strip().lower()
-        if normalized in {"1", "true", "yes", "on", "enabled"}:
-            return True
-        if normalized in {"0", "false", "no", "off", "disabled"}:
-            return False
-    return default
+    return _config_bool(value, default=default)  # thin wrapper: delegates to the shared helper
 
 
 def _apply_xai_auto_speech_tags(text: str) -> str:
@@ -1427,10 +1433,105 @@ def _read_gemini_persona_prompt(gemini_config: Dict[str, Any]) -> str:
         return ""
 
 
-def _compose_gemini_tts_prompt(text: str, gemini_config: Dict[str, Any]) -> str:
+def _gemini_model_supports_audio_tags(model: str) -> bool:
+    """Return True for Gemini TTS models known to support expressive audio tags."""
+    normalized = (model or "").strip().lower().rsplit("/", 1)[-1]  # keep only the part after the last "/"
+    return "gemini-3.1" in normalized and "tts" in normalized  # substring check, not an exact allowlist
+
+
+def _gemini_audio_tags_enabled(gemini_config: Dict[str, Any], model: str) -> bool:
+    raw = gemini_config.get("audio_tags")
+    if isinstance(raw, dict):  # also accept the mapping form: audio_tags: {enabled: ...}
+        raw = raw.get("enabled")
+    enabled = _config_bool(raw, default=DEFAULT_GEMINI_AUDIO_TAGS)
+    if not enabled:
+        return False
+    if not _gemini_model_supports_audio_tags(model):  # flag is on but the model check failed
+        logger.warning(
+            "Gemini TTS audio_tags enabled, but model %s is not known to support "
+            "Gemini audio tags; skipping hidden tag rewrite",
+            model,
+        )
+        return False
+    return True
+
+
+def _clean_gemini_audio_tag_rewrite(content: str) -> str:
+    clean = (content or "").strip()  # tolerate None/empty content
+    fence = re.fullmatch(r"```(?:[A-Za-z0-9_-]+)?\s*(.*?)\s*```", clean, flags=re.DOTALL)
+    if fence:  # the whole reply is one fenced code block: keep only its contents
+        clean = fence.group(1).strip()
+    return clean
+
+
+def _extract_auxiliary_message_content(response: Any) -> str:
+    try:
+        choice = response.choices[0]  # only the first choice is read
+        message = getattr(choice, "message", None)
+        if isinstance(message, dict):  # message may be a plain dict or an attribute-style object
+            return str(message.get("content") or "")
+        return str(getattr(message, "content", "") or "")
+    except Exception:  # best effort: any unexpected response shape yields ""
+        return ""
+
+
+def _rewrite_gemini_tts_audio_tags(text: str, persona_prompt: str = "") -> str:
+    """Use the configured auxiliary model to insert Gemini audio tags."""
+    transcript = text.strip()
+    if not transcript:  # nothing to tag: hand the input back unchanged
+        return text
+
+    system_prompt = (
+        "You rewrite transcripts for Gemini 3.1 Flash TTS by inserting expressive "
+        "audio tags.\n\n"
+        "Audio tags are inline square-bracket modifiers such as [whispers], "
+        "[excitedly], [very slow], [sarcastically], [laughs], [sighs], or [gasp]. "
+        "There is no fixed allowlist. Use creative freeform tags generously but "
+        "naturally to control tone, pace, emotional vibe, emphasis, section-level "
+        "delivery, and non-verbal sounds. Use English audio tags even when the "
+        "spoken transcript is not English.\n\n"
+        "Rules:\n"
+        "- Preserve the spoken words, order, and meaning.\n"
+        "- Do not add new spoken sentences or remove existing spoken words.\n"
+        "- Use square brackets for every audio tag.\n"
+        "- Do not use SSML or XML tags.\n"
+        "- Do not explain or comment.\n"
+        "- Return only the tagged TTS script."
+    )
+    context = persona_prompt.strip() or "(none)"  # placeholder when no persona text is supplied
+    user_prompt = (
+        "PERSONA AND DIRECTOR CONTEXT:\n"
+        f"{context}\n\n"
+        "TRANSCRIPT TO TAG:\n"
+        f"{transcript}"
+    )
+    try:
+        from agent.auxiliary_client import call_llm  # imported inside the try: import errors also fall back
+
+        response = call_llm(
+            task=GEMINI_AUDIO_TAG_REWRITE_TASK,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=0.7,
+        )
+        tagged = _clean_gemini_audio_tag_rewrite(_extract_auxiliary_message_content(response))
+        return tagged or text  # empty model output: keep the original text
+    except Exception as exc:  # any failure degrades to untagged speech instead of raising
+        logger.warning("Gemini TTS audio tag rewrite failed; using untagged text: %s", exc)
+        return text
+
+
+def _compose_gemini_tts_prompt(
+    text: str,
+    gemini_config: Dict[str, Any],
+    persona_prompt: Optional[str] = None,
+) -> str:
     """Build the Gemini prompt from persona direction plus the live transcript."""
     transcript = text.strip()
-    persona_prompt = _read_gemini_persona_prompt(gemini_config)
+    if persona_prompt is None:
+        persona_prompt = _read_gemini_persona_prompt(gemini_config)
     if not persona_prompt:
         return transcript
 
@@ -1487,7 +1588,15 @@ def _generate_gemini_tts(text: str, output_path: str, tts_config: Dict[str, Any]
         or get_env_value("GEMINI_BASE_URL")
         or DEFAULT_GEMINI_TTS_BASE_URL
     ).strip().rstrip("/")
-    prompt_text = _compose_gemini_tts_prompt(text, gemini_config)
+    persona_prompt = _read_gemini_persona_prompt(gemini_config)
+    tts_script = text
+    if _gemini_audio_tags_enabled(gemini_config, model):
+        tts_script = _rewrite_gemini_tts_audio_tags(text, persona_prompt=persona_prompt)
+    prompt_text = _compose_gemini_tts_prompt(
+        tts_script,
+        gemini_config,
+        persona_prompt=persona_prompt,
+    )
     max_len = _resolve_max_text_length("gemini", tts_config)
     if len(prompt_text) > max_len:
         logger.warning(
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
index bf91953f6a3..4b2d2c40e93 100644
--- a/website/docs/user-guide/configuration.md
+++ b/website/docs/user-guide/configuration.md
@@ -835,6 +835,7 @@ $ hermes model
 [ ] vision               currently: auto / main model
 [ ] web_extract          currently: auto / main model
 [ ] title_generation     currently: openrouter / google/gemini-3-flash-preview
+[ ] tts_audio_tags       currently: auto / main model
 [ ] compression          currently: auto / main model
 [ ] approval             currently: auto / main model
 [ ] triage_specifier     currently: auto / main model
@@ -911,6 +912,14 @@ auxiliary:
     api_key: ""
     timeout: 30                # seconds
 
+  # Gemini 3.1 TTS hidden audio-tag insertion
+  tts_audio_tags:
+    provider: "auto"
+    model: ""                  # empty = main chat model
+    base_url: ""
+    api_key: ""
+    timeout: 30
+
   # Context compression timeout (separate from compression.* config)
   compression:
     timeout: 120               # seconds — compression summarizes long conversations, needs more time
@@ -1197,8 +1206,9 @@ tts:
     model: "voxtral-mini-tts-2603"
     voice_id: "c69964a6-ab8b-4f8a-9465-ec0925096ec8"  # Paul - Neutral (default)
   gemini:
-    model: "gemini-2.5-flash-preview-tts"   # or gemini-2.5-pro-preview-tts
+    model: "gemini-2.5-flash-preview-tts"   # or gemini-3.1-flash-tts-preview
     voice: "Kore"               # 30 prebuilt voices: Zephyr, Puck, Kore, Enceladus, etc.
+    audio_tags: false           # Hidden Gemini 3.1 TTS audio-tag insertion
     persona_prompt_file: ""      # Optional Markdown/text file with Gemini voice direction
   xai:
     voice_id: "eve"             # xAI TTS voice
diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md
index d67efc3e2c7..9912d834972 100644
--- a/website/docs/user-guide/features/tts.md
+++ b/website/docs/user-guide/features/tts.md
@@ -66,8 +66,9 @@ tts:
     model: "voxtral-mini-tts-2603"
     voice_id: "c69964a6-ab8b-4f8a-9465-ec0925096ec8"  # Paul - Neutral (default)
   gemini:
-    model: "gemini-2.5-flash-preview-tts"  # or gemini-2.5-pro-preview-tts
+    model: "gemini-2.5-flash-preview-tts"  # or gemini-3.1-flash-tts-preview
     voice: "Kore"               # 30 prebuilt voices: Zephyr, Puck, Kore, Enceladus, Gacrux, etc.
+    audio_tags: false           # Enable hidden Gemini 3.1 TTS audio-tag insertion
     persona_prompt_file: ""      # Optional Markdown/text file with Gemini voice direction
   xai:
     voice_id: "eve"             # or a custom voice ID — see docs below
@@ -112,6 +113,20 @@ tts:
     persona_prompt_file: ~/.hermes/tts/butler-voice.md
 ```
 
+### Gemini Audio Tags
+
+Gemini 3.1 Flash TTS supports freeform square-bracket audio tags such as `[whispers]`, `[excitedly]`, `[very slow]`, `[laughs]`, and other expressive delivery notes. Enable `tts.gemini.audio_tags` to have Hermes run a hidden rewrite pass before Gemini TTS. The rewrite inserts inline tags into the TTS script only; the visible chat reply stays unchanged.
+
+```yaml
+tts:
+  provider: gemini
+  gemini:
+    model: gemini-3.1-flash-tts-preview
+    audio_tags: true
+```
+
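+The rewrite uses `auxiliary.tts_audio_tags` and defaults to your main chat model. Override that auxiliary task if you want tag insertion handled by a cheaper or faster model. If the configured Gemini model is not a Gemini 3.1 TTS model, or the rewrite call fails or returns nothing, Hermes logs a warning (for the first two cases) and synthesizes the original, untagged text.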
+
 
 ### Input length limits