diff --git a/gateway/run.py b/gateway/run.py
index 85267cc44ee..817c8441bae 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -7727,7 +7727,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                 _note = (
                     f"[The user sent an audio file attachment: '{_display}'. "
                     f"It is saved at: {_agent_path}. "
-                    f"Ask the user what they'd like you to do with it, or pass the path to a transcription or media tool.]"
+                    f"Its content is not inlined here. If the user's request involves "
+                    f"what the audio contains, transcribe or process it yourself — for "
+                    f"example by passing the path to a transcription or media tool — "
+                    f"instead of asking the user to describe it. Only ask what to do "
+                    f"with it if their intent is genuinely unclear.]"
                 )
                 message_text = f"{_note}\n\n{message_text}"
 
diff --git a/tests/gateway/test_telegram_audio_vs_voice.py b/tests/gateway/test_telegram_audio_vs_voice.py
index 5af5cb920a7..1d1bf0cb78b 100644
--- a/tests/gateway/test_telegram_audio_vs_voice.py
+++ b/tests/gateway/test_telegram_audio_vs_voice.py
@@ -134,6 +134,10 @@ async def test_audio_attachment_context_note_format():
     assert "audio file attachment" in result.lower()
     # Should NOT contain the voice-message transcription wrapper text
     assert "voice message" not in result.lower()
+    # Guides the agent to transcribe/process the file itself rather than
+    # punting back to the user (same bug class as the PDF/DOCX note).
+    assert "transcri" in result.lower()
+    assert "ask the user what they'd like" not in result.lower()
 
 
 # ---------------------------------------------------------------------------