diff --git a/gateway/run.py b/gateway/run.py index 85267cc44ee..817c8441bae 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7727,7 +7727,11 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew _note = ( f"[The user sent an audio file attachment: '{_display}'. " f"It is saved at: {_agent_path}. " - f"Ask the user what they'd like you to do with it, or pass the path to a transcription or media tool.]" + f"Its content is not inlined here. If the user's request involves " + f"what the audio contains, transcribe or process it yourself — for " + f"example by passing the path to a transcription or media tool — " + f"instead of asking the user to describe it. Only ask what to do " + f"with it if their intent is genuinely unclear.]" ) message_text = f"{_note}\n\n{message_text}" diff --git a/tests/gateway/test_telegram_audio_vs_voice.py b/tests/gateway/test_telegram_audio_vs_voice.py index 5af5cb920a7..1d1bf0cb78b 100644 --- a/tests/gateway/test_telegram_audio_vs_voice.py +++ b/tests/gateway/test_telegram_audio_vs_voice.py @@ -134,6 +134,10 @@ async def test_audio_attachment_context_note_format(): assert "audio file attachment" in result.lower() # Should NOT contain the voice-message transcription wrapper text assert "voice message" not in result.lower() + # Guides the agent to transcribe/process the file itself rather than + # punting back to the user (same bug class as the PDF/DOCX note). + assert "transcri" in result.lower() + assert "ask the user what they'd like" not in result.lower() # ---------------------------------------------------------------------------