From d0e017bac8faeb3f08680052319c0091cfd335b5 Mon Sep 17 00:00:00 2001
From: Austin Pickett <pickett.austin@gmail.com>
Date: Thu, 11 Jun 2026 10:01:51 -0400
Subject: [PATCH] fix(gateway): gate oversized Telegram voice/audio before
 download (#44245)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(gateway): gate oversized Telegram voice/audio before download

Adds a pre-download size check to the Telegram voice and audio inbound
paths. Files that exceed _max_doc_bytes (default 20 MB) are rejected
before get_file() is called, preventing silent OOM-style stalls on large
uploads. A human-readable note is appended to the event text so the
model can explain the limit to the user.

Also extends 403 entitlement detection in recover_with_credential_pool
to cover two additional cases: 'oauth authentication is currently not
allowed for this organization' and Anthropic anthropic_messages-mode 403s,
both of which should be treated as entitlement failures rather than
transient errors.

Tests: 7 new cases in test_telegram_voice_v0_regressions.py covering
the size gate (accept, reject, note text) and the STT-failure notice path.

Salvaged from #40487 (cryptopafi) — cherry-picked the Telegram voice
policy and 403 entitlement fixes; LiveKit/Discord/uv.lock workstreams
left for separate PRs.

* test(gateway): drop orphaned voice tests not backed by this PR

The cherry-picked test file from #40487 included 3 tests for STT-failure
notice and voice-mode (_handle_voice_command 'on' -> voice_only) behavior
that this PR intentionally does NOT salvage (those belong to the LiveKit/
voice-policy workstreams left in #40487). They fail on both this branch
and clean main because the feature code isn't present.

Keep only the 2 tests backed by code actually in this PR:
- test_telegram_audio_size_gate_rejects_oversized_media_before_download
  (covers the _telegram_media_size_allowed guard this PR adds)
- test_voice_tts_is_explicit_audio_reply_opt_in (matches current main)

Removed now-unused imports (MessageEvent, MessageType, AsyncMock).
---
 agent/agent_runtime_helpers.py                | 27 +++++--
 gateway/platforms/telegram.py                 | 39 ++++++++++
 .../test_telegram_voice_v0_regressions.py     | 71 +++++++++++++++++++
 3 files changed, 130 insertions(+), 7 deletions(-)
 create mode 100644 tests/gateway/test_telegram_voice_v0_regressions.py

diff --git a/agent/agent_runtime_helpers.py b/agent/agent_runtime_helpers.py
index daffc025d9b..742af145380 100644
--- a/agent/agent_runtime_helpers.py
+++ b/agent/agent_runtime_helpers.py
@@ -679,15 +679,28 @@ def recover_with_credential_pool(
         # long-running TUI sessions stuck on stale tokens until the user
         # exited and reopened.
         is_entitlement = agent._is_entitlement_failure(error_context, status_code)
+        _auth_haystack = " ".join(
+            str(error_context.get(k) or "").lower()
+            for k in ("message", "reason", "code", "error")
+            if isinstance(error_context, dict)
+        )
+        if (
+            not is_entitlement
+            and status_code == 403
+            and "oauth authentication is currently not allowed for this organization" in _auth_haystack
+        ):
+            is_entitlement = True
+        if (
+            not is_entitlement
+            and status_code == 403
+            and (agent.provider or "") == "anthropic"
+            and getattr(agent, "api_mode", "") == "anthropic_messages"
+        ):
+            is_entitlement = True
         if not is_entitlement and status_code == 403 and (agent.provider or "") == "xai-oauth":
-            _disambiguator_haystack = " ".join(
-                str(error_context.get(k) or "").lower()
-                for k in ("message", "reason", "code", "error")
-                if isinstance(error_context, dict)
-            )
             _is_xai_auth_failure = (
-                "[wke=unauthenticated:" in _disambiguator_haystack
-                or "oauth2 access token could not be validated" in _disambiguator_haystack
+                "[wke=unauthenticated:" in _auth_haystack
+                or "oauth2 access token could not be validated" in _auth_haystack
             )
             if not _is_xai_auth_failure:
                 is_entitlement = True
diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py
index 5cba11d2ee9..eec156bbae9 100644
--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -3837,6 +3837,33 @@ class TelegramAdapter(BasePlatformAdapter):
             )
         return error
 
+    def _telegram_media_too_large_note(self, label: str, file_size: Any, max_bytes: int) -> str:
+        limit_mb = max(1, max_bytes // (1024 * 1024))
+        try:
+            size_mb = int(file_size or 0) / (1024 * 1024)
+            size_text = f"{size_mb:.1f} MB"
+        except (TypeError, ValueError):
+            size_text = "unknown size"
+        return (
+            f"[Telegram {label} skipped: file size {size_text} exceeds the "
+            f"{limit_mb} MB limit. Ask the user to send a shorter voice note "
+            "or a smaller audio file.]"
+        )
+
+    def _telegram_media_size_allowed(self, source: Any, label: str) -> tuple[bool, Optional[str]]:
+        """Validate Telegram media size before downloading into memory."""
+        max_bytes = int(getattr(self, "_max_doc_bytes", 20 * 1024 * 1024) or 20 * 1024 * 1024)
+        file_size = getattr(source, "file_size", None)
+        try:
+            size = int(file_size or 0)
+        except (TypeError, ValueError):
+            size = 0
+        if size <= 0:
+            return True, None
+        if size <= max_bytes:
+            return True, None
+        return False, self._telegram_media_too_large_note(label, size, max_bytes)
+
     async def send_voice(
         self,
         chat_id: str,
@@ -5602,6 +5629,12 @@ class TelegramAdapter(BasePlatformAdapter):
         # Download voice/audio messages to cache for STT transcription
         if msg.voice:
             try:
+                allowed, note = self._telegram_media_size_allowed(msg.voice, "voice message")
+                if not allowed:
+                    event.text = self._append_observed_note(event.text, note or "")
+                    logger.info("[Telegram] Skipped oversized user voice (size=%s)", getattr(msg.voice, "file_size", None))
+                    await self.handle_message(event)
+                    return
                 file_obj = await msg.voice.get_file()
                 audio_bytes = await file_obj.download_as_bytearray()
                 cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
@@ -5612,6 +5645,12 @@ class TelegramAdapter(BasePlatformAdapter):
                 logger.warning("[Telegram] Failed to cache voice: %s", e, exc_info=True)
         elif msg.audio:
             try:
+                allowed, note = self._telegram_media_size_allowed(msg.audio, "audio file")
+                if not allowed:
+                    event.text = self._append_observed_note(event.text, note or "")
+                    logger.info("[Telegram] Skipped oversized user audio (size=%s)", getattr(msg.audio, "file_size", None))
+                    await self.handle_message(event)
+                    return
                 file_obj = await msg.audio.get_file()
                 audio_bytes = await file_obj.download_as_bytearray()
                 cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
diff --git a/tests/gateway/test_telegram_voice_v0_regressions.py b/tests/gateway/test_telegram_voice_v0_regressions.py
new file mode 100644
index 00000000000..b2b8d4d0e8b
--- /dev/null
+++ b/tests/gateway/test_telegram_voice_v0_regressions.py
@@ -0,0 +1,71 @@
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+
+ROOT = Path(__file__).resolve().parents[2]
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
+
+from gateway.config import Platform
+from gateway.platforms.telegram import TelegramAdapter
+from gateway.run import GatewayRunner
+from gateway.session import SessionSource
+
+
+def _source():
+    return SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+
+
+def _runner(adapter=None):
+    runner = object.__new__(GatewayRunner)
+    runner.config = SimpleNamespace(
+        stt_enabled=True,
+        group_sessions_per_user=True,
+        thread_sessions_per_user=False,
+    )
+    runner.adapters = {Platform.TELEGRAM: adapter} if adapter else {}
+    runner._consume_pending_native_image_paths = lambda _key: []
+    runner._session_key_for_source = lambda _source: "telegram:dm:12345"
+    runner._thread_metadata_for_source = lambda *_args, **_kwargs: {}
+    runner._reply_anchor_for_event = lambda _event: None
+    return runner
+
+
+def test_telegram_audio_size_gate_rejects_oversized_media_before_download():
+    adapter = object.__new__(TelegramAdapter)
+    adapter._max_doc_bytes = 1024
+
+    allowed, note = adapter._telegram_media_size_allowed(
+        SimpleNamespace(file_size=2048),
+        "voice message",
+    )
+
+    assert allowed is False
+    assert "exceeds" in note
+    assert "voice message" in note
+
+
+@pytest.mark.asyncio
+async def test_voice_tts_is_explicit_audio_reply_opt_in():
+    adapter = SimpleNamespace(
+        _auto_tts_disabled_chats=set(),
+        _auto_tts_enabled_chats=set(),
+    )
+    runner = _runner(adapter)
+    runner._voice_mode = {}
+    runner._voice_provider_mode = {}
+    runner._save_voice_modes = lambda: None
+    runner._save_voice_provider_modes = lambda: None
+
+    event = SimpleNamespace(
+        source=_source(),
+        get_command_args=lambda: "tts",
+    )
+    result = await GatewayRunner._handle_voice_command(runner, event)
+
+    assert runner._voice_mode["telegram:12345"] == "all"
+    assert "12345" in adapter._auto_tts_enabled_chats
+    assert result