mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 08:51:53 +00:00
fix(gateway): gate oversized Telegram voice/audio before download (#44245)
* fix(gateway): gate oversized Telegram voice/audio before download Adds a pre-download size check to the Telegram voice and audio inbound paths. Files that exceed _max_doc_bytes (default 20 MB) are rejected before get_file() is called, preventing silent OOM-style stalls on large uploads. A human-readable note is appended to the event text so the model can explain the limit to the user. Also extends 403 entitlement detection in recover_with_credential_pool to cover two additional cases: 'oauth authentication is currently not allowed for this organization' and Anthropic anthropic_messages-mode 403s, both of which should be treated as entitlement failures rather than transient errors. Tests: 7 new cases in test_telegram_voice_v0_regressions.py covering the size gate (accept, reject, note text) and the STT-failure notice path. Salvaged from #40487 (cryptopafi) — cherry-picked the Telegram voice policy and 403 entitlement fixes; LiveKit/Discord/uv.lock workstreams left for separate PRs. * test(gateway): drop orphaned voice tests not backed by this PR The cherry-picked test file from #40487 included 3 tests for STT-failure notice and voice-mode (_handle_voice_command 'on' -> voice_only) behavior that this PR intentionally does NOT salvage (those belong to the LiveKit/ voice-policy workstreams left in #40487). They fail on both this branch and clean main because the feature code isn't present. Keep only the 2 tests backed by code actually in this PR: - test_telegram_audio_size_gate_rejects_oversized_media_before_download (covers the _telegram_media_size_allowed guard this PR adds) - test_voice_tts_is_explicit_audio_reply_opt_in (matches current main) Removed now-unused imports (MessageEvent, MessageType, AsyncMock).
This commit is contained in:
parent
a09343cc96
commit
d0e017bac8
3 changed files with 130 additions and 7 deletions
|
|
@ -679,15 +679,28 @@ def recover_with_credential_pool(
|
|||
# long-running TUI sessions stuck on stale tokens until the user
|
||||
# exited and reopened.
|
||||
is_entitlement = agent._is_entitlement_failure(error_context, status_code)
|
||||
_auth_haystack = " ".join(
|
||||
str(error_context.get(k) or "").lower()
|
||||
for k in ("message", "reason", "code", "error")
|
||||
if isinstance(error_context, dict)
|
||||
)
|
||||
if (
|
||||
not is_entitlement
|
||||
and status_code == 403
|
||||
and "oauth authentication is currently not allowed for this organization" in _auth_haystack
|
||||
):
|
||||
is_entitlement = True
|
||||
if (
|
||||
not is_entitlement
|
||||
and status_code == 403
|
||||
and (agent.provider or "") == "anthropic"
|
||||
and getattr(agent, "api_mode", "") == "anthropic_messages"
|
||||
):
|
||||
is_entitlement = True
|
||||
if not is_entitlement and status_code == 403 and (agent.provider or "") == "xai-oauth":
|
||||
_disambiguator_haystack = " ".join(
|
||||
str(error_context.get(k) or "").lower()
|
||||
for k in ("message", "reason", "code", "error")
|
||||
if isinstance(error_context, dict)
|
||||
)
|
||||
_is_xai_auth_failure = (
|
||||
"[wke=unauthenticated:" in _disambiguator_haystack
|
||||
or "oauth2 access token could not be validated" in _disambiguator_haystack
|
||||
"[wke=unauthenticated:" in _auth_haystack
|
||||
or "oauth2 access token could not be validated" in _auth_haystack
|
||||
)
|
||||
if not _is_xai_auth_failure:
|
||||
is_entitlement = True
|
||||
|
|
|
|||
|
|
@ -3837,6 +3837,33 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
)
|
||||
return error
|
||||
|
||||
def _telegram_media_too_large_note(self, label: str, file_size: Any, max_bytes: int) -> str:
|
||||
limit_mb = max(1, max_bytes // (1024 * 1024))
|
||||
try:
|
||||
size_mb = int(file_size or 0) / (1024 * 1024)
|
||||
size_text = f"{size_mb:.1f} MB"
|
||||
except (TypeError, ValueError):
|
||||
size_text = "unknown size"
|
||||
return (
|
||||
f"[Telegram {label} skipped: file size {size_text} exceeds the "
|
||||
f"{limit_mb} MB limit. Ask the user to send a shorter voice note "
|
||||
"or a smaller audio file.]"
|
||||
)
|
||||
|
||||
def _telegram_media_size_allowed(self, source: Any, label: str) -> tuple[bool, Optional[str]]:
|
||||
"""Validate Telegram media size before downloading into memory."""
|
||||
max_bytes = int(getattr(self, "_max_doc_bytes", 20 * 1024 * 1024) or 20 * 1024 * 1024)
|
||||
file_size = getattr(source, "file_size", None)
|
||||
try:
|
||||
size = int(file_size or 0)
|
||||
except (TypeError, ValueError):
|
||||
size = 0
|
||||
if size <= 0:
|
||||
return True, None
|
||||
if size <= max_bytes:
|
||||
return True, None
|
||||
return False, self._telegram_media_too_large_note(label, size, max_bytes)
|
||||
|
||||
async def send_voice(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
|
@ -5602,6 +5629,12 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# Download voice/audio messages to cache for STT transcription
|
||||
if msg.voice:
|
||||
try:
|
||||
allowed, note = self._telegram_media_size_allowed(msg.voice, "voice message")
|
||||
if not allowed:
|
||||
event.text = self._append_observed_note(event.text, note or "")
|
||||
logger.info("[Telegram] Skipped oversized user voice (size=%s)", getattr(msg.voice, "file_size", None))
|
||||
await self.handle_message(event)
|
||||
return
|
||||
file_obj = await msg.voice.get_file()
|
||||
audio_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
|
||||
|
|
@ -5612,6 +5645,12 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
logger.warning("[Telegram] Failed to cache voice: %s", e, exc_info=True)
|
||||
elif msg.audio:
|
||||
try:
|
||||
allowed, note = self._telegram_media_size_allowed(msg.audio, "audio file")
|
||||
if not allowed:
|
||||
event.text = self._append_observed_note(event.text, note or "")
|
||||
logger.info("[Telegram] Skipped oversized user audio (size=%s)", getattr(msg.audio, "file_size", None))
|
||||
await self.handle_message(event)
|
||||
return
|
||||
file_obj = await msg.audio.get_file()
|
||||
audio_bytes = await file_obj.download_as_bytearray()
|
||||
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
|
||||
|
|
|
|||
71
tests/gateway/test_telegram_voice_v0_regressions.py
Normal file
71
tests/gateway/test_telegram_voice_v0_regressions.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from gateway.config import Platform
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
from gateway.run import GatewayRunner
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
def _source():
|
||||
return SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
|
||||
|
||||
|
||||
def _runner(adapter=None):
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = SimpleNamespace(
|
||||
stt_enabled=True,
|
||||
group_sessions_per_user=True,
|
||||
thread_sessions_per_user=False,
|
||||
)
|
||||
runner.adapters = {Platform.TELEGRAM: adapter} if adapter else {}
|
||||
runner._consume_pending_native_image_paths = lambda _key: []
|
||||
runner._session_key_for_source = lambda _source: "telegram:dm:12345"
|
||||
runner._thread_metadata_for_source = lambda *_args, **_kwargs: {}
|
||||
runner._reply_anchor_for_event = lambda _event: None
|
||||
return runner
|
||||
|
||||
|
||||
def test_telegram_audio_size_gate_rejects_oversized_media_before_download():
|
||||
adapter = object.__new__(TelegramAdapter)
|
||||
adapter._max_doc_bytes = 1024
|
||||
|
||||
allowed, note = adapter._telegram_media_size_allowed(
|
||||
SimpleNamespace(file_size=2048),
|
||||
"voice message",
|
||||
)
|
||||
|
||||
assert allowed is False
|
||||
assert "exceeds" in note
|
||||
assert "voice message" in note
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_voice_tts_is_explicit_audio_reply_opt_in():
|
||||
adapter = SimpleNamespace(
|
||||
_auto_tts_disabled_chats=set(),
|
||||
_auto_tts_enabled_chats=set(),
|
||||
)
|
||||
runner = _runner(adapter)
|
||||
runner._voice_mode = {}
|
||||
runner._voice_provider_mode = {}
|
||||
runner._save_voice_modes = lambda: None
|
||||
runner._save_voice_provider_modes = lambda: None
|
||||
|
||||
event = SimpleNamespace(
|
||||
source=_source(),
|
||||
get_command_args=lambda: "tts",
|
||||
)
|
||||
result = await GatewayRunner._handle_voice_command(runner, event)
|
||||
|
||||
assert runner._voice_mode["telegram:12345"] == "all"
|
||||
assert "12345" in adapter._auto_tts_enabled_chats
|
||||
assert result
|
||||
Loading…
Add table
Add a link
Reference in a new issue