mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-06 07:51:53 +00:00
feat(telegram): skip-STT audio path + 2GB cap via local Bot API server
Two coordinated changes that unblock downstream audio pipelines (diarization, custom transcription, archival) on attachments larger than the public Bot API's 20MB getFile ceiling.

- `stt.enabled: false` no longer drops voice/audio with a generic "transcription disabled" note. The gateway probes the cached file's duration (wave → mutagen → ffprobe ladder) and surfaces `[The user sent a voice message: <abs path> (duration: M:SS)]` to the agent so a skill or tool can pick up the raw file. The previous placeholder is replaced rather than appended when present.
- `platforms.telegram.extra.base_url` set → adapter auto-lifts its document size cap from 20MB to 2GB (the local telegram-bot-api `--local` ceiling) and the "too large" reply reports the active limit dynamically. No new config knob; presence of `base_url` is the opt-in.
- `platforms.telegram.extra.local_mode: true` wires `Application.builder().local_mode(True)` on the python-telegram-bot builder. PTB then reads files from disk instead of HTTP, which is required when telegram-bot-api runs in `--local` mode (the server returns absolute filesystem paths, not `/file/bot...` URLs).

- gateway/run.py: rewrites the `stt.enabled: false` branch of `_enrich_message_with_transcription`. New `_format_duration` + `_probe_audio_duration` helpers.
- gateway/platforms/telegram.py: `_max_doc_bytes` instance attribute derived from `extra.base_url`; `local_mode` builder wiring; dynamic "too large" message.

- tests/gateway/test_stt_config.py: covers path-surfacing with and without an existing user message, and placeholder replacement.
- tests/gateway/test_telegram_max_doc_bytes.py: 3 cases — default 20MB without base_url, 2GB when set, empty-string base_url keeps default.

- website/docs/user-guide/messaging/telegram.md: new "Skipping STT" subsection under Voice Messages and a full "Large Files (>20MB) via Local Bot API Server" walkthrough (api_id/api_hash, docker-compose, one-time `logOut` migration, `platforms.telegram.extra` config, the `local_mode` disk-access requirement, the silent HTTP-fallback 404).
- website/docs/user-guide/features/voice-mode.md: documents the `stt.enabled` knob in the config reference.

- `pytest tests/gateway/test_telegram_max_doc_bytes.py tests/gateway/test_stt_config.py` → 9/9 passing.
- Verified end-to-end on a live deployment: gateway log shows `Using custom Telegram base_url: http://...` and `Using Telegram local_mode (read files from disk)` on startup; voice messages above 20MB cache to disk and surface their path to the agent.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
6265b3a132
commit
ad2531be08
6 changed files with 327 additions and 14 deletions
|
|
@ -33,25 +33,51 @@ def test_load_gateway_config_bridges_stt_enabled_from_config_yaml(tmp_path, monk
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_enrich_message_with_transcription_skips_when_stt_disabled():
|
||||
async def test_enrich_message_with_transcription_surfaces_path_when_stt_disabled():
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(stt_enabled=False)
|
||||
runner._has_setup_skill = lambda: True # Should NOT be consulted in disabled branch.
|
||||
|
||||
with patch(
|
||||
"tools.transcription_tools.transcribe_audio",
|
||||
side_effect=AssertionError("transcribe_audio should not be called when STT is disabled"),
|
||||
), patch(
|
||||
"gateway.run._probe_audio_duration",
|
||||
new=AsyncMock(return_value="0:12"),
|
||||
):
|
||||
result = await runner._enrich_message_with_transcription(
|
||||
"caption",
|
||||
["/tmp/voice.ogg"],
|
||||
)
|
||||
|
||||
assert "transcription is disabled" in result.lower()
|
||||
assert "/tmp/voice.ogg" in result
|
||||
assert "voice message" in result.lower()
|
||||
assert "(duration: 0:12)" in result
|
||||
assert "caption" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_enrich_message_with_transcription_omits_duration_on_probe_failure():
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = GatewayRunner.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(stt_enabled=False)
|
||||
|
||||
with patch(
|
||||
"gateway.run._probe_audio_duration",
|
||||
new=AsyncMock(return_value=None),
|
||||
):
|
||||
result = await runner._enrich_message_with_transcription(
|
||||
"",
|
||||
["/tmp/voice.ogg"],
|
||||
)
|
||||
|
||||
assert "/tmp/voice.ogg" in result
|
||||
assert "duration" not in result.lower()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_enrich_message_with_transcription_avoids_bogus_no_provider_message_for_backend_key_errors():
|
||||
from gateway.run import GatewayRunner
|
||||
|
|
|
|||
56
tests/gateway/test_telegram_max_doc_bytes.py
Normal file
56
tests/gateway/test_telegram_max_doc_bytes.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""Tests for Telegram document-size cap.
|
||||
|
||||
The public Telegram Bot API caps `getFile` at 20MB. A locally-hosted
|
||||
`telegram-bot-api` server raises that ceiling to 2GB. We treat the presence
|
||||
of `extra.base_url` as the explicit opt-in to the higher cap.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
|
||||
|
||||
def _ensure_telegram_mock():
|
||||
if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
|
||||
return
|
||||
|
||||
telegram_mod = MagicMock()
|
||||
telegram_mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
|
||||
telegram_mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
|
||||
telegram_mod.constants.ChatType.GROUP = "group"
|
||||
telegram_mod.constants.ChatType.SUPERGROUP = "supergroup"
|
||||
telegram_mod.constants.ChatType.CHANNEL = "channel"
|
||||
telegram_mod.constants.ChatType.PRIVATE = "private"
|
||||
|
||||
for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
|
||||
sys.modules.setdefault(name, telegram_mod)
|
||||
|
||||
|
||||
_ensure_telegram_mock()
|
||||
|
||||
from gateway.platforms.telegram import TelegramAdapter # noqa: E402
|
||||
|
||||
|
||||
def test_max_doc_bytes_defaults_to_20mb_without_base_url():
|
||||
adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***", extra={}))
|
||||
assert adapter._max_doc_bytes == 20 * 1024 * 1024
|
||||
|
||||
|
||||
def test_max_doc_bytes_raised_to_2gb_when_base_url_set():
|
||||
adapter = TelegramAdapter(
|
||||
PlatformConfig(
|
||||
enabled=True,
|
||||
token="***",
|
||||
extra={"base_url": "http://localhost:8081/bot"},
|
||||
)
|
||||
)
|
||||
assert adapter._max_doc_bytes == 2 * 1024 * 1024 * 1024
|
||||
|
||||
|
||||
def test_max_doc_bytes_empty_base_url_keeps_default():
|
||||
"""An empty/falsy `base_url` should not flip the cap — only a real URL does."""
|
||||
adapter = TelegramAdapter(
|
||||
PlatformConfig(enabled=True, token="***", extra={"base_url": ""}),
|
||||
)
|
||||
assert adapter._max_doc_bytes == 20 * 1024 * 1024
|
||||
Loading…
Add table
Add a link
Reference in a new issue