mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
Merge pull request #49530 from NousResearch/salvage/signal-trio
refactor(signal): salvage AAC voice-note remux + shared markdown formatting (batch of #47766, #46386)
This commit is contained in:
commit
834bbae895
7 changed files with 489 additions and 142 deletions
|
|
@ -17,6 +17,9 @@ import json
|
|||
import logging
|
||||
import os
|
||||
import random
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
|
|
@ -39,6 +42,7 @@ from gateway.platforms.base import (
|
|||
cache_image_from_url,
|
||||
)
|
||||
from gateway.platforms.helpers import redact_phone
|
||||
from gateway.platforms.signal_format import markdown_to_signal
|
||||
from gateway.platforms.signal_rate_limit import (
|
||||
SIGNAL_BATCH_PACING_NOTICE_THRESHOLD,
|
||||
SIGNAL_MAX_ATTACHMENTS_PER_MSG,
|
||||
|
|
@ -76,7 +80,14 @@ def _parse_comma_list(value: str) -> List[str]:
|
|||
|
||||
|
||||
def _guess_extension(data: bytes) -> str:
|
||||
"""Guess file extension from magic bytes."""
|
||||
"""Guess file extension from magic bytes.
|
||||
|
||||
Android Signal delivers voice notes as raw ADTS AAC frames, which share
|
||||
the ``0xFF 0xFx`` sync word with MPEG-1/2 Layer 3 (MP3). The byte-1
|
||||
layout disambiguates: ADTS packs ``ID layer protection_absent`` into
|
||||
bits 3-0, where ``ID`` is 0 for MPEG-2/4 AAC and ``layer`` is always
|
||||
0 for ADTS. A real MP3 frame has ``ID=1`` and ``layer`` in {1, 2, 3}.
|
||||
"""
|
||||
if data[:4] == b"\x89PNG":
|
||||
return ".png"
|
||||
if data[:2] == b"\xff\xd8":
|
||||
|
|
@ -92,6 +103,12 @@ def _guess_extension(data: bytes) -> str:
|
|||
if data[:4] == b"OggS":
|
||||
return ".ogg"
|
||||
if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0:
|
||||
# ``0xFF 0xFx`` is shared by MP3 and ADTS AAC. The discriminator
|
||||
# is bits 3-1 of byte 1: ADTS has ``ID=0`` and ``layer=00`` (mask
|
||||
# 0xF6, target 0xF0); MP3 has ``ID=1`` and ``layer`` in {01,10,11}
|
||||
# (mask 0xF6, target in {0xF2, 0xF4, 0xF6}).
|
||||
if (data[1] & 0xF6) == 0xF0:
|
||||
return ".aac"
|
||||
return ".mp3"
|
||||
if data[:2] == b"PK":
|
||||
return ".zip"
|
||||
|
|
@ -120,6 +137,61 @@ def _ext_to_mime(ext: str) -> str:
|
|||
return _EXT_TO_MIME.get(ext.lower(), "application/octet-stream")
|
||||
|
||||
|
||||
def _remux_aac_to_m4a(aac_data: bytes) -> Optional[Tuple[bytes, str]]:
|
||||
"""Losslessly remux raw ADTS AAC bytes into an MP4 (.m4a) container.
|
||||
|
||||
Used by the Signal attachment cache so Android voice notes land on disk
|
||||
in a container that every major STT API (Groq, OpenAI, xAI, Mistral
|
||||
Voxtral) will accept. ``ffmpeg -c:a copy`` is a single demux/remux —
|
||||
no re-encode, no quality loss, sub-100ms for typical voice-note sizes.
|
||||
|
||||
Returns ``(m4a_bytes, ".m4a")`` on success, or ``None`` if ffmpeg is
|
||||
missing, input is invalid, or remux fails for any reason. Callers
|
||||
must treat ``None`` as "pass through unchanged" and not raise.
|
||||
"""
|
||||
ffmpeg = shutil.which("ffmpeg")
|
||||
if not ffmpeg:
|
||||
# Common Homebrew/local prefixes on macOS dev hosts.
|
||||
for prefix in ("/opt/homebrew/bin/ffmpeg", "/usr/local/bin/ffmpeg"):
|
||||
if os.path.isfile(prefix) and os.access(prefix, os.X_OK):
|
||||
ffmpeg = prefix
|
||||
break
|
||||
if not ffmpeg:
|
||||
logger.debug("Signal: ffmpeg not found, skipping AAC→M4A remux")
|
||||
return None
|
||||
try:
|
||||
with tempfile.NamedTemporaryFile(suffix=".aac", delete=False) as src:
|
||||
src.write(aac_data)
|
||||
src_path = src.name
|
||||
dst_path = src_path[:-4] + ".m4a"
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
[ffmpeg, "-y", "-loglevel", "error", "-i", src_path,
|
||||
"-c:a", "copy", "-movflags", "+faststart", dst_path],
|
||||
capture_output=True, timeout=10,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
logger.warning(
|
||||
"Signal: AAC→M4A remux failed (ffmpeg exit %d): %s",
|
||||
proc.returncode, proc.stderr.decode("utf-8", "replace")[:300],
|
||||
)
|
||||
return None
|
||||
with open(dst_path, "rb") as f:
|
||||
return f.read(), ".m4a"
|
||||
finally:
|
||||
for p in (src_path, dst_path):
|
||||
try:
|
||||
os.unlink(p)
|
||||
except OSError:
|
||||
pass
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.warning("Signal: AAC→M4A remux timed out (>10s)")
|
||||
return None
|
||||
except Exception:
|
||||
logger.exception("Signal: AAC→M4A remux error")
|
||||
return None
|
||||
|
||||
|
||||
def _render_mentions(text: str, mentions: list) -> str:
|
||||
"""Replace Signal mention placeholders (\\uFFFC) with readable @identifiers.
|
||||
|
||||
|
|
@ -724,6 +796,18 @@ class SignalAdapter(BasePlatformAdapter):
|
|||
raw_data = base64.b64decode(result)
|
||||
ext = _guess_extension(raw_data)
|
||||
|
||||
# Android Signal voice notes are raw ADTS AAC streams. Most STT
|
||||
# providers (Groq Whisper, OpenAI Whisper) reject raw ADTS — they
|
||||
# require AAC to be muxed into an MP4 container. Remux losslessly
|
||||
# with ``ffmpeg -c:a copy`` so the cached file is a normal .m4a.
|
||||
# No re-encode, sub-100ms on a Pi 5. Graceful no-op if ffmpeg is
|
||||
# absent: the raw ADTS file is cached as-is and STT may reject it
|
||||
# (there is no downstream sniff-and-remux fallback).
|
||||
if ext == ".aac":
|
||||
remuxed: Optional[Tuple[bytes, str]] = await asyncio.to_thread(_remux_aac_to_m4a, raw_data)
|
||||
if remuxed is not None:
|
||||
raw_data, ext = remuxed
|
||||
|
||||
if _is_image_ext(ext):
|
||||
path = cache_image_from_bytes(raw_data, ext)
|
||||
elif _is_audio_ext(ext):
|
||||
|
|
@ -821,144 +905,9 @@ class SignalAdapter(BasePlatformAdapter):
|
|||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _markdown_to_signal(text: str) -> tuple:
|
||||
"""Convert markdown to plain text + Signal textStyles list.
|
||||
|
||||
Signal doesn't render markdown. Instead it uses ``bodyRanges``
|
||||
(exposed by signal-cli as ``textStyle`` / ``textStyles`` params)
|
||||
with the format ``start:length:STYLE``.
|
||||
|
||||
Positions are measured in **UTF-16 code units** (not Python code
|
||||
points) because that's what the Signal protocol uses.
|
||||
|
||||
Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.
|
||||
(Signal's SPOILER style is not currently mapped — no standard
|
||||
markdown syntax for it; would need ``||spoiler||`` parsing.)
|
||||
|
||||
Returns ``(plain_text, styles_list)`` where *styles_list* may be
|
||||
empty if there's nothing to format.
|
||||
"""
|
||||
import re
|
||||
|
||||
def _utf16_len(s: str) -> int:
|
||||
"""Length of *s* in UTF-16 code units."""
|
||||
return len(s.encode("utf-16-le")) // 2
|
||||
|
||||
# Pre-process: normalize whitespace before any position tracking
|
||||
# so later operations don't invalidate recorded offsets.
|
||||
text = re.sub(r"\n{3,}", "\n\n", text)
|
||||
text = text.strip()
|
||||
|
||||
styles: list = []
|
||||
|
||||
# --- Phase 1: fenced code blocks ```...``` → MONOSPACE ---
|
||||
_CB = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL)
|
||||
while m := _CB.search(text):
|
||||
inner = m.group(1).rstrip("\n")
|
||||
start = m.start()
|
||||
text = text[: m.start()] + inner + text[m.end() :]
|
||||
styles.append((start, len(inner), "MONOSPACE"))
|
||||
|
||||
# --- Phase 2: heading markers # Foo → Foo (BOLD) ---
|
||||
_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
|
||||
new_text = ""
|
||||
last_end = 0
|
||||
for m in _HEADING.finditer(text):
|
||||
new_text += text[last_end : m.start()]
|
||||
last_end = m.end()
|
||||
eol = text.find("\n", m.end())
|
||||
if eol == -1:
|
||||
eol = len(text)
|
||||
heading_text = text[m.end() : eol]
|
||||
start = len(new_text)
|
||||
new_text += heading_text
|
||||
styles.append((start, len(heading_text), "BOLD"))
|
||||
last_end = eol
|
||||
new_text += text[last_end:]
|
||||
text = new_text
|
||||
|
||||
# --- Phase 3: inline patterns (single-pass to avoid offset drift) ---
|
||||
# The old code processed each pattern sequentially, stripping markers
|
||||
# and recording positions per-pass. Later passes shifted text without
|
||||
# adjusting earlier positions → bold/italic landed mid-word.
|
||||
#
|
||||
# Fix: collect ALL non-overlapping matches first, then strip every
|
||||
# marker in one pass so positions are computed against the final text.
|
||||
_PATTERNS = [
|
||||
(re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
|
||||
(re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
|
||||
(re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
|
||||
(re.compile(r"`(.+?)`"), "MONOSPACE"),
|
||||
(re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
|
||||
(re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
|
||||
]
|
||||
|
||||
# Collect all non-overlapping matches (earlier patterns win ties).
|
||||
all_matches: list = [] # (start, end, g1_start, g1_end, style)
|
||||
occupied: list = [] # (start, end) intervals already claimed
|
||||
for pat, style in _PATTERNS:
|
||||
for m in pat.finditer(text):
|
||||
ms, me = m.start(), m.end()
|
||||
if not any(ms < oe and me > os for os, oe in occupied):
|
||||
all_matches.append((ms, me, m.start(1), m.end(1), style))
|
||||
occupied.append((ms, me))
|
||||
all_matches.sort()
|
||||
|
||||
# Build removal list so we can adjust Phase 1/2 styles.
|
||||
# Each match removes its prefix markers (start..g1_start) and
|
||||
# suffix markers (g1_end..end).
|
||||
removals: list = [] # (position, length) sorted
|
||||
for ms, me, g1s, g1e, _ in all_matches:
|
||||
if g1s > ms:
|
||||
removals.append((ms, g1s - ms))
|
||||
if me > g1e:
|
||||
removals.append((g1e, me - g1e))
|
||||
removals.sort()
|
||||
|
||||
# Adjust Phase 1/2 styles for characters about to be removed.
|
||||
def _adj(pos: int) -> int:
|
||||
shift = 0
|
||||
for rp, rl in removals:
|
||||
if rp < pos:
|
||||
shift += min(rl, pos - rp)
|
||||
else:
|
||||
break
|
||||
return pos - shift
|
||||
|
||||
adjusted_prior: list = []
|
||||
for s, l, st in styles:
|
||||
ns = _adj(s)
|
||||
ne = _adj(s + l)
|
||||
if ne > ns:
|
||||
adjusted_prior.append((ns, ne - ns, st))
|
||||
|
||||
# Strip all inline markers in one pass → positions are correct.
|
||||
result = ""
|
||||
last_end = 0
|
||||
inline_styles: list = []
|
||||
for ms, me, g1s, g1e, sty in all_matches:
|
||||
result += text[last_end:ms]
|
||||
pos = len(result)
|
||||
inner = text[g1s:g1e]
|
||||
result += inner
|
||||
inline_styles.append((pos, len(inner), sty))
|
||||
last_end = me
|
||||
result += text[last_end:]
|
||||
text = result
|
||||
|
||||
styles = adjusted_prior + inline_styles
|
||||
|
||||
# Convert code-point offsets → UTF-16 code-unit offsets
|
||||
style_strings = []
|
||||
for cp_start, cp_len, stype in sorted(styles):
|
||||
# Safety: skip any out-of-bounds styles
|
||||
if cp_start < 0 or cp_start + cp_len > len(text):
|
||||
continue
|
||||
u16_start = _utf16_len(text[:cp_start])
|
||||
u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
|
||||
style_strings.append(f"{u16_start}:{u16_len}:{stype}")
|
||||
|
||||
return text, style_strings
|
||||
def _markdown_to_signal(text: str) -> tuple[str, list[str]]:
|
||||
"""Backward-compatible wrapper around shared Signal formatting helper."""
|
||||
return markdown_to_signal(text)
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""Strip markdown for plain-text fallback (used by base class).
|
||||
|
|
|
|||
140
gateway/platforms/signal_format.py
Normal file
140
gateway/platforms/signal_format.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
"""Shared Signal formatting helpers.
|
||||
|
||||
Keep markdown → Signal native formatting conversion in one place so both the
|
||||
live Signal adapter and standalone send paths emit the same bodyRanges.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def markdown_to_signal(text: str) -> tuple[str, list[str]]:
    """Convert markdown to plain text + Signal textStyles list.

    Signal doesn't render markdown. Instead it uses ``bodyRanges`` (exposed by
    signal-cli as ``textStyle`` / ``textStyles`` params) with the format
    ``start:length:STYLE``.

    Positions are measured in UTF-16 code units because that's what the Signal
    protocol uses.

    Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.

    Processing order:
      1. Collapse runs of 3+ newlines, strip, and turn ``-``/``*``/``+``
         list markers outside fenced code into ``•``.
      2. Strip fenced code blocks, recording MONOSPACE ranges.
      3. Strip ``#`` heading markers, recording BOLD ranges. Ranges recorded
         in step 2 are rebased for the removed markers.
      4. Strip inline markers (bold, strikethrough, code, italic) in a single
         pass, rebasing all earlier ranges for the removed markers.
      5. Convert code-point offsets to UTF-16 code-unit offsets.

    Args:
        text: Markdown source.

    Returns:
        ``(plain_text, style_strings)`` where each style string has the form
        ``start:length:STYLE``; the list is empty when nothing is formatted.
    """

    def _utf16_len(s: str) -> int:
        """Length of *s* in UTF-16 code units."""
        return len(s.encode("utf-16-le")) // 2

    def _normalize_bullet_markers(source: str) -> str:
        """Replace Markdown bullet markers with plain Unicode bullets.

        Signal does not render Markdown list syntax, so ``- item`` and
        ``* item`` otherwise arrive as literal Markdown markers. Preserve
        fenced code blocks byte-for-byte; list-looking lines inside code are
        code, not prose bullets.
        """
        # The capturing group keeps the fences in the result, so odd indexes
        # are fenced blocks and even indexes are prose.
        parts = re.split(r"(```.*?```)", source, flags=re.DOTALL)
        for idx, part in enumerate(parts):
            if idx % 2 == 1:
                continue
            parts[idx] = re.sub(r"(?m)^([ \t]{0,3})[-*+]\s+", r"\1• ", part)
        return "".join(parts)

    def _adjust(pos: int, removals: list[tuple[int, int]]) -> int:
        """Map *pos* to its position after the sorted *removals* are applied."""
        shift = 0
        for remove_pos, remove_len in removals:
            if remove_pos < pos:
                # A removal straddling *pos* only shifts by the part before it.
                shift += min(remove_len, pos - remove_pos)
            else:
                break
        return pos - shift

    def _rebase(
        spans: list[tuple[int, int, str]],
        removals: list[tuple[int, int]],
    ) -> list[tuple[int, int, str]]:
        """Shift style spans for *removals*, dropping spans that become empty."""
        rebased: list[tuple[int, int, str]] = []
        for start, length, style in spans:
            new_start = _adjust(start, removals)
            new_end = _adjust(start + length, removals)
            if new_end > new_start:
                rebased.append((new_start, new_end - new_start, style))
        return rebased

    # Normalize whitespace before any position tracking so later operations
    # don't invalidate recorded offsets.
    text = re.sub(r"\n{3,}", "\n\n", text)
    text = text.strip()
    text = _normalize_bullet_markers(text)

    styles: list[tuple[int, int, str]] = []

    # --- Fenced code blocks ```...``` → MONOSPACE ---
    # Blocks are stripped front to back, so each recorded start stays valid
    # while later blocks are removed.
    code_block = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL)
    while match := code_block.search(text):
        inner = match.group(1).rstrip("\n")
        start = match.start()
        text = text[: match.start()] + inner + text[match.end() :]
        styles.append((start, len(inner), "MONOSPACE"))

    # --- Heading markers "# Foo" → "Foo" (BOLD) ---
    heading = re.compile(r"^#{1,6}\s+", re.MULTILINE)
    heading_styles: list[tuple[int, int, str]] = []
    heading_removals: list[tuple[int, int]] = []
    new_text = ""
    last_end = 0
    for match in heading.finditer(text):
        new_text += text[last_end : match.start()]
        eol = text.find("\n", match.end())
        if eol == -1:
            eol = len(text)
        heading_text = text[match.end() : eol]
        start = len(new_text)
        new_text += heading_text
        heading_styles.append((start, len(heading_text), "BOLD"))
        heading_removals.append((match.start(), match.end() - match.start()))
        last_end = eol
    new_text += text[last_end:]
    text = new_text

    # Code-block ranges were measured before the heading markers were
    # stripped; rebase them so they still point at the code.
    styles = _rebase(styles, heading_removals) + heading_styles

    # --- Inline patterns (single pass to avoid offset drift) ---
    # Earlier patterns win when matches overlap.
    patterns = [
        (re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
        (re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
        (re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
        (re.compile(r"`(.+?)`"), "MONOSPACE"),
        (re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
        (re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
    ]

    # (start, end, group1_start, group1_end, style)
    all_matches: list[tuple[int, int, int, int, str]] = []
    occupied: list[tuple[int, int]] = []
    for pattern, style in patterns:
        for match in pattern.finditer(text):
            ms, me = match.start(), match.end()
            if not any(ms < oe and me > os for os, oe in occupied):
                all_matches.append((ms, me, match.start(1), match.end(1), style))
                occupied.append((ms, me))
    all_matches.sort()

    # Each match removes its prefix markers (start..g1_start) and its suffix
    # markers (g1_end..end).
    removals: list[tuple[int, int]] = []
    for ms, me, g1s, g1e, _ in all_matches:
        if g1s > ms:
            removals.append((ms, g1s - ms))
        if me > g1e:
            removals.append((g1e, me - g1e))
    removals.sort()

    adjusted_prior = _rebase(styles, removals)

    # Strip all inline markers in one pass so inline positions are computed
    # against the final text.
    result = ""
    last_end = 0
    inline_styles: list[tuple[int, int, str]] = []
    for ms, me, g1s, g1e, style in all_matches:
        result += text[last_end:ms]
        pos = len(result)
        inner = text[g1s:g1e]
        result += inner
        inline_styles.append((pos, len(inner), style))
        last_end = me
    result += text[last_end:]
    text = result

    styles = adjusted_prior + inline_styles

    # Convert code-point offsets → UTF-16 code-unit offsets.
    style_strings: list[str] = []
    for cp_start, cp_len, style_type in sorted(styles):
        # Safety: skip any out-of-bounds styles.
        if cp_start < 0 or cp_start + cp_len > len(text):
            continue
        u16_start = _utf16_len(text[:cp_start])
        u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
        style_strings.append(f"{u16_start}:{u16_len}:{style_type}")

    return text, style_strings
|
||||
|
|
@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
|||
|
||||
# Auto-extracted from noreply emails + manual overrides
|
||||
AUTHOR_MAP = {
|
||||
"lkz-de@users.noreply.github.com": "lkz-de",
|
||||
"charles@salesondemand.io": "salesondemandio",
|
||||
"victor@rocketfueldev.com": "victor-kyriazakos",
|
||||
"87440198+JoaoMarcos44@users.noreply.github.com": "JoaoMarcos44",
|
||||
|
|
|
|||
|
|
@ -163,6 +163,103 @@ class TestSignalHelpers:
|
|||
from gateway.platforms.signal import _guess_extension
|
||||
assert _guess_extension(b"\x00\x00\x00\x18ftypisom" + b"\x00" * 100) == ".mp4"
|
||||
|
||||
def test_guess_extension_aac_adts_unprotected(self):
|
||||
"""ADTS AAC, MPEG-4, no CRC (the canonical Android Signal voice note).
|
||||
|
||||
Byte 0 = 0xFF (sync high), byte 1 = 0xF1 (sync low + ID=0 + layer=00
|
||||
+ protection_absent=1). Must NOT be misclassified as MP3 — the old
|
||||
code's ``(b[1] & 0xE0) == 0xE0`` test wrongly returned ``.mp3``.
|
||||
"""
|
||||
from gateway.platforms.signal import _guess_extension
|
||||
assert _guess_extension(b"\xff\xf1" + b"\x00" * 200) == ".aac"
|
||||
|
||||
def test_guess_extension_aac_adts_protected(self):
|
||||
"""ADTS AAC, MPEG-4, CRC present (protection_absent=0)."""
|
||||
from gateway.platforms.signal import _guess_extension
|
||||
assert _guess_extension(b"\xff\xf0" + b"\x00" * 200) == ".aac"
|
||||
|
||||
def test_guess_extension_mp3_mpeg1_layer3(self):
|
||||
"""Real MP3 frame, MPEG-1 Layer 3: byte1 = 0xFB (ID=1, layer=01, prot=1)."""
|
||||
from gateway.platforms.signal import _guess_extension
|
||||
assert _guess_extension(b"\xff\xfb" + b"\x00" * 200) == ".mp3"
|
||||
|
||||
def test_guess_extension_mp3_mpeg2_layer3(self):
|
||||
"""Real MP3 frame, MPEG-2 Layer 3: byte1 = 0xF3 (ID=1, layer=01, prot=1)."""
|
||||
from gateway.platforms.signal import _guess_extension
|
||||
assert _guess_extension(b"\xff\xf3" + b"\x00" * 200) == ".mp3"
|
||||
|
||||
def test_guess_extension_aac_routes_to_audio_cache(self):
|
||||
"""ADTS-detected files must be routed to the audio cache, not document.
|
||||
|
||||
``_is_audio_ext(``.aac``)`` is True, so a Signal attachment that
|
||||
begins with the ADTS sync word ends up in ``cache_audio_from_bytes``,
|
||||
which the remux step then converts to MP4 container.
|
||||
"""
|
||||
from gateway.platforms.signal import _is_audio_ext, _guess_extension
|
||||
ext = _guess_extension(b"\xff\xf1" + b"\x00" * 200)
|
||||
assert ext == ".aac"
|
||||
assert _is_audio_ext(ext) is True
|
||||
|
||||
def test_remux_aac_to_m4a_round_trip(self):
|
||||
"""A real ADTS AAC stream remuxes to a valid MP4 (.m4a) container.
|
||||
|
||||
Generates a short ADTS AAC sample with ffmpeg at runtime so the
|
||||
end-to-end remux path actually exercises in CI (skipped only when
|
||||
ffmpeg is unavailable), rather than depending on a machine-specific
|
||||
file.
|
||||
"""
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from gateway.platforms.signal import _remux_aac_to_m4a
|
||||
|
||||
ffmpeg = shutil.which("ffmpeg")
|
||||
if not ffmpeg:
|
||||
import pytest
|
||||
pytest.skip("ffmpeg not available in this env")
|
||||
|
||||
# Synthesize 0.5s of silence encoded as raw ADTS AAC.
|
||||
with tempfile.NamedTemporaryFile(suffix=".aac", delete=False) as tmp:
|
||||
adts_path = tmp.name
|
||||
try:
|
||||
gen = subprocess.run(
|
||||
[ffmpeg, "-y", "-loglevel", "error", "-f", "lavfi",
|
||||
"-i", "anullsrc=r=44100:cl=mono", "-t", "0.5",
|
||||
"-c:a", "aac", "-f", "adts", adts_path],
|
||||
capture_output=True, timeout=30,
|
||||
)
|
||||
if gen.returncode != 0:
|
||||
import pytest
|
||||
pytest.skip("ffmpeg could not produce an ADTS AAC sample")
|
||||
with open(adts_path, "rb") as f:
|
||||
aac_data = f.read()
|
||||
finally:
|
||||
try:
|
||||
import os
|
||||
os.unlink(adts_path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
result = _remux_aac_to_m4a(aac_data)
|
||||
assert result is not None
|
||||
m4a_bytes, ext = result
|
||||
assert ext == ".m4a"
|
||||
# MP4 files start with a 4-byte size, then ``ftyp`` at offset 4.
|
||||
assert m4a_bytes[4:8] == b"ftyp", \
|
||||
f"expected MP4 ftyp box, got {m4a_bytes[:12]!r}"
|
||||
# Sanity check: the remuxed file must be at least half the input size.
|
||||
assert len(m4a_bytes) >= len(aac_data) * 0.5
|
||||
|
||||
def test_remux_aac_to_m4a_handles_garbage(self):
|
||||
"""Garbage input should return None, not raise."""
|
||||
from gateway.platforms.signal import _remux_aac_to_m4a
|
||||
result = _remux_aac_to_m4a(b"\xff\xf1garbage_no_aac_frames")
|
||||
# Either returns None (ffmpeg errored) or a real M4A. If it returned
|
||||
# bytes, the bytes must look like an MP4. Otherwise it returns None.
|
||||
if result is not None:
|
||||
m4a_bytes, ext = result
|
||||
assert ext == ".m4a"
|
||||
|
||||
def test_guess_extension_unknown(self):
|
||||
from gateway.platforms.signal import _guess_extension
|
||||
assert _guess_extension(b"\x00\x01\x02\x03" * 10) == ".bin"
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import pytest
|
|||
|
||||
from gateway.config import PlatformConfig
|
||||
from gateway.platforms.signal import SignalAdapter
|
||||
from gateway.platforms.signal_format import markdown_to_signal
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -20,6 +21,11 @@ def _m2s(text: str):
|
|||
return SignalAdapter._markdown_to_signal(text)
|
||||
|
||||
|
||||
def test_shared_helper_matches_signal_adapter_wrapper():
|
||||
text = "🙂 **bold** and `code`"
|
||||
assert markdown_to_signal(text) == SignalAdapter._markdown_to_signal(text)
|
||||
|
||||
|
||||
def _style_types(styles: list[str]) -> list[str]:
|
||||
"""Extract just the STYLE part from '0:4:BOLD' strings."""
|
||||
return [s.rsplit(":", 1)[1] for s in styles]
|
||||
|
|
@ -138,8 +144,29 @@ class TestItalicFalsePositives:
|
|||
"""* item lines must NOT be treated as italic delimiters."""
|
||||
md = "* item one\n* item two\n* item three"
|
||||
text, styles = _m2s(md)
|
||||
assert text == "• item one\n• item two\n• item three"
|
||||
assert _find_style(styles, "ITALIC") == []
|
||||
|
||||
def test_hyphen_bullet_list_uses_signal_safe_bullets(self):
|
||||
"""Signal does not render Markdown list markers; normalize them."""
|
||||
md = "- item one\n- item two"
|
||||
text, styles = _m2s(md)
|
||||
assert text == "• item one\n• item two"
|
||||
assert styles == []
|
||||
|
||||
def test_plus_bullet_list_uses_signal_safe_bullets(self):
|
||||
md = "+ item one\n+ item two"
|
||||
text, styles = _m2s(md)
|
||||
assert text == "• item one\n• item two"
|
||||
assert styles == []
|
||||
|
||||
def test_markdown_bullets_inside_fenced_code_are_preserved(self):
|
||||
md = "before\n```\n- literal\n* literal\n```\nafter"
|
||||
text, styles = _m2s(md)
|
||||
assert "- literal\n* literal" in text
|
||||
assert "• literal" not in text
|
||||
assert any(s.endswith(":MONOSPACE") for s in styles)
|
||||
|
||||
def test_bullet_list_with_content_before(self):
|
||||
md = "Here are things:\n\n* first thing\n* second thing"
|
||||
text, styles = _m2s(md)
|
||||
|
|
|
|||
|
|
@ -1189,6 +1189,18 @@ class TestParseTargetRefE164:
|
|||
assert thread_id is None
|
||||
assert is_explicit is True
|
||||
|
||||
def test_signal_group_target_is_explicit(self):
|
||||
chat_id, thread_id, is_explicit = _parse_target_ref("signal", " group:abc123 ")
|
||||
assert chat_id == "group:abc123"
|
||||
assert thread_id is None
|
||||
assert is_explicit is True
|
||||
|
||||
def test_empty_signal_group_target_is_not_explicit(self):
|
||||
chat_id, thread_id, is_explicit = _parse_target_ref("signal", " group: ")
|
||||
assert chat_id is None
|
||||
assert thread_id is None
|
||||
assert is_explicit is False
|
||||
|
||||
def test_sms_e164_is_explicit(self):
|
||||
chat_id, _, is_explicit = _parse_target_ref("sms", "+15551234567")
|
||||
assert chat_id == "+15551234567"
|
||||
|
|
@ -2230,11 +2242,68 @@ class TestSendSignalChunking:
|
|||
)
|
||||
)
|
||||
|
||||
assert result == {"success": True, "platform": "signal", "chat_id": "+15557654321"}
|
||||
assert result["success"] is True
|
||||
assert result["platform"] == "signal"
|
||||
assert result["chat_id"].endswith("4321")
|
||||
assert len(fake.calls) == 1
|
||||
params = fake.calls[0]["payload"]["params"]
|
||||
assert params["message"] == "hello"
|
||||
assert "attachments" not in params
|
||||
assert "textStyle" not in params
|
||||
assert "textStyles" not in params
|
||||
|
||||
def test_text_only_markdown_uses_singular_text_style(self, monkeypatch):
|
||||
fake = _FakeSignalHttp([{"result": {"timestamp": 1}}])
|
||||
_install_signal_http(monkeypatch, fake)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_signal(
|
||||
{"http_url": "http://localhost:8080", "account": "+155****4567"},
|
||||
"+155****4321",
|
||||
"**hello**",
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
params = fake.calls[0]["payload"]["params"]
|
||||
assert params["message"] == "hello"
|
||||
assert params["textStyle"] == "0:5:BOLD"
|
||||
assert "textStyles" not in params
|
||||
|
||||
def test_text_only_multiple_styles_use_plural_text_styles(self, monkeypatch):
|
||||
fake = _FakeSignalHttp([{"result": {"timestamp": 1}}])
|
||||
_install_signal_http(monkeypatch, fake)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_signal(
|
||||
{"http_url": "http://localhost:8080", "account": "+155****4567"},
|
||||
"+155****4321",
|
||||
"**bold** and *italic*",
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
params = fake.calls[0]["payload"]["params"]
|
||||
assert params["message"] == "bold and italic"
|
||||
assert "textStyle" not in params
|
||||
assert params["textStyles"] == ["0:4:BOLD", "9:6:ITALIC"]
|
||||
|
||||
def test_text_style_offsets_use_utf16_code_units(self, monkeypatch):
|
||||
fake = _FakeSignalHttp([{"result": {"timestamp": 1}}])
|
||||
_install_signal_http(monkeypatch, fake)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_signal(
|
||||
{"http_url": "http://localhost:8080", "account": "+155****4567"},
|
||||
"+155****4321",
|
||||
"🙂 **bold**",
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
params = fake.calls[0]["payload"]["params"]
|
||||
assert params["message"] == "🙂 bold"
|
||||
assert params["textStyle"] == "3:4:BOLD"
|
||||
|
||||
def test_chunks_attachments_above_max(self, tmp_path, monkeypatch):
|
||||
"""33 attachments → 2 batches; text only on first batch. Batch 1
|
||||
|
|
@ -2274,10 +2343,53 @@ class TestSendSignalChunking:
|
|||
first = fake.calls[0]["payload"]["params"]
|
||||
assert first["message"] == "Caption goes here"
|
||||
assert len(first["attachments"]) == SIGNAL_MAX_ATTACHMENTS_PER_MSG
|
||||
assert "textStyle" not in first
|
||||
assert "textStyles" not in first
|
||||
|
||||
second = fake.calls[1]["payload"]["params"]
|
||||
assert second["message"] == "" # caption only on batch 0
|
||||
assert len(second["attachments"]) == 33 - SIGNAL_MAX_ATTACHMENTS_PER_MSG
|
||||
assert "textStyle" not in second
|
||||
assert "textStyles" not in second
|
||||
|
||||
def test_caption_styles_only_apply_to_first_attachment_batch(self, tmp_path, monkeypatch):
|
||||
from gateway.platforms.signal_rate_limit import SIGNAL_MAX_ATTACHMENTS_PER_MSG
|
||||
|
||||
paths = []
|
||||
for i in range(33):
|
||||
p = tmp_path / f"img_{i}.png"
|
||||
p.write_bytes(b"\x89PNG" + b"\x00" * 16)
|
||||
paths.append((str(p), False))
|
||||
|
||||
fake = _FakeSignalHttp([
|
||||
{"result": {"timestamp": 1}},
|
||||
{"result": {"timestamp": 2}},
|
||||
])
|
||||
_install_signal_http(monkeypatch, fake)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_signal(
|
||||
{"http_url": "http://localhost:8080", "account": "+155****4567"},
|
||||
"group:abc123",
|
||||
"**Bold** and *italic*",
|
||||
media_files=paths,
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["chat_id"] == "group:***"
|
||||
first = fake.calls[0]["payload"]["params"]
|
||||
assert first["groupId"] == "abc123"
|
||||
assert first["message"] == "Bold and italic"
|
||||
assert first["textStyles"] == ["0:4:BOLD", "9:6:ITALIC"]
|
||||
assert len(first["attachments"]) == SIGNAL_MAX_ATTACHMENTS_PER_MSG
|
||||
|
||||
second = fake.calls[1]["payload"]["params"]
|
||||
assert second["groupId"] == "abc123"
|
||||
assert second["message"] == ""
|
||||
assert len(second["attachments"]) == 33 - SIGNAL_MAX_ATTACHMENTS_PER_MSG
|
||||
assert "textStyle" not in second
|
||||
assert "textStyles" not in second
|
||||
|
||||
def test_full_followup_batch_emits_pacing_notice(self, tmp_path, monkeypatch):
|
||||
"""64 attachments → 2 full batches. Batch 1 needs 14 more tokens
|
||||
|
|
|
|||
|
|
@ -88,6 +88,13 @@ def _error(message: str) -> dict:
|
|||
return {"error": _sanitize_error_text(message)}
|
||||
|
||||
|
||||
def _display_chat_id(platform_name: str, chat_id: str) -> str:
|
||||
"""Return a result-safe chat identifier for tool transcripts/log consumers."""
|
||||
if platform_name == "signal" and str(chat_id).startswith("group:"):
|
||||
return "group:***"
|
||||
return chat_id
|
||||
|
||||
|
||||
def _telegram_retry_delay(exc: Exception, attempt: int) -> float | None:
|
||||
retry_after = getattr(exc, "retry_after", None)
|
||||
if retry_after is not None:
|
||||
|
|
@ -523,6 +530,12 @@ def _parse_target_ref(platform_name: str, target_ref: str):
|
|||
# through to the _PHONE_PLATFORMS handler below.
|
||||
if _WHATSAPP_JID_RE.fullmatch(target_ref):
|
||||
return target_ref.strip(), None, True
|
||||
stripped_target = target_ref.strip()
|
||||
if platform_name == "signal" and stripped_target.startswith("group:"):
|
||||
group_id = stripped_target[len("group:"):].strip()
|
||||
if group_id:
|
||||
return f"group:{group_id}", None, True
|
||||
return None, None, False
|
||||
if platform_name in _PHONE_PLATFORMS:
|
||||
match = _E164_TARGET_RE.fullmatch(target_ref)
|
||||
if match:
|
||||
|
|
@ -1258,6 +1271,7 @@ async def _send_signal(extra, chat_id, message, media_files=None):
|
|||
_signal_send_timeout,
|
||||
get_scheduler,
|
||||
)
|
||||
from gateway.platforms.signal_format import markdown_to_signal
|
||||
|
||||
try:
|
||||
http_url = extra.get("http_url", "http://127.0.0.1:8080").rstrip("/")
|
||||
|
|
@ -1284,8 +1298,15 @@ async def _send_signal(extra, chat_id, message, media_files=None):
|
|||
else:
|
||||
att_batches = [[]]
|
||||
|
||||
plain_text, text_styles = markdown_to_signal(message)
|
||||
|
||||
async def _post(batch_attachments, batch_message):
|
||||
params = {"account": account, "message": batch_message}
|
||||
if batch_message and text_styles:
|
||||
if len(text_styles) == 1:
|
||||
params["textStyle"] = text_styles[0]
|
||||
else:
|
||||
params["textStyles"] = text_styles
|
||||
if chat_id.startswith("group:"):
|
||||
params["groupId"] = chat_id[6:]
|
||||
else:
|
||||
|
|
@ -1342,7 +1363,7 @@ async def _send_signal(extra, chat_id, message, media_files=None):
|
|||
f"for Signal rate limit, batch {idx + 1}/{len(att_batches)}.)"
|
||||
)
|
||||
|
||||
batch_message = message if idx == 0 else ""
|
||||
batch_message = plain_text if idx == 0 else ""
|
||||
|
||||
for attempt in range(1, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS + 1):
|
||||
try:
|
||||
|
|
@ -1407,7 +1428,7 @@ async def _send_signal(extra, chat_id, message, media_files=None):
|
|||
f"no attachments delivered"
|
||||
)
|
||||
|
||||
result = {"success": True, "platform": "signal", "chat_id": chat_id}
|
||||
result = {"success": True, "platform": "signal", "chat_id": _display_chat_id("signal", chat_id)}
|
||||
if warnings:
|
||||
result["warnings"] = warnings
|
||||
return result
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue