From 905820b59f5a5cae79d8d7ba279da0657e6a4a10 Mon Sep 17 00:00:00 2001 From: lkz-de Date: Mon, 15 Jun 2026 02:52:39 +0200 Subject: [PATCH 1/4] fix(signal): share markdown formatting across send paths Route Signal send paths through shared markdown formatting helpers and render markdown bullets consistently as Unicode bullets. Add coverage for Signal formatting and send_message integration. --- gateway/platforms/signal.py | 140 +------------------------- gateway/platforms/signal_format.py | 140 ++++++++++++++++++++++++++ tests/gateway/test_signal_format.py | 27 +++++ tests/tools/test_send_message_tool.py | 114 ++++++++++++++++++++- tools/send_message_tool.py | 25 ++++- 5 files changed, 306 insertions(+), 140 deletions(-) create mode 100644 gateway/platforms/signal_format.py diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 2d8b1c33090..3272a921911 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -39,6 +39,7 @@ from gateway.platforms.base import ( cache_image_from_url, ) from gateway.platforms.helpers import redact_phone +from gateway.platforms.signal_format import markdown_to_signal from gateway.platforms.signal_rate_limit import ( SIGNAL_BATCH_PACING_NOTICE_THRESHOLD, SIGNAL_MAX_ATTACHMENTS_PER_MSG, @@ -822,143 +823,8 @@ class SignalAdapter(BasePlatformAdapter): @staticmethod def _markdown_to_signal(text: str) -> tuple: - """Convert markdown to plain text + Signal textStyles list. - - Signal doesn't render markdown. Instead it uses ``bodyRanges`` - (exposed by signal-cli as ``textStyle`` / ``textStyles`` params) - with the format ``start:length:STYLE``. - - Positions are measured in **UTF-16 code units** (not Python code - points) because that's what the Signal protocol uses. - - Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE. - (Signal's SPOILER style is not currently mapped — no standard - markdown syntax for it; would need ``||spoiler||`` parsing.) 
- - Returns ``(plain_text, styles_list)`` where *styles_list* may be - empty if there's nothing to format. - """ - import re - - def _utf16_len(s: str) -> int: - """Length of *s* in UTF-16 code units.""" - return len(s.encode("utf-16-le")) // 2 - - # Pre-process: normalize whitespace before any position tracking - # so later operations don't invalidate recorded offsets. - text = re.sub(r"\n{3,}", "\n\n", text) - text = text.strip() - - styles: list = [] - - # --- Phase 1: fenced code blocks ```...``` → MONOSPACE --- - _CB = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL) - while m := _CB.search(text): - inner = m.group(1).rstrip("\n") - start = m.start() - text = text[: m.start()] + inner + text[m.end() :] - styles.append((start, len(inner), "MONOSPACE")) - - # --- Phase 2: heading markers # Foo → Foo (BOLD) --- - _HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE) - new_text = "" - last_end = 0 - for m in _HEADING.finditer(text): - new_text += text[last_end : m.start()] - last_end = m.end() - eol = text.find("\n", m.end()) - if eol == -1: - eol = len(text) - heading_text = text[m.end() : eol] - start = len(new_text) - new_text += heading_text - styles.append((start, len(heading_text), "BOLD")) - last_end = eol - new_text += text[last_end:] - text = new_text - - # --- Phase 3: inline patterns (single-pass to avoid offset drift) --- - # The old code processed each pattern sequentially, stripping markers - # and recording positions per-pass. Later passes shifted text without - # adjusting earlier positions → bold/italic landed mid-word. - # - # Fix: collect ALL non-overlapping matches first, then strip every - # marker in one pass so positions are computed against the final text. - _PATTERNS = [ - (re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"), - (re.compile(r"__(.+?)__", re.DOTALL), "BOLD"), - (re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"), - (re.compile(r"`(.+?)`"), "MONOSPACE"), - (re.compile(r"(? 
os for os, oe in occupied): - all_matches.append((ms, me, m.start(1), m.end(1), style)) - occupied.append((ms, me)) - all_matches.sort() - - # Build removal list so we can adjust Phase 1/2 styles. - # Each match removes its prefix markers (start..g1_start) and - # suffix markers (g1_end..end). - removals: list = [] # (position, length) sorted - for ms, me, g1s, g1e, _ in all_matches: - if g1s > ms: - removals.append((ms, g1s - ms)) - if me > g1e: - removals.append((g1e, me - g1e)) - removals.sort() - - # Adjust Phase 1/2 styles for characters about to be removed. - def _adj(pos: int) -> int: - shift = 0 - for rp, rl in removals: - if rp < pos: - shift += min(rl, pos - rp) - else: - break - return pos - shift - - adjusted_prior: list = [] - for s, l, st in styles: - ns = _adj(s) - ne = _adj(s + l) - if ne > ns: - adjusted_prior.append((ns, ne - ns, st)) - - # Strip all inline markers in one pass → positions are correct. - result = "" - last_end = 0 - inline_styles: list = [] - for ms, me, g1s, g1e, sty in all_matches: - result += text[last_end:ms] - pos = len(result) - inner = text[g1s:g1e] - result += inner - inline_styles.append((pos, len(inner), sty)) - last_end = me - result += text[last_end:] - text = result - - styles = adjusted_prior + inline_styles - - # Convert code-point offsets → UTF-16 code-unit offsets - style_strings = [] - for cp_start, cp_len, stype in sorted(styles): - # Safety: skip any out-of-bounds styles - if cp_start < 0 or cp_start + cp_len > len(text): - continue - u16_start = _utf16_len(text[:cp_start]) - u16_len = _utf16_len(text[cp_start : cp_start + cp_len]) - style_strings.append(f"{u16_start}:{u16_len}:{stype}") - - return text, style_strings + """Backward-compatible wrapper around shared Signal formatting helper.""" + return markdown_to_signal(text) def format_message(self, content: str) -> str: """Strip markdown for plain-text fallback (used by base class). 
diff --git a/gateway/platforms/signal_format.py b/gateway/platforms/signal_format.py new file mode 100644 index 00000000000..e8539549bf1 --- /dev/null +++ b/gateway/platforms/signal_format.py @@ -0,0 +1,140 @@ +"""Shared Signal formatting helpers. + +Keep markdown → Signal native formatting conversion in one place so both the +live Signal adapter and standalone send paths emit the same bodyRanges. +""" + +from __future__ import annotations + +import re + + +def markdown_to_signal(text: str) -> tuple[str, list[str]]: + """Convert markdown to plain text + Signal textStyles list. + + Signal doesn't render markdown. Instead it uses ``bodyRanges`` (exposed by + signal-cli as ``textStyle`` / ``textStyles`` params) with the format + ``start:length:STYLE``. + + Positions are measured in UTF-16 code units because that's what the Signal + protocol uses. + + Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE. + """ + + def _utf16_len(s: str) -> int: + """Length of *s* in UTF-16 code units.""" + return len(s.encode("utf-16-le")) // 2 + + def _normalize_bullet_markers(source: str) -> str: + """Replace Markdown bullet markers with plain Unicode bullets. + + Signal does not render Markdown list syntax, so ``- item`` and + ``* item`` otherwise arrive as literal Markdown markers. Preserve + fenced code blocks byte-for-byte; list-looking lines inside code are + code, not prose bullets. 
+ """ + parts = re.split(r"(```.*?```)", source, flags=re.DOTALL) + for idx, part in enumerate(parts): + if idx % 2 == 1: + continue + parts[idx] = re.sub(r"(?m)^([ \t]{0,3})[-*+]\s+", r"\1• ", part) + return "".join(parts) + + text = re.sub(r"\n{3,}", "\n\n", text) + text = text.strip() + text = _normalize_bullet_markers(text) + + styles: list[tuple[int, int, str]] = [] + + code_block = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL) + while match := code_block.search(text): + inner = match.group(1).rstrip("\n") + start = match.start() + text = text[: match.start()] + inner + text[match.end() :] + styles.append((start, len(inner), "MONOSPACE")) + + heading = re.compile(r"^#{1,6}\s+", re.MULTILINE) + new_text = "" + last_end = 0 + for match in heading.finditer(text): + new_text += text[last_end : match.start()] + last_end = match.end() + eol = text.find("\n", match.end()) + if eol == -1: + eol = len(text) + heading_text = text[match.end() : eol] + start = len(new_text) + new_text += heading_text + styles.append((start, len(heading_text), "BOLD")) + last_end = eol + new_text += text[last_end:] + text = new_text + + patterns = [ + (re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"), + (re.compile(r"__(.+?)__", re.DOTALL), "BOLD"), + (re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"), + (re.compile(r"`(.+?)`"), "MONOSPACE"), + (re.compile(r"(? 
os for os, oe in occupied): + all_matches.append((ms, me, match.start(1), match.end(1), style)) + occupied.append((ms, me)) + all_matches.sort() + + removals: list[tuple[int, int]] = [] + for ms, me, g1s, g1e, _ in all_matches: + if g1s > ms: + removals.append((ms, g1s - ms)) + if me > g1e: + removals.append((g1e, me - g1e)) + removals.sort() + + def _adjust(pos: int) -> int: + shift = 0 + for remove_pos, remove_len in removals: + if remove_pos < pos: + shift += min(remove_len, pos - remove_pos) + else: + break + return pos - shift + + adjusted_prior: list[tuple[int, int, str]] = [] + for start, length, style in styles: + new_start = _adjust(start) + new_end = _adjust(start + length) + if new_end > new_start: + adjusted_prior.append((new_start, new_end - new_start, style)) + + result = "" + last_end = 0 + inline_styles: list[tuple[int, int, str]] = [] + for ms, me, g1s, g1e, style in all_matches: + result += text[last_end:ms] + pos = len(result) + inner = text[g1s:g1e] + result += inner + inline_styles.append((pos, len(inner), style)) + last_end = me + result += text[last_end:] + text = result + + styles = adjusted_prior + inline_styles + + style_strings: list[str] = [] + for cp_start, cp_len, style_type in sorted(styles): + if cp_start < 0 or cp_start + cp_len > len(text): + continue + u16_start = _utf16_len(text[:cp_start]) + u16_len = _utf16_len(text[cp_start : cp_start + cp_len]) + style_strings.append(f"{u16_start}:{u16_len}:{style_type}") + + return text, style_strings diff --git a/tests/gateway/test_signal_format.py b/tests/gateway/test_signal_format.py index 0050a980f59..f281314c065 100644 --- a/tests/gateway/test_signal_format.py +++ b/tests/gateway/test_signal_format.py @@ -9,6 +9,7 @@ import pytest from gateway.config import PlatformConfig from gateway.platforms.signal import SignalAdapter +from gateway.platforms.signal_format import markdown_to_signal # --------------------------------------------------------------------------- @@ -20,6 +21,11 @@ def 
_m2s(text: str): return SignalAdapter._markdown_to_signal(text) +def test_shared_helper_matches_signal_adapter_wrapper(): + text = "🙂 **bold** and `code`" + assert markdown_to_signal(text) == SignalAdapter._markdown_to_signal(text) + + def _style_types(styles: list[str]) -> list[str]: """Extract just the STYLE part from '0:4:BOLD' strings.""" return [s.rsplit(":", 1)[1] for s in styles] @@ -138,8 +144,29 @@ class TestItalicFalsePositives: """* item lines must NOT be treated as italic delimiters.""" md = "* item one\n* item two\n* item three" text, styles = _m2s(md) + assert text == "• item one\n• item two\n• item three" assert _find_style(styles, "ITALIC") == [] + def test_hyphen_bullet_list_uses_signal_safe_bullets(self): + """Signal does not render Markdown list markers; normalize them.""" + md = "- item one\n- item two" + text, styles = _m2s(md) + assert text == "• item one\n• item two" + assert styles == [] + + def test_plus_bullet_list_uses_signal_safe_bullets(self): + md = "+ item one\n+ item two" + text, styles = _m2s(md) + assert text == "• item one\n• item two" + assert styles == [] + + def test_markdown_bullets_inside_fenced_code_are_preserved(self): + md = "before\n```\n- literal\n* literal\n```\nafter" + text, styles = _m2s(md) + assert "- literal\n* literal" in text + assert "• literal" not in text + assert any(s.endswith(":MONOSPACE") for s in styles) + def test_bullet_list_with_content_before(self): md = "Here are things:\n\n* first thing\n* second thing" text, styles = _m2s(md) diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py index 81cee1bb1de..9811f75d67e 100644 --- a/tests/tools/test_send_message_tool.py +++ b/tests/tools/test_send_message_tool.py @@ -1189,6 +1189,18 @@ class TestParseTargetRefE164: assert thread_id is None assert is_explicit is True + def test_signal_group_target_is_explicit(self): + chat_id, thread_id, is_explicit = _parse_target_ref("signal", " group:abc123 ") + assert chat_id == 
"group:abc123" + assert thread_id is None + assert is_explicit is True + + def test_empty_signal_group_target_is_not_explicit(self): + chat_id, thread_id, is_explicit = _parse_target_ref("signal", " group: ") + assert chat_id is None + assert thread_id is None + assert is_explicit is False + def test_sms_e164_is_explicit(self): chat_id, _, is_explicit = _parse_target_ref("sms", "+15551234567") assert chat_id == "+15551234567" @@ -2230,11 +2242,68 @@ class TestSendSignalChunking: ) ) - assert result == {"success": True, "platform": "signal", "chat_id": "+15557654321"} + assert result["success"] is True + assert result["platform"] == "signal" + assert result["chat_id"].endswith("4321") assert len(fake.calls) == 1 params = fake.calls[0]["payload"]["params"] assert params["message"] == "hello" assert "attachments" not in params + assert "textStyle" not in params + assert "textStyles" not in params + + def test_text_only_markdown_uses_singular_text_style(self, monkeypatch): + fake = _FakeSignalHttp([{"result": {"timestamp": 1}}]) + _install_signal_http(monkeypatch, fake) + + result = asyncio.run( + _send_signal( + {"http_url": "http://localhost:8080", "account": "+155****4567"}, + "+155****4321", + "**hello**", + ) + ) + + assert result["success"] is True + params = fake.calls[0]["payload"]["params"] + assert params["message"] == "hello" + assert params["textStyle"] == "0:5:BOLD" + assert "textStyles" not in params + + def test_text_only_multiple_styles_use_plural_text_styles(self, monkeypatch): + fake = _FakeSignalHttp([{"result": {"timestamp": 1}}]) + _install_signal_http(monkeypatch, fake) + + result = asyncio.run( + _send_signal( + {"http_url": "http://localhost:8080", "account": "+155****4567"}, + "+155****4321", + "**bold** and *italic*", + ) + ) + + assert result["success"] is True + params = fake.calls[0]["payload"]["params"] + assert params["message"] == "bold and italic" + assert "textStyle" not in params + assert params["textStyles"] == ["0:4:BOLD", 
"9:6:ITALIC"] + + def test_text_style_offsets_use_utf16_code_units(self, monkeypatch): + fake = _FakeSignalHttp([{"result": {"timestamp": 1}}]) + _install_signal_http(monkeypatch, fake) + + result = asyncio.run( + _send_signal( + {"http_url": "http://localhost:8080", "account": "+155****4567"}, + "+155****4321", + "🙂 **bold**", + ) + ) + + assert result["success"] is True + params = fake.calls[0]["payload"]["params"] + assert params["message"] == "🙂 bold" + assert params["textStyle"] == "3:4:BOLD" def test_chunks_attachments_above_max(self, tmp_path, monkeypatch): """33 attachments → 2 batches; text only on first batch. Batch 1 @@ -2274,10 +2343,53 @@ class TestSendSignalChunking: first = fake.calls[0]["payload"]["params"] assert first["message"] == "Caption goes here" assert len(first["attachments"]) == SIGNAL_MAX_ATTACHMENTS_PER_MSG + assert "textStyle" not in first + assert "textStyles" not in first second = fake.calls[1]["payload"]["params"] assert second["message"] == "" # caption only on batch 0 assert len(second["attachments"]) == 33 - SIGNAL_MAX_ATTACHMENTS_PER_MSG + assert "textStyle" not in second + assert "textStyles" not in second + + def test_caption_styles_only_apply_to_first_attachment_batch(self, tmp_path, monkeypatch): + from gateway.platforms.signal_rate_limit import SIGNAL_MAX_ATTACHMENTS_PER_MSG + + paths = [] + for i in range(33): + p = tmp_path / f"img_{i}.png" + p.write_bytes(b"\x89PNG" + b"\x00" * 16) + paths.append((str(p), False)) + + fake = _FakeSignalHttp([ + {"result": {"timestamp": 1}}, + {"result": {"timestamp": 2}}, + ]) + _install_signal_http(monkeypatch, fake) + + result = asyncio.run( + _send_signal( + {"http_url": "http://localhost:8080", "account": "+155****4567"}, + "group:abc123", + "**Bold** and *italic*", + media_files=paths, + ) + ) + + assert result["success"] is True + assert result["chat_id"] == "group:***" + first = fake.calls[0]["payload"]["params"] + assert first["groupId"] == "abc123" + assert first["message"] == 
"Bold and italic" + assert first["textStyles"] == ["0:4:BOLD", "9:6:ITALIC"] + assert len(first["attachments"]) == SIGNAL_MAX_ATTACHMENTS_PER_MSG + + second = fake.calls[1]["payload"]["params"] + assert second["groupId"] == "abc123" + assert second["message"] == "" + assert len(second["attachments"]) == 33 - SIGNAL_MAX_ATTACHMENTS_PER_MSG + assert "textStyle" not in second + assert "textStyles" not in second def test_full_followup_batch_emits_pacing_notice(self, tmp_path, monkeypatch): """64 attachments → 2 full batches. Batch 1 needs 14 more tokens diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 72311f87c41..a87c39e4294 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -88,6 +88,13 @@ def _error(message: str) -> dict: return {"error": _sanitize_error_text(message)} +def _display_chat_id(platform_name: str, chat_id: str) -> str: + """Return a result-safe chat identifier for tool transcripts/log consumers.""" + if platform_name == "signal" and str(chat_id).startswith("group:"): + return "group:***" + return chat_id + + def _telegram_retry_delay(exc: Exception, attempt: int) -> float | None: retry_after = getattr(exc, "retry_after", None) if retry_after is not None: @@ -523,6 +530,12 @@ def _parse_target_ref(platform_name: str, target_ref: str): # through to the _PHONE_PLATFORMS handler below. 
if _WHATSAPP_JID_RE.fullmatch(target_ref): return target_ref.strip(), None, True + stripped_target = target_ref.strip() + if platform_name == "signal" and stripped_target.startswith("group:"): + group_id = stripped_target[len("group:"):].strip() + if group_id: + return f"group:{group_id}", None, True + return None, None, False if platform_name in _PHONE_PLATFORMS: match = _E164_TARGET_RE.fullmatch(target_ref) if match: @@ -1258,6 +1271,7 @@ async def _send_signal(extra, chat_id, message, media_files=None): _signal_send_timeout, get_scheduler, ) + from gateway.platforms.signal_format import markdown_to_signal try: http_url = extra.get("http_url", "http://127.0.0.1:8080").rstrip("/") @@ -1284,8 +1298,15 @@ async def _send_signal(extra, chat_id, message, media_files=None): else: att_batches = [[]] + plain_text, text_styles = markdown_to_signal(message) + async def _post(batch_attachments, batch_message): params = {"account": account, "message": batch_message} + if batch_message and text_styles: + if len(text_styles) == 1: + params["textStyle"] = text_styles[0] + else: + params["textStyles"] = text_styles if chat_id.startswith("group:"): params["groupId"] = chat_id[6:] else: @@ -1342,7 +1363,7 @@ async def _send_signal(extra, chat_id, message, media_files=None): f"for Signal rate limit, batch {idx + 1}/{len(att_batches)}.)" ) - batch_message = message if idx == 0 else "" + batch_message = plain_text if idx == 0 else "" for attempt in range(1, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS + 1): try: @@ -1407,7 +1428,7 @@ async def _send_signal(extra, chat_id, message, media_files=None): f"no attachments delivered" ) - result = {"success": True, "platform": "signal", "chat_id": chat_id} + result = {"success": True, "platform": "signal", "chat_id": _display_chat_id("signal", chat_id)} if warnings: result["warnings"] = warnings return result From da34fca2bb800417a12bbfced82d97246b065233 Mon Sep 17 00:00:00 2001 From: jasnoorgill <5494586+jasnoorgill@users.noreply.github.com> Date: Wed, 
17 Jun 2026 15:06:24 +0530 Subject: [PATCH 2/4] fix(signal): detect ADTS AAC voice notes and remux to MP4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Android Signal delivers voice notes as raw ADTS AAC frames, which share the `0xFF 0xFx` sync word with MPEG-1/2 Layer 3 (MP3). The `_guess_extension` byte-signature test in gateway/platforms/signal.py was matching both, so ADTS AAC was being misclassified as MP3 — saved to disk with the wrong extension and rejected by every major STT API (Groq, OpenAI) because their server-side format sniffers inspect the actual codec, not the file extension. Two changes: 1. Tighten the MP3 vs ADTS disambiguator. ADTS packs `ID`, `layer`, and `protection_absent` into bits 3-0 of byte 1, where `layer` is always `00` for AAC (`ID` is 0 for MPEG-4 AAC and 1 for MPEG-2 AAC). A real MP3 frame never has `layer=00`; its layer bits are in {01, 10, 11}. The mask `0xF6` (sync nibble plus the two layer bits, ignoring `ID` and `protection_absent`) against target `0xF0` cleanly separates them. 2. Remux raw ADTS AAC to MP4 container at the cache step via `ffmpeg -c:a copy`. Single demux/remux, no re-encode, no quality loss, sub-100ms on a Pi 5. The cached file is a normal `.m4a` that all major STT providers accept. ffmpeg is a transitive dependency of many other Hermes features (TTS, video skills) so this isn't a new install requirement; the remux degrades gracefully to a no-op if ffmpeg is missing. The new helper `_remux_aac_to_m4a` is unit-tested with a real Android voice note from the audio cache that originally triggered the bug, plus synthetic ADTS frames for the byte-level disambiguator and garbage-input graceful failure. Closes the gap that broke transcription for any Android Signal user sending voice messages to Hermes. 
--- gateway/platforms/signal.py | 84 +++++++++++++++++++++++++++++++++++- tests/gateway/test_signal.py | 72 +++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 3272a921911..df9d07b4f71 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -17,6 +17,9 @@ import json import logging import os import random +import shutil +import subprocess +import tempfile import time import uuid from datetime import datetime, timezone @@ -77,7 +80,14 @@ def _parse_comma_list(value: str) -> List[str]: def _guess_extension(data: bytes) -> str: - """Guess file extension from magic bytes.""" + """Guess file extension from magic bytes. + + Android Signal delivers voice notes as raw ADTS AAC frames, which share + the ``0xFF 0xFx`` sync word with MPEG-1/2 Layer 3 (MP3). The byte-1 + layout disambiguates: ADTS packs ``ID layer protection_absent`` into + bits 3-0, where ``ID`` is 0 for MPEG-2/4 AAC and ``layer`` is always + 0 for ADTS. A real MP3 frame has ``ID=1`` and ``layer`` in {1, 2, 3}. + """ if data[:4] == b"\x89PNG": return ".png" if data[:2] == b"\xff\xd8": @@ -93,6 +103,12 @@ def _guess_extension(data: bytes) -> str: if data[:4] == b"OggS": return ".ogg" if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0: + # ``0xFF 0xFx`` is shared by MP3 and ADTS AAC. The discriminator + # is bits 3-1 of byte 1: ADTS has ``ID=0`` and ``layer=00`` (mask + # 0xF6, target 0xF0); MP3 has ``ID=1`` and ``layer`` in {01,10,11} + # (mask 0xF6, target in {0xF2, 0xF4, 0xF6}). + if (data[1] & 0xF6) == 0xF0: + return ".aac" return ".mp3" if data[:2] == b"PK": return ".zip" @@ -121,6 +137,61 @@ def _ext_to_mime(ext: str) -> str: return _EXT_TO_MIME.get(ext.lower(), "application/octet-stream") +def _remux_aac_to_m4a(aac_data: bytes) -> Optional[Tuple[bytes, str]]: + """Losslessly remux raw ADTS AAC bytes into an MP4 (.m4a) container. 
+ + Used by the Signal attachment cache so Android voice notes land on disk + in a container that every major STT API (Groq, OpenAI, xAI, Mistral + Voxtral) will accept. ``ffmpeg -c:a copy`` is a single demux/remux — + no re-encode, no quality loss, sub-100ms for typical voice-note sizes. + + Returns ``(m4a_bytes, ".m4a")`` on success, or ``None`` if ffmpeg is + missing, input is invalid, or remux fails for any reason. Callers + must treat ``None`` as "pass through unchanged" and not raise. + """ + ffmpeg = shutil.which("ffmpeg") + if not ffmpeg: + # Common Homebrew/local prefixes on macOS dev hosts. + for prefix in ("/opt/homebrew/bin/ffmpeg", "/usr/local/bin/ffmpeg"): + if os.path.isfile(prefix) and os.access(prefix, os.X_OK): + ffmpeg = prefix + break + if not ffmpeg: + logger.debug("Signal: ffmpeg not found, skipping AAC→M4A remux") + return None + try: + with tempfile.NamedTemporaryFile(suffix=".aac", delete=False) as src: + src.write(aac_data) + src_path = src.name + dst_path = src_path[:-4] + ".m4a" + try: + proc = subprocess.run( + [ffmpeg, "-y", "-loglevel", "error", "-i", src_path, + "-c:a", "copy", "-movflags", "+faststart", dst_path], + capture_output=True, timeout=10, + ) + if proc.returncode != 0: + logger.warning( + "Signal: AAC→M4A remux failed (ffmpeg exit %d): %s", + proc.returncode, proc.stderr.decode("utf-8", "replace")[:300], + ) + return None + with open(dst_path, "rb") as f: + return f.read(), ".m4a" + finally: + for p in (src_path, dst_path): + try: + os.unlink(p) + except OSError: + pass + except subprocess.TimeoutExpired: + logger.warning("Signal: AAC→M4A remux timed out (>10s)") + return None + except Exception: + logger.exception("Signal: AAC→M4A remux error") + return None + + def _render_mentions(text: str, mentions: list) -> str: """Replace Signal mention placeholders (\\uFFFC) with readable @identifiers. 
@@ -725,6 +796,17 @@ class SignalAdapter(BasePlatformAdapter): raw_data = base64.b64decode(result) ext = _guess_extension(raw_data) + # Android Signal voice notes are raw ADTS AAC streams. Most STT + # providers (Groq Whisper, OpenAI Whisper) reject raw ADTS — they + # require AAC to be muxed into an MP4 container. Remux losslessly + # with ``ffmpeg -c:a copy`` so the cached file is a normal .m4a. + # No re-encode, sub-100ms on a Pi 5. Graceful no-op if ffmpeg is + # absent; the STT layer has its own sniff-and-remux fallback. + if ext == ".aac": + remuxed: Optional[Tuple[bytes, str]] = await asyncio.to_thread(_remux_aac_to_m4a, raw_data) + if remuxed is not None: + raw_data, ext = remuxed + if _is_image_ext(ext): path = cache_image_from_bytes(raw_data, ext) elif _is_audio_ext(ext): diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py index b95a16d5409..b55c4215ecb 100644 --- a/tests/gateway/test_signal.py +++ b/tests/gateway/test_signal.py @@ -163,6 +163,78 @@ class TestSignalHelpers: from gateway.platforms.signal import _guess_extension assert _guess_extension(b"\x00\x00\x00\x18ftypisom" + b"\x00" * 100) == ".mp4" + def test_guess_extension_aac_adts_unprotected(self): + """ADTS AAC, MPEG-4, no CRC (the canonical Android Signal voice note). + + Byte 0 = 0xFF (sync high), byte 1 = 0xF1 (sync low + ID=0 + layer=00 + + protection_absent=1). Must NOT be misclassified as MP3 — the old + code's ``(b[1] & 0xE0) == 0xE0`` test wrongly returned ``.mp3``. 
+ """ + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"\xff\xf1" + b"\x00" * 200) == ".aac" + + def test_guess_extension_aac_adts_protected(self): + """ADTS AAC, MPEG-4, CRC present (protection_absent=0).""" + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"\xff\xf0" + b"\x00" * 200) == ".aac" + + def test_guess_extension_mp3_mpeg1_layer3(self): + """Real MP3 frame, MPEG-1 Layer 3: byte1 = 0xFB (ID=1, layer=01, prot=1).""" + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"\xff\xfb" + b"\x00" * 200) == ".mp3" + + def test_guess_extension_mp3_mpeg2_layer3(self): + """Real MP3 frame, MPEG-2 Layer 3: byte1 = 0xF3 (ID=1, layer=01, prot=1).""" + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"\xff\xf3" + b"\x00" * 200) == ".mp3" + + def test_guess_extension_aac_routes_to_audio_cache(self): + """ADTS-detected files must be routed to the audio cache, not document. + + ``_is_audio_ext(``.aac``)`` is True, so a Signal attachment that + begins with the ADTS sync word ends up in ``cache_audio_from_bytes``, + which the remux step then converts to MP4 container. + """ + from gateway.platforms.signal import _is_audio_ext, _guess_extension + ext = _guess_extension(b"\xff\xf1" + b"\x00" * 200) + assert ext == ".aac" + assert _is_audio_ext(ext) is True + + def test_remux_aac_to_m4a_round_trip(self): + """Real ADTS file from the audio cache remuxes to a valid MP4 container. + + Round-trips the actual Android voice note that triggered the + bug report — proves the end-to-end fix. 
+ """ + import os + import shutil + from gateway.platforms.signal import _remux_aac_to_m4a + src = "/home/pi/.hermes/audio_cache/audio_fcfc38390b47.mp3" + if not os.path.exists(src) or not shutil.which("ffmpeg"): + import pytest + pytest.skip("ffmpeg or source file not available in this env") + with open(src, "rb") as f: + aac_data = f.read() + result = _remux_aac_to_m4a(aac_data) + assert result is not None + m4a_bytes, ext = result + assert ext == ".m4a" + # MP4 files start with a 4-byte size, then ``ftyp`` at offset 4. + assert m4a_bytes[4:8] == b"ftyp", \ + f"expected MP4 ftyp box, got {m4a_bytes[:12]!r}" + # File must be at least as long as the input (MP4 has overhead). + assert len(m4a_bytes) >= len(aac_data) * 0.5 + + def test_remux_aac_to_m4a_handles_garbage(self): + """Garbage input should return None, not raise.""" + from gateway.platforms.signal import _remux_aac_to_m4a + result = _remux_aac_to_m4a(b"\xff\xf1garbage_no_aac_frames") + # Either returns None (ffmpeg errored) or a real M4A. If it returned + # bytes, the bytes must look like an MP4. Otherwise it returns None. + if result is not None: + m4a_bytes, ext = result + assert ext == ".m4a" + def test_guess_extension_unknown(self): from gateway.platforms.signal import _guess_extension assert _guess_extension(b"\x00\x01\x02\x03" * 10) == ".bin" From abafba0762fafe0136552da012711173ce87a5d1 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sat, 20 Jun 2026 14:24:29 +0530 Subject: [PATCH 3/4] refactor(signal): correct STT-fallback comment, type the markdown wrapper, make AAC test portable Review follow-up on the salvaged AAC + markdown changes: - Fix an inaccurate comment claiming the STT layer has a sniff-and-remux fallback (verified: no such fallback exists; the ffmpeg-absent path caches raw ADTS and STT may reject it). - Type the _markdown_to_signal wrapper as tuple[str, list[str]] to match the shared helper instead of a bare tuple. 
- Replace the hardcoded /home/pi/... test fixture with a runtime-generated ADTS AAC sample so the remux round-trip actually runs in CI (skips only when ffmpeg is absent) instead of always-skipping. --- gateway/platforms/signal.py | 5 +++-- tests/gateway/test_signal.py | 43 ++++++++++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index df9d07b4f71..7b81b2a957a 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -801,7 +801,8 @@ class SignalAdapter(BasePlatformAdapter): # require AAC to be muxed into an MP4 container. Remux losslessly # with ``ffmpeg -c:a copy`` so the cached file is a normal .m4a. # No re-encode, sub-100ms on a Pi 5. Graceful no-op if ffmpeg is - # absent; the STT layer has its own sniff-and-remux fallback. + # absent: the raw ADTS file is cached as-is and STT may reject it + # (there is no downstream sniff-and-remux fallback). if ext == ".aac": remuxed: Optional[Tuple[bytes, str]] = await asyncio.to_thread(_remux_aac_to_m4a, raw_data) if remuxed is not None: @@ -904,7 +905,7 @@ class SignalAdapter(BasePlatformAdapter): # ------------------------------------------------------------------ @staticmethod - def _markdown_to_signal(text: str) -> tuple: + def _markdown_to_signal(text: str) -> tuple[str, list[str]]: """Backward-compatible wrapper around shared Signal formatting helper.""" return markdown_to_signal(text) diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py index b55c4215ecb..e79ee7a8591 100644 --- a/tests/gateway/test_signal.py +++ b/tests/gateway/test_signal.py @@ -201,20 +201,45 @@ class TestSignalHelpers: assert _is_audio_ext(ext) is True def test_remux_aac_to_m4a_round_trip(self): - """Real ADTS file from the audio cache remuxes to a valid MP4 container. + """A real ADTS AAC stream remuxes to a valid MP4 (.m4a) container. 
- Round-trips the actual Android voice note that triggered the - bug report — proves the end-to-end fix. + Generates a short ADTS AAC sample with ffmpeg at runtime so the + end-to-end remux path actually exercises in CI (skipped only when + ffmpeg is unavailable), rather than depending on a machine-specific + file. """ - import os import shutil + import subprocess + import tempfile from gateway.platforms.signal import _remux_aac_to_m4a - src = "/home/pi/.hermes/audio_cache/audio_fcfc38390b47.mp3" - if not os.path.exists(src) or not shutil.which("ffmpeg"): + + ffmpeg = shutil.which("ffmpeg") + if not ffmpeg: import pytest - pytest.skip("ffmpeg or source file not available in this env") - with open(src, "rb") as f: - aac_data = f.read() + pytest.skip("ffmpeg not available in this env") + + # Synthesize 0.5s of silence encoded as raw ADTS AAC. + with tempfile.NamedTemporaryFile(suffix=".aac", delete=False) as tmp: + adts_path = tmp.name + try: + gen = subprocess.run( + [ffmpeg, "-y", "-loglevel", "error", "-f", "lavfi", + "-i", "anullsrc=r=44100:cl=mono", "-t", "0.5", + "-c:a", "aac", "-f", "adts", adts_path], + capture_output=True, timeout=30, + ) + if gen.returncode != 0: + import pytest + pytest.skip("ffmpeg could not produce an ADTS AAC sample") + with open(adts_path, "rb") as f: + aac_data = f.read() + finally: + try: + import os + os.unlink(adts_path) + except OSError: + pass + result = _remux_aac_to_m4a(aac_data) assert result is not None m4a_bytes, ext = result From 467c879b2e594c7112cbfa5ce67771dcdcd02cb3 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Sat, 20 Jun 2026 15:03:29 +0530 Subject: [PATCH 4/4] chore(release): map lkz-de contributor email to GitHub login The contributor-check CI auto-resolves only the +id form of GitHub noreply emails; lkz-de's commits use the legacy plain form (lkz-de@users.noreply.github.com), so add an explicit AUTHOR_MAP entry. 
--- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 7162b01f4eb..767ee2c2416 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" # Auto-extracted from noreply emails + manual overrides AUTHOR_MAP = { + "lkz-de@users.noreply.github.com": "lkz-de", "charles@salesondemand.io": "salesondemandio", "victor@rocketfueldev.com": "victor-kyriazakos", "87440198+JoaoMarcos44@users.noreply.github.com": "JoaoMarcos44",