mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 01:51:44 +00:00
Three Signal adapter improvements that depend on the no-edit-mode
plumbing from the previous commit.
1. Native formatting (markdown -> Signal bodyRanges)
Signal renders markdown as literal characters (**bold**, `code`, #
heading), which looks broken. Added _markdown_to_signal(text) that
strips markdown syntax and emits Signal-native bodyRanges as
start:length:STYLE entries. Offsets are computed in UTF-16 code
units so non-BMP emoji stay aligned. Supports BOLD, ITALIC,
STRIKETHROUGH, MONOSPACE, and headings mapped to BOLD. Fenced code and inline code are
handled; link syntax is unwrapped to visible text + URL.
Includes edge-case fixes reported previously:
- Bullet lists ("* item") no longer misidentified as italics
- URLs containing underscores no longer italicized around the dot
2. Reply-quote context
Parses dataMessage.quote on inbound messages and populates
MessageEvent.raw_message with sender + timestamp_ms. This lets the
gateway's existing [Replying to: "..."] injector (gateway/run.py)
work on Signal, matching Telegram/Matrix behavior.
3. Processing reactions
Overrides on_processing_start -> hourglass and on_processing_complete
-> checkmark via the sendReaction JSON-RPC using targetAuthor and
targetTimestamp pulled from raw_message. Uses the ProcessingOutcome
enum introduced in the previous commit.
Also sets SUPPORTS_MESSAGE_EDITING = False on SignalAdapter so the
no-edit streaming path activates.
Tests: 40+ new tests in tests/gateway/test_signal_format.py covering
markdown conversion, UTF-16 offset correctness with non-BMP emoji,
bullet-list and URL false-positive regressions, reply-quote extraction,
and reaction payload shape. Regression extensions to test_signal.py.
452 lines
17 KiB
Python
452 lines
17 KiB
Python
"""Tests for Signal _markdown_to_signal() formatting.
|
|
|
|
Covers the markdown-to-bodyRanges conversion pipeline: bold, italic,
|
|
strikethrough, monospace, code blocks, headings, and — critically — the
|
|
false-positive regressions that caused spurious italics in production.
|
|
"""
|
|
|
|
import pytest
|
|
|
|
from gateway.config import PlatformConfig
|
|
from gateway.platforms.signal import SignalAdapter
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helper
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _m2s(text: str):
    """Shorthand: run *text* through ``SignalAdapter._markdown_to_signal``.

    Args:
        text: Markdown source to convert.

    Returns:
        The adapter's result unchanged; the tests in this module unpack it
        as ``(plain_text, styles)``.
    """
    return SignalAdapter._markdown_to_signal(text)
|
|
|
|
|
|
def _style_types(styles: list[str]) -> list[str]:
    """Extract just the STYLE part from ``'start:length:STYLE'`` strings.

    Example: ``["0:4:BOLD", "5:6:ITALIC"]`` -> ``["BOLD", "ITALIC"]``.

    Args:
        styles: Style entries such as ``'0:4:BOLD'``.

    Returns:
        The text after the last ``:`` of each entry, in input order.

    Raises:
        IndexError: If an entry contains no ``:`` at all.
    """
    # rsplit from the right so only the trailing STYLE token is taken.
    return [s.rsplit(":", 1)[1] for s in styles]
|
|
|
|
|
|
def _find_style(styles: list[str], style_type: str) -> list[str]:
    """Return only the entries of *styles* whose type is *style_type*.

    Args:
        styles: Style entries such as ``'0:4:BOLD'``.
        style_type: Style name to keep, e.g. ``"ITALIC"``.

    Returns:
        The entries ending in ``":<style_type>"``, in input order.
    """
    # Match on ":<type>" so that e.g. "BOLD" cannot match a longer name
    # that merely ends with the same letters.
    return [s for s in styles if s.endswith(f":{style_type}")]
|
|
|
|
|
|
# ===========================================================================
|
|
# Basic formatting
|
|
# ===========================================================================
|
|
|
|
class TestMarkdownToSignalBasic:
    """Core formatting: bold, italic, strikethrough, monospace.

    Each test checks that the markdown markers are removed from the plain
    text and that the expected style type is reported.
    """

    def test_bold_double_asterisk(self):
        """``**word**`` yields one BOLD style and no markers."""
        text, styles = _m2s("hello **world**")
        assert text == "hello world"
        assert len(styles) == 1
        assert styles[0].endswith(":BOLD")

    def test_bold_double_underscore(self):
        """``__word__`` yields one BOLD style and no markers."""
        text, styles = _m2s("hello __world__")
        assert text == "hello world"
        assert len(styles) == 1
        assert styles[0].endswith(":BOLD")

    def test_italic_single_asterisk(self):
        """``*word*`` yields one ITALIC style and no markers."""
        text, styles = _m2s("hello *world*")
        assert text == "hello world"
        assert len(styles) == 1
        assert styles[0].endswith(":ITALIC")

    def test_italic_single_underscore(self):
        """``_word_`` yields one ITALIC style and no markers."""
        text, styles = _m2s("hello _world_")
        assert text == "hello world"
        assert len(styles) == 1
        assert styles[0].endswith(":ITALIC")

    def test_strikethrough(self):
        """``~~word~~`` yields one STRIKETHROUGH style and no markers."""
        text, styles = _m2s("hello ~~world~~")
        assert text == "hello world"
        assert len(styles) == 1
        assert styles[0].endswith(":STRIKETHROUGH")

    def test_inline_monospace(self):
        """Backtick code span yields one MONOSPACE style and no backticks."""
        text, styles = _m2s("run `ls -la` now")
        assert text == "run ls -la now"
        assert len(styles) == 1
        assert styles[0].endswith(":MONOSPACE")

    def test_fenced_code_block(self):
        """Fenced block keeps its content, drops the fences, is MONOSPACE."""
        text, styles = _m2s("before\n```\ncode here\n```\nafter")
        assert "code here" in text
        assert "```" not in text
        assert any(s.endswith(":MONOSPACE") for s in styles)

    def test_heading_becomes_bold(self):
        """``## Title`` loses the hashes and is reported as BOLD."""
        text, styles = _m2s("## Section Title")
        assert text == "Section Title"
        assert len(styles) == 1
        assert styles[0].endswith(":BOLD")

    def test_multiple_styles(self):
        """Bold and italic in one string are both reported."""
        text, styles = _m2s("**bold** and *italic*")
        assert text == "bold and italic"
        types = _style_types(styles)
        assert "BOLD" in types
        assert "ITALIC" in types

    def test_plain_text_no_styles(self):
        """Text without markdown passes through with no styles."""
        text, styles = _m2s("just plain text")
        assert text == "just plain text"
        assert styles == []

    def test_empty_string(self):
        """Empty input yields empty text and no styles."""
        text, styles = _m2s("")
        assert text == ""
        assert styles == []
|
|
|
|
|
|
# ===========================================================================
|
|
# Italic false-positive regressions
|
|
# ===========================================================================
|
|
|
|
class TestItalicFalsePositives:
    """Regressions from signal-italic-false-positive-fix.md and
    signal-italic-bullet-list-fix.md."""

    # --- snake_case (original fix) ---

    def test_snake_case_not_italic(self):
        """snake_case identifiers must NOT be italicized."""
        text, styles = _m2s("the config_file is ready")
        assert text == "the config_file is ready"
        assert _find_style(styles, "ITALIC") == []

    def test_multiple_snake_case(self):
        """Several UPPER_SNAKE identifiers on one line produce no italics."""
        text, styles = _m2s("set OPENAI_API_KEY and ANTHROPIC_API_KEY")
        assert _find_style(styles, "ITALIC") == []

    def test_snake_case_path(self):
        """An underscore inside a file path produces no italics."""
        text, styles = _m2s("/tools/delegate_tool.py")
        assert _find_style(styles, "ITALIC") == []

    def test_snake_case_between_words(self):
        """file_path and error_code — underscores between words."""
        text, styles = _m2s("file_path and error_code")
        assert _find_style(styles, "ITALIC") == []

    # --- Bullet lists (second fix) ---

    def test_bullet_list_not_italic(self):
        """* item lines must NOT be treated as italic delimiters."""
        md = "* item one\n* item two\n* item three"
        text, styles = _m2s(md)
        assert _find_style(styles, "ITALIC") == []

    def test_bullet_list_with_content_before(self):
        """A bullet list after an intro paragraph produces no italics."""
        md = "Here are things:\n\n* first thing\n* second thing"
        text, styles = _m2s(md)
        assert _find_style(styles, "ITALIC") == []

    def test_bullet_list_file_paths(self):
        """Real-world case that triggered the bug."""
        md = (
            "* tools/delegate_tool.py — delegation\n"
            "* tools/file_tools.py — file operations\n"
            "* tools/web_tools.py — web operations"
        )
        text, styles = _m2s(md)
        assert _find_style(styles, "ITALIC") == []

    def test_bullet_with_italic_inside(self):
        """Italic *inside* a bullet item should still work."""
        md = "* this has *emphasis* inside\n* plain item"
        text, styles = _m2s(md)
        italic_styles = _find_style(styles, "ITALIC")
        # Exactly one italic span: the bullet markers themselves must not
        # pair up into additional spans.
        assert len(italic_styles) == 1
        # Only the presence of the word is asserted here; the span's
        # start/length are not checked by this test.
        assert "emphasis" in text

    # --- Cross-line spans (DOTALL removal) ---

    def test_star_italic_no_cross_line(self):
        """*foo\\nbar* must NOT match as italic (no DOTALL)."""
        text, styles = _m2s("*foo\nbar*")
        assert _find_style(styles, "ITALIC") == []

    def test_underscore_italic_no_cross_line(self):
        """_foo\\nbar_ must NOT match as italic (no DOTALL)."""
        text, styles = _m2s("_foo\nbar_")
        assert _find_style(styles, "ITALIC") == []

    def test_star_italic_multiline_response(self):
        """Multi-paragraph response with * should not false-positive."""
        md = (
            "I checked the following files:\n\n"
            "* tools/delegate_tool.py — sub-agent delegation\n"
            "* tools/file_tools.py — file read/write/search\n"
            "* tools/web_tools.py — web search/extract\n\n"
            "Everything looks good."
        )
        text, styles = _m2s(md)
        assert _find_style(styles, "ITALIC") == []

    # --- Legitimate italic still works ---

    def test_star_italic_still_works(self):
        """A properly paired ``*word*`` is still italicized."""
        text, styles = _m2s("this is *italic* text")
        assert text == "this is italic text"
        assert len(_find_style(styles, "ITALIC")) == 1

    def test_underscore_italic_still_works(self):
        """A properly paired ``_word_`` is still italicized."""
        text, styles = _m2s("this is _italic_ text")
        assert text == "this is italic text"
        assert len(_find_style(styles, "ITALIC")) == 1

    def test_multiple_italic_same_line(self):
        """Two italic spans on one line are reported separately."""
        text, styles = _m2s("*foo* and *bar* ok")
        assert text == "foo and bar ok"
        assert len(_find_style(styles, "ITALIC")) == 2

    def test_italic_single_word(self):
        """Input consisting solely of ``*word*`` is italicized."""
        text, styles = _m2s("*word*")
        assert text == "word"
        assert len(_find_style(styles, "ITALIC")) == 1

    def test_italic_multi_word(self):
        """An italic span may contain spaces between words."""
        text, styles = _m2s("*several words here*")
        assert text == "several words here"
        assert len(_find_style(styles, "ITALIC")) == 1
|
|
|
|
|
|
# ===========================================================================
|
|
# Style position accuracy
|
|
# ===========================================================================
|
|
|
|
class TestStylePositions:
    """Verify that start:length positions map to the correct text."""

    def _extract(self, text: str, style_str: str) -> str:
        """Given 'start:length:STYLE', extract the substring from text.

        Args:
            text: Plain text the style entry refers to.
            style_str: Entry of the form ``'start:length:STYLE'``.

        Returns:
            The slice of *text* covered by the entry, with ``start`` and
            ``length`` interpreted as UTF-16 code units.
        """
        # Positions are UTF-16 code units; for ASCII they match code points
        parts = style_str.split(":")
        start, length = int(parts[0]), int(parts[1])
        # Encode to UTF-16-LE (2 bytes per code unit, no BOM), slice on
        # byte offsets, then decode back.
        encoded = text.encode("utf-16-le")
        extracted = encoded[start * 2 : (start + length) * 2]
        return extracted.decode("utf-16-le")

    def test_bold_position(self):
        """The BOLD span covers exactly the bolded word."""
        text, styles = _m2s("hello **world** end")
        assert len(styles) == 1
        assert self._extract(text, styles[0]) == "world"

    def test_italic_position(self):
        """The ITALIC span covers exactly the italicized word."""
        text, styles = _m2s("hello *world* end")
        assert len(styles) == 1
        assert self._extract(text, styles[0]) == "world"

    def test_multiple_styles_positions(self):
        """Each of two spans covers its own word."""
        text, styles = _m2s("**bold** then *italic*")
        assert len(styles) == 2
        # Compared as a set: the order of the returned styles is not asserted.
        extracted = {self._extract(text, s) for s in styles}
        assert extracted == {"bold", "italic"}

    def test_emoji_utf16_offset(self):
        """Emoji (multi-byte UTF-16) before a styled span."""
        text, styles = _m2s("👋 **hello**")
        assert text == "👋 hello"
        assert len(styles) == 1
        assert self._extract(text, styles[0]) == "hello"
|
|
|
|
|
|
# ===========================================================================
|
|
# Edge cases
|
|
# ===========================================================================
|
|
|
|
class TestEdgeCases:
    """Tricky inputs that have caused issues or could regress."""

    def test_bold_inside_bullet(self):
        """Bold inside a bullet list item."""
        md = "* **important** item\n* normal item"
        text, styles = _m2s(md)
        assert len(_find_style(styles, "BOLD")) == 1
        assert _find_style(styles, "ITALIC") == []

    def test_code_span_with_underscores(self):
        """`snake_case_var` — backtick takes priority over underscore."""
        text, styles = _m2s("use `my_var_name` here")
        assert text == "use my_var_name here"
        types = _style_types(styles)
        assert "MONOSPACE" in types
        assert "ITALIC" not in types

    def test_bold_and_italic_nested(self):
        """***word*** — ambiguous nesting must not crash or lose the word."""
        text, styles = _m2s("***word***")
        # ** matches bold around *word*, or *** is ambiguous; which styles
        # result is deliberately left unasserted — only that the word
        # survives the conversion.
        assert "word" in text

    def test_lone_asterisk(self):
        """A single * with no pair should not cause issues."""
        text, styles = _m2s("5 * 3 = 15")
        # Should not crash; any italic match would be a false positive
        assert "5" in text and "15" in text

    def test_lone_underscore(self):
        """A single _ with no pair."""
        text, styles = _m2s("this _ that")
        assert text == "this _ that"

    def test_consecutive_underscored_words(self):
        """_foo and _bar (leading underscores, no closers)."""
        text, styles = _m2s("call _init and _setup")
        assert _find_style(styles, "ITALIC") == []

    def test_mixed_formatting_no_bleed(self):
        """Multiple format types don't bleed into each other."""
        md = "**bold** and `code` and *italic* and ~~strike~~"
        text, styles = _m2s(md)
        assert text == "bold and code and italic and strike"
        types = _style_types(styles)
        # Sorted so the assertion does not depend on emission order.
        assert sorted(types) == ["BOLD", "ITALIC", "MONOSPACE", "STRIKETHROUGH"]
|
|
|
|
|
|
# ===========================================================================
|
|
# signal-markdown-strip-patch: core conversion pipeline
|
|
# ===========================================================================
|
|
|
|
class TestMarkdownStripPatch:
    """Tests for the original signal-markdown-strip-patch.

    Covers: fenced code blocks with language tags, links preserved,
    headings converted to bold, multiple headings, UTF-16 correctness
    for multi-byte characters, and marker stripping completeness.
    """

    def test_fenced_code_block_with_language_tag(self):
        """```python\\ncode\\n``` — language tag is stripped, content is MONOSPACE."""
        text, styles = _m2s("```python\nprint('hello')\n```")
        assert "```" not in text
        assert "python" not in text  # language tag stripped
        assert "print('hello')" in text
        assert any(s.endswith(":MONOSPACE") for s in styles)

    def test_fenced_code_block_multiline(self):
        """Multi-line code blocks preserve all lines."""
        md = "```\nline1\nline2\nline3\n```"
        text, styles = _m2s(md)
        assert "line1" in text
        assert "line2" in text
        assert "line3" in text
        assert "```" not in text

    def test_links_preserved(self):
        """[text](url) links keep their URL in the output text."""
        md = "Check [this link](https://example.com) for details"
        text, styles = _m2s(md)
        # Only the URL's presence is asserted; whether the surrounding
        # markdown link syntax is kept or unwrapped is not checked here.
        assert "https://example.com" in text

    def test_heading_h1(self):
        """# H1 becomes bold text."""
        text, styles = _m2s("# Main Title")
        assert text == "Main Title"
        assert len(styles) == 1
        assert styles[0].endswith(":BOLD")

    def test_heading_h3(self):
        """### H3 becomes bold text."""
        text, styles = _m2s("### Sub Section")
        assert text == "Sub Section"
        assert len(styles) == 1
        assert styles[0].endswith(":BOLD")

    def test_multiple_headings(self):
        """Multiple headings each become separate bold spans."""
        md = "## First\n\nSome text\n\n## Second"
        text, styles = _m2s(md)
        assert "First" in text
        assert "Second" in text
        assert "##" not in text
        bold_styles = _find_style(styles, "BOLD")
        assert len(bold_styles) == 2

    def test_no_raw_markdown_markers_in_output(self):
        """Bold, strikethrough and code markers are stripped from the text."""
        md = "**bold** and *italic* and ~~struck~~ and `code` and ## heading"
        text, styles = _m2s(md)
        assert "**" not in text
        assert "~~" not in text
        assert "`" not in text
        # ## at end might remain if not at line start — that's ok
        # The important thing is styled markers are stripped

    def test_utf16_surrogate_pair_emoji(self):
        """Emoji requiring UTF-16 surrogate pairs don't corrupt offsets."""
        # 🎉 is U+1F389 — requires surrogate pair (2 UTF-16 code units)
        text, styles = _m2s("🎉🎉 **test**")
        assert "test" in text
        assert len(styles) == 1
        # Verify the style position is correct
        parts = styles[0].split(":")
        start, length = int(parts[0]), int(parts[1])
        # 🎉🎉 = 4 UTF-16 code units + space = 5, then "test" = 4
        assert start == 5
        assert length == 4

    def test_consecutive_newlines_collapsed(self):
        """3+ consecutive newlines are collapsed to 2."""
        text, styles = _m2s("first\n\n\n\n\nsecond")
        assert "\n\n\n" not in text
        assert "first" in text
        assert "second" in text

    def test_empty_bold_not_crash(self):
        """**** (empty bold) should not crash."""
        text, styles = _m2s("before **** after")
        # Should not raise — exact output doesn't matter much
        assert "before" in text
|
|
|
|
|
|
# ===========================================================================
|
|
# signal-streaming-patch: SUPPORTS_MESSAGE_EDITING and send() behavior
|
|
# ===========================================================================
|
|
|
|
class TestSignalStreamingPatch:
    """Tests for signal-streaming-patch: cursor suppression and edit support.

    These verify the adapter-level properties that prevent the streaming
    cursor from leaking into Signal messages.
    """

    def test_signal_does_not_support_editing(self, monkeypatch):
        """SignalAdapter.SUPPORTS_MESSAGE_EDITING must be False."""
        # Same environment setup as the send() test in this class.
        monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "")
        # `is False` (not just falsy): the flag must be the literal boolean.
        assert SignalAdapter.SUPPORTS_MESSAGE_EDITING is False

    @pytest.mark.asyncio
    async def test_send_returns_no_message_id(self, monkeypatch):
        """send() returns message_id=None so stream consumer uses no-edit path."""
        monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "")

        config = PlatformConfig(enabled=True)
        config.extra = {
            "http_url": "http://localhost:8080",
            "account": "+15551234567",
        }
        adapter = SignalAdapter(config)

        # Mock the RPC call so no real signal-cli endpoint is contacted.
        async def mock_rpc(method, params, rpc_id=None):
            return {"timestamp": 1234567890}

        adapter._rpc = mock_rpc

        result = await adapter.send(
            chat_id="+15559876543",
            content="Hello",
        )
        assert result.message_id is None
|