mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(gateway): fix underscore-stripping regex to preserve snake_case identifiers
The blanket `_(.+?)_` and `__(.+?)__` patterns incorrectly consumed snake_case identifiers like `send_as_bot` and `user_id`. Add lookbehind/lookahead boundaries so that underscores adjacent to alphanumeric characters are not treated as markdown formatting. The same fix was already applied and tested in the CLI renderer; this commit addresses the copy in gateway/platforms/helpers.py. Supersedes #15076.
This commit is contained in:
parent
00c3d848d8
commit
4eab312c35
2 changed files with 62 additions and 2 deletions
|
|
@@ -157,8 +157,8 @@ class TextBatchAggregator:
|
||||||
# Pre-compiled regexes for performance
|
# Pre-compiled regexes for performance
|
||||||
_RE_BOLD = re.compile(r"\*\*(.+?)\*\*", re.DOTALL)
|
_RE_BOLD = re.compile(r"\*\*(.+?)\*\*", re.DOTALL)
|
||||||
_RE_ITALIC_STAR = re.compile(r"\*(.+?)\*", re.DOTALL)
|
_RE_ITALIC_STAR = re.compile(r"\*(.+?)\*", re.DOTALL)
|
||||||
_RE_BOLD_UNDER = re.compile(r"__(.+?)__", re.DOTALL)
|
_RE_BOLD_UNDER = re.compile(r"(?<![a-zA-Z0-9])__(?=[^_\s])(.+?)(?<=[^_])__(?![a-zA-Z0-9])", re.DOTALL)
|
||||||
_RE_ITALIC_UNDER = re.compile(r"_(.+?)_", re.DOTALL)
|
_RE_ITALIC_UNDER = re.compile(r"(?<![a-zA-Z0-9])_(?=[^_\s])(.+?)(?<=[^_])_(?![a-zA-Z0-9])", re.DOTALL)
|
||||||
_RE_CODE_BLOCK = re.compile(r"```[a-zA-Z0-9_+-]*\n?")
|
_RE_CODE_BLOCK = re.compile(r"```[a-zA-Z0-9_+-]*\n?")
|
||||||
_RE_INLINE_CODE = re.compile(r"`(.+?)`")
|
_RE_INLINE_CODE = re.compile(r"`(.+?)`")
|
||||||
_RE_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
|
_RE_HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
|
||||||
|
|
|
||||||
60
tests/gateway/test_helpers_markdown_underscore.py
Normal file
60
tests/gateway/test_helpers_markdown_underscore.py
Normal file
|
|
@@ -0,0 +1,60 @@
|
||||||
|
"""Tests for underscore markdown stripping in gateway helpers.
|
||||||
|
|
||||||
|
The blanket _(.+?)_ and __(.+?)__ patterns incorrectly consumed
|
||||||
|
snake_case identifiers like send_as_bot and user_id. The fix adds
|
||||||
|
lookbehind/lookahead boundaries so underscores adjacent to
|
||||||
|
alphanumeric characters are not treated as markdown formatting.
|
||||||
|
|
||||||
|
Mirrors fixes already tested in hermes_cli/test_cli_markdown_rendering.py.
|
||||||
|
"""
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from gateway.platforms.helpers import strip_markdown
|
||||||
|
|
||||||
|
|
||||||
|
class TestGatewayUnderscoreRegex(unittest.TestCase):
|
||||||
|
"""Verify markdown stripping preserves snake_case identifiers."""
|
||||||
|
|
||||||
|
def test_snake_case_preserved(self):
|
||||||
|
text = "Set send_as_bot to true and check user_id"
|
||||||
|
result = strip_markdown(text)
|
||||||
|
self.assertIn("send_as_bot", result)
|
||||||
|
self.assertIn("user_id", result)
|
||||||
|
|
||||||
|
def test_bold_underscore_stripped(self):
|
||||||
|
result = strip_markdown("Here is __bold text__ for you")
|
||||||
|
self.assertIn("bold text", result)
|
||||||
|
self.assertNotIn("__bold", result)
|
||||||
|
|
||||||
|
def test_italic_underscore_stripped(self):
|
||||||
|
result = strip_markdown("Here is _italic_ text")
|
||||||
|
self.assertIn("italic", result)
|
||||||
|
self.assertNotIn("_italic_", result)
|
||||||
|
|
||||||
|
def test_double_underscore_in_identifier_preserved(self):
|
||||||
|
"""Double underscores embedded in alphanumeric context should survive."""
|
||||||
|
# e.g. x__y where underscores are between alphanumeric chars
|
||||||
|
result = strip_markdown("Check my_var__name for details")
|
||||||
|
self.assertIn("my_var__name", result)
|
||||||
|
|
||||||
|
def test_config_keys_preserved(self):
|
||||||
|
result = strip_markdown("Set max_tokens to 4096 and api_base_url to localhost")
|
||||||
|
self.assertIn("max_tokens", result)
|
||||||
|
self.assertIn("api_base_url", result)
|
||||||
|
|
||||||
|
def test_asterisk_bold_unaffected(self):
|
||||||
|
result = strip_markdown("This is **bold** text")
|
||||||
|
self.assertIn("bold", result)
|
||||||
|
self.assertNotIn("**", result)
|
||||||
|
|
||||||
|
def test_mixed_formatting_and_identifiers(self):
|
||||||
|
result = strip_markdown("Use **bold** and set send_as_bot to _true_")
|
||||||
|
self.assertIn("send_as_bot", result)
|
||||||
|
self.assertNotIn("**", result)
|
||||||
|
|
||||||
|
def test_multiple_snake_case_in_one_line(self):
|
||||||
|
text = "Configure thread_id, session_key, and platform_name"
|
||||||
|
result = strip_markdown(text)
|
||||||
|
self.assertIn("thread_id", result)
|
||||||
|
self.assertIn("session_key", result)
|
||||||
|
self.assertIn("platform_name", result)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue