fix(cli): keep snake_case underscores intact in strip markdown mode

This commit is contained in:
IAvecilla 2026-04-21 15:32:59 -03:00
parent b2111a2b45
commit aa61831a14
No known key found for this signature in database
4 changed files with 36 additions and 5 deletions

6
cli.py
View file

@ -1155,11 +1155,11 @@ def _strip_markdown_syntax(text: str) -> str:
plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain) plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain)
plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain) plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain)
plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain) plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain)
plain = re.sub(r"___([^_]+)___", r"\1", plain) plain = re.sub(r"(?<!\w)___([^_]+)___(?!\w)", r"\1", plain)
plain = re.sub(r"\*\*([^*]+)\*\*", r"\1", plain) plain = re.sub(r"\*\*([^*]+)\*\*", r"\1", plain)
plain = re.sub(r"__([^_]+)__", r"\1", plain) plain = re.sub(r"(?<!\w)__([^_]+)__(?!\w)", r"\1", plain)
plain = re.sub(r"\*([^*]+)\*", r"\1", plain) plain = re.sub(r"\*([^*]+)\*", r"\1", plain)
plain = re.sub(r"_([^_]+)_", r"\1", plain) plain = re.sub(r"(?<!\w)_([^_]+)_(?!\w)", r"\1", plain)
plain = re.sub(r"~~([^~]+)~~", r"\1", plain) plain = re.sub(r"~~([^~]+)~~", r"\1", plain)
plain = re.sub(r"\n{3,}", "\n\n", plain) plain = re.sub(r"\n{3,}", "\n\n", plain)
return plain.strip("\n") return plain.strip("\n")

View file

@ -115,3 +115,26 @@ def test_final_assistant_content_can_leave_markdown_raw():
output = _render_to_text(renderable) output = _render_to_text(renderable)
assert "***Bold italic***" in output assert "***Bold italic***" in output
def test_strip_mode_preserves_intraword_underscores_in_snake_case_identifiers():
    """Strip mode must keep underscores that sit inside identifiers and paths.

    Renders a message containing a snake_case function name, an upper-case
    constant, and a file path in ``strip`` mode, then checks that each of
    them appears verbatim in the rendered text.
    """
    message = (
        "Let me look at recover_with_credential_pool and MY_CONST "
        "then /home/user/path_with_stuff/file.py"
    )
    rendered_text = _render_to_text(
        _render_final_assistant_content(message, mode="strip")
    )

    # Checked in the same order as the names occur in the message.
    for identifier in (
        "recover_with_credential_pool",
        "MY_CONST",
        "path_with_stuff",
    ):
        assert identifier in rendered_text
def test_strip_mode_still_strips_boundary_underscore_emphasis():
    """Strip mode must still remove underscore emphasis around whole words.

    ``_hi_`` and ``__bold__`` are delimited by spaces, so their underscore
    markers are expected to be dropped from the rendered text.
    """
    rendered_text = _render_to_text(
        _render_final_assistant_content(
            "say _hi_ and __bold__ now",
            mode="strip",
        )
    )

    assert "say hi and bold now" in rendered_text

View file

@ -97,4 +97,12 @@ describe('estimateRows', () => {
expect(estimateRows(md, 40)).toBe(2) expect(estimateRows(md, 40)).toBe(2)
}) })
// A line containing a snake_case identifier should get the same row estimate
// as an equally long line where the underscores are replaced by spaces.
// NOTE(review): both inputs are 40 characters and the width is 80, so this may
// pass even if intraword underscores were stripped — consider a narrower width
// once estimateRows' wrapping rules are confirmed.
it('keeps intraword underscores when sizing snake_case identifiers', () => {
  const width = 80
  const withUnderscores = 'look at recover_with_credential_pool now'
  const withSpaces = 'look at recover with credential pool now'

  const actualRows = estimateRows(withUnderscores, width)
  const expectedRows = estimateRows(withSpaces, width)

  expect(actualRows).toBe(expectedRows)
})
}) })

View file

@ -25,9 +25,9 @@ const renderEstimateLine = (line: string) => {
.replace(/\[(.+?)\]\((https?:\/\/[^\s)]+)\)/g, '$1') .replace(/\[(.+?)\]\((https?:\/\/[^\s)]+)\)/g, '$1')
.replace(/`([^`]+)`/g, '$1') .replace(/`([^`]+)`/g, '$1')
.replace(/\*\*(.+?)\*\*/g, '$1') .replace(/\*\*(.+?)\*\*/g, '$1')
.replace(/__(.+?)__/g, '$1') .replace(/(?<!\w)__(.+?)__(?!\w)/g, '$1')
.replace(/\*(.+?)\*/g, '$1') .replace(/\*(.+?)\*/g, '$1')
.replace(/_(.+?)_/g, '$1') .replace(/(?<!\w)_(.+?)_(?!\w)/g, '$1')
.replace(/~~(.+?)~~/g, '$1') .replace(/~~(.+?)~~/g, '$1')
.replace(/==(.+?)==/g, '$1') .replace(/==(.+?)==/g, '$1')
.replace(/\[\^([^\]]+)\]/g, '[$1]') .replace(/\[\^([^\]]+)\]/g, '[$1]')