fix(desktop): stop bare-URL autolinker swallowing trailing emphasis asterisks (#41093)

The desktop markdown preprocessor autolinks bare URLs by wrapping them in
<...>. RAW_URL_RE allowed '*' in its character classes, so a bold line with
a URL and no separating space — e.g. '**PR opened: https://.../pull/123**' —
greedily pulled the closing '**' into the href, producing a broken link and
an unterminated bold run. Exclude '*' from both URL character classes; '_'
and '~' (which can appear in real paths) are preserved.
This commit is contained in:
Teknium 2026-06-07 02:47:39 -07:00 committed by GitHub
parent ed81cfe3de
commit 349a3f601c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 37 additions and 1 deletions

View file

@ -172,4 +172,33 @@ describe('preprocessMarkdown', () => {
'<https://www.getyourguide.com/en-gb/san-juan-puerto-rico-l355/san-juan-old-san-juan-sunset-cruise-with-drinks-transfer-t405191/>'
)
})
it('does not swallow trailing emphasis asterisks into an autolinked url', () => {
  // Bold line whose URL runs straight into the closing `**` (no space between).
  const rendered = preprocessMarkdown('**PR opened: https://github.com/NousResearch/hermes-agent/pull/12345**')

  // The autolink must close right after the URL itself...
  expect(rendered).toContain('<https://github.com/NousResearch/hermes-agent/pull/12345>')

  // ...and no asterisk may end up glued to the URL, inside or at the edge of
  // the href, so the bold run around it stays balanced.
  for (const leaked of ['pull/12345**>', '12345*']) {
    expect(rendered).not.toContain(leaked)
  }
})
it('stops an autolinked url at mid-string bold markers', () => {
  const rendered = preprocessMarkdown('See https://github.com/foo/bar**bold** for details.')

  // First fragment: the link ends where the emphasis begins.
  // Second fragment: the emphasis run itself is left untouched.
  const expectedFragments = ['<https://github.com/foo/bar>', '**bold**']
  for (const fragment of expectedFragments) {
    expect(rendered).toContain(fragment)
  }
})
it('keeps underscores and tildes inside autolinked url paths', () => {
  // `_` and `~` are also markdown emphasis/strikethrough markers, but they
  // must survive inside the autolinked path.
  expect(preprocessMarkdown('Docs at https://example.com/a_b/c~d/page')).toContain(
    '<https://example.com/a_b/c~d/page>'
  )
})
})

View file

@ -8,7 +8,14 @@ const FENCE_LINE_RE = /^([ \t]*)(`{3,}|~{3,})([^\n]*)$/
// Matches a fence line (3+ backticks or 3+ tildes, optionally indented and
// followed by any text on the same line) that is immediately followed by a
// second fence line carrying nothing but optional whitespace — i.e. a fenced
// block with no content lines. Group 1 captures the preceding newline (or the
// empty string at the start of the input); the lookahead leaves the newline
// after the second fence unconsumed.
const EMPTY_FENCE_BLOCK_RE = /(^|\n)[ \t]*(?:`{3,}|~{3,})[^\n]*\n[ \t]*(?:`{3,}|~{3,})[ \t]*(?=\n|$)/g
// Captures a span running from a ``` or ~~~ marker to the nearest following
// ``` or ~~~ marker (lazy, may cross newlines). The opening and closing marker
// kinds are not required to match. The whole span is a capture group —
// presumably so String.prototype.split keeps the fenced segments in its result.
const CODE_FENCE_SPLIT_RE = /((?:```|~~~)[\s\S]*?(?:```|~~~))/g
// Captures a single-backtick inline code span: one backtick, one or more
// characters that are neither a backtick nor a newline, then a closing
// backtick. Also wrapped in a capture group (presumably for split, as above).
const INLINE_CODE_SPLIT_RE = /(`[^`\n]+`)/g
const RAW_URL_RE = /https?:\/\/[^\s<>"'`]+[^\s<>"'`.,;:!?]/g
// Bare-URL autolink matcher: `http://` or `https://` followed by one or more
// characters that are not whitespace, `<`, `>`, a quote, a backtick or `*`,
// ending on a character that additionally is none of `. , ; : ! ?` — so
// sentence punctuation directly after a URL is peeled off the match.
//
// `*` is excluded from BOTH character classes so that a URL abutting markdown
// emphasis with no separating space (e.g. `**label: https://x**`, a very
// common LLM pattern) doesn't swallow the closing `**` into the href and leave
// the bold run unterminated. Because the exclusion also applies mid-URL, the
// match ends at the first `*`: `https://host/bar**bold**` links only
// `https://host/bar`.
//
// Trade-off: `*` is a legal URL character (an RFC 3986 sub-delimiter), so a
// bare URL that genuinely contains one is cut short at that point. GFM's own
// autolink extension only drops `*` when it is trailing; this matcher is
// stricter and stops at any `*`, which keeps emphasis runs intact. `_` and `~`
// stay allowed because they occur in real paths.
const RAW_URL_RE = /https?:\/\/[^\s<>"'`*]+[^\s<>"'`*.,;:!?]/g
// Matches an http(s) URL, at the start of the input or right after whitespace,
// whose host is localhost, 127.0.0.1, 0.0.0.0 or [::1], with an optional
// `:port` and an optional path. Group 1 captures the leading whitespace (or the
// empty string at the start). Case-insensitive and global.
// NOTE(review): unlike RAW_URL_RE, the path class here still admits `*`, so a
// local URL that abuts `**` would include the asterisks — confirm whether that
// is intended for the way this pattern is used.
const LOCAL_PREVIEW_URL_RE = /(^|\s)https?:\/\/(?:localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])(?::\d+)?\/?[^\s<>"'`]*/gi
// Matches when the ENTIRE string is one of the same local origins: scheme,
// host, optional port and at most a single trailing slash — no path or query.
const LOCAL_PREVIEW_ONLY_RE = /^https?:\/\/(?:localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])(?::\d+)?\/?$/i
// Matches when the whole tested string is a single http(s) URL with optional
// surrounding whitespace. There is no `m` flag, so `^`/`$` anchor the full
// string rather than each line — presumably it is applied one line at a time.
const URL_ONLY_LINE_RE = /^\s*https?:\/\/\S+\s*$/i