From 349a3f601c6c135736df35fe9e4cbb313fd1122d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 7 Jun 2026 02:47:39 -0700 Subject: [PATCH] fix(desktop): stop bare-URL autolinker swallowing trailing emphasis asterisks (#41093) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The desktop markdown preprocessor autolinks bare URLs by wrapping them in <...>. RAW_URL_RE allowed '*' in its character classes, so a bold line with a URL and no separating space — e.g. '**PR opened: https://.../pull/123**' — greedily pulled the closing '**' into the href, producing a broken link and an unterminated bold run. Exclude '*' from both URL character classes; '_' and '~' (which can appear in real paths) are preserved. --- .../assistant-ui/markdown-text.test.ts | 29 +++++++++++++++++++ apps/desktop/src/lib/markdown-preprocess.ts | 9 +++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/apps/desktop/src/components/assistant-ui/markdown-text.test.ts b/apps/desktop/src/components/assistant-ui/markdown-text.test.ts index 22645ec7cd0..fad9944741f 100644 --- a/apps/desktop/src/components/assistant-ui/markdown-text.test.ts +++ b/apps/desktop/src/components/assistant-ui/markdown-text.test.ts @@ -172,4 +172,33 @@ describe('preprocessMarkdown', () => { '' ) }) + + it('does not swallow trailing emphasis asterisks into an autolinked url', () => { + const input = '**PR opened: https://github.com/NousResearch/hermes-agent/pull/12345**' + + const output = preprocessMarkdown(input) + + // The URL is autolinked WITHOUT the trailing `**` glued into the href, + // and the bold emphasis run stays intact so it renders as bold + a link. + expect(output).toContain('<https://github.com/NousResearch/hermes-agent/pull/12345>') + expect(output).not.toContain('pull/12345**>') + expect(output).not.toContain('12345*') + }) + + it('stops an autolinked url at mid-string bold markers', () => { + const input = 'See https://github.com/foo/bar**bold** for details.' 
+ + const output = preprocessMarkdown(input) + + expect(output).toContain('<https://github.com/foo/bar>') + expect(output).toContain('**bold**') + }) + + it('keeps underscores and tildes inside autolinked url paths', () => { + const input = 'Docs at https://example.com/a_b/c~d/page' + + const output = preprocessMarkdown(input) + + expect(output).toContain('<https://example.com/a_b/c~d/page>') + }) }) diff --git a/apps/desktop/src/lib/markdown-preprocess.ts b/apps/desktop/src/lib/markdown-preprocess.ts index c4d4637befa..aea5af1b82c 100644 --- a/apps/desktop/src/lib/markdown-preprocess.ts +++ b/apps/desktop/src/lib/markdown-preprocess.ts @@ -8,7 +8,14 @@ const FENCE_LINE_RE = /^([ \t]*)(`{3,}|~{3,})([^\n]*)$/ const EMPTY_FENCE_BLOCK_RE = /(^|\n)[ \t]*(?:`{3,}|~{3,})[^\n]*\n[ \t]*(?:`{3,}|~{3,})[ \t]*(?=\n|$)/g const CODE_FENCE_SPLIT_RE = /((?:```|~~~)[\s\S]*?(?:```|~~~))/g const INLINE_CODE_SPLIT_RE = /(`[^`\n]+`)/g -const RAW_URL_RE = /https?:\/\/[^\s<>"'`]+[^\s<>"'`.,;:!?]/g +// Bare-URL autolink matcher. The character classes EXCLUDE `*` so a URL that +// abuts markdown emphasis with no separating space (e.g. `**label: https://x**`, +// a very common LLM pattern) doesn't swallow the trailing `**` into the href. +// `*` is rare in real URLs (RFC 3986 permits it; such a URL is cut at the `*`), +// and GFM's autolink extension likewise strips trailing emphasis/punctuation, so +// dropping it is an accepted trade-off that keeps the emphasis run intact. Other +// trailing punctuation is peeled off by the final `[^\s<>"'`*.,;:!?]` class. +const RAW_URL_RE = /https?:\/\/[^\s<>"'`*]+[^\s<>"'`*.,;:!?]/g const LOCAL_PREVIEW_URL_RE = /(^|\s)https?:\/\/(?:localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])(?::\d+)?\/?[^\s<>"'`]*/gi const LOCAL_PREVIEW_ONLY_RE = /^https?:\/\/(?:localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])(?::\d+)?\/?$/i const URL_ONLY_LINE_RE = /^\s*https?:\/\/\S+\s*$/i