mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-19 10:02:16 +00:00
fix(desktop): stop bare-URL autolinker swallowing trailing emphasis asterisks (#41093)
The desktop markdown preprocessor autolinks bare URLs by wrapping them in <...>. RAW_URL_RE allowed '*' in its character classes, so a bold line with a URL and no separating space — e.g. '**PR opened: https://.../pull/123**' — greedily pulled the closing '**' into the href, producing a broken link and an unterminated bold run. Exclude '*' from both URL character classes; '_' and '~' (which can appear in real paths) are preserved.
This commit is contained in:
parent
ed81cfe3de
commit
349a3f601c
2 changed files with 37 additions and 1 deletions
|
|
@ -172,4 +172,33 @@ describe('preprocessMarkdown', () => {
|
|||
'<https://www.getyourguide.com/en-gb/san-juan-puerto-rico-l355/san-juan-old-san-juan-sunset-cruise-with-drinks-transfer-t405191/>'
|
||||
)
|
||||
})
|
||||
|
||||
it('does not swallow trailing emphasis asterisks into an autolinked url', () => {
|
||||
const input = '**PR opened: https://github.com/NousResearch/hermes-agent/pull/12345**'
|
||||
|
||||
const output = preprocessMarkdown(input)
|
||||
|
||||
// The URL is autolinked WITHOUT the trailing `**` glued into the href,
|
||||
// and the bold emphasis run stays intact so it renders as bold + a link.
|
||||
expect(output).toContain('<https://github.com/NousResearch/hermes-agent/pull/12345>')
|
||||
expect(output).not.toContain('pull/12345**>')
|
||||
expect(output).not.toContain('12345*')
|
||||
})
|
||||
|
||||
it('stops an autolinked url at mid-string bold markers', () => {
|
||||
const input = 'See https://github.com/foo/bar**bold** for details.'
|
||||
|
||||
const output = preprocessMarkdown(input)
|
||||
|
||||
expect(output).toContain('<https://github.com/foo/bar>')
|
||||
expect(output).toContain('**bold**')
|
||||
})
|
||||
|
||||
it('keeps underscores and tildes inside autolinked url paths', () => {
|
||||
const input = 'Docs at https://example.com/a_b/c~d/page'
|
||||
|
||||
const output = preprocessMarkdown(input)
|
||||
|
||||
expect(output).toContain('<https://example.com/a_b/c~d/page>')
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -8,7 +8,14 @@ const FENCE_LINE_RE = /^([ \t]*)(`{3,}|~{3,})([^\n]*)$/
|
|||
const EMPTY_FENCE_BLOCK_RE = /(^|\n)[ \t]*(?:`{3,}|~{3,})[^\n]*\n[ \t]*(?:`{3,}|~{3,})[ \t]*(?=\n|$)/g
|
||||
const CODE_FENCE_SPLIT_RE = /((?:```|~~~)[\s\S]*?(?:```|~~~))/g
|
||||
const INLINE_CODE_SPLIT_RE = /(`[^`\n]+`)/g
|
||||
const RAW_URL_RE = /https?:\/\/[^\s<>"'`]+[^\s<>"'`.,;:!?]/g
|
||||
// Bare-URL autolink matcher. The character classes EXCLUDE `*` so a URL that
|
||||
// abuts markdown emphasis with no separating space (e.g. `**label: https://x**`,
|
||||
// a very common LLM pattern) doesn't swallow the trailing `**` into the href.
|
||||
// `*` is legal but rare in real URL paths (an RFC 3986 sub-delim), and GFM's own autolink extension
|
||||
// likewise strips trailing emphasis/punctuation — so dropping it here is safe
|
||||
// and keeps the emphasis run intact. Other trailing punctuation is still peeled
|
||||
// off by the final `[^\s<>"'`*.,;:!?]` class.
|
||||
const RAW_URL_RE = /https?:\/\/[^\s<>"'`*]+[^\s<>"'`*.,;:!?]/g
|
||||
const LOCAL_PREVIEW_URL_RE = /(^|\s)https?:\/\/(?:localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])(?::\d+)?\/?[^\s<>"'`]*/gi
|
||||
const LOCAL_PREVIEW_ONLY_RE = /^https?:\/\/(?:localhost|127\.0\.0\.1|0\.0\.0\.0|\[::1\])(?::\d+)?\/?$/i
|
||||
const URL_ONLY_LINE_RE = /^\s*https?:\/\/\S+\s*$/i
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue