diff --git a/ui-tui/src/__tests__/markdown.test.ts b/ui-tui/src/__tests__/markdown.test.ts index 478cb6255c..0e95ba6c0f 100644 --- a/ui-tui/src/__tests__/markdown.test.ts +++ b/ui-tui/src/__tests__/markdown.test.ts @@ -23,6 +23,31 @@ describe('INLINE_RE emphasis', () => { expect(matches('a*b*c')).toEqual(['*b*']) expect(matches('a**bold**c')).toEqual(['**bold**']) }) + + it('matches short alphanumeric subscript (H~2~O, CO~2~, X~n~)', () => { + expect(matches('H~2~O')).toEqual(['~2~']) + expect(matches('CO~2~ levels')).toEqual(['~2~']) + expect(matches('the X~n~ term')).toEqual(['~n~']) + }) + + it('ignores kaomoji-style ~! and ~? punctuation', () => { + // Kimi / Qwen / GLM emit these as decorators and the whole span between + // two tildes used to get collapsed into one dim blob. + expect(matches('Aww ~! Building step by step, I love it ~!')).toEqual([]) + expect(matches('cool ~? yeah ~?')).toEqual([]) + expect(matches('mixed ~! and ~? flow')).toEqual([]) + }) + + it('ignores tilde spans that contain spaces or punctuation', () => { + // Real subscript doesn't contain spaces; a tilde followed by words-then- + // tilde is almost always conversational. Matching it swallows text. + expect(matches('hello ~good idea~ there')).toEqual([]) + expect(matches('x ~oh no!~ y')).toEqual([]) + }) + + it('does not let strikethrough eat subscript', () => { + expect(matches('~~strike~~ and H~2~O')).toEqual(['~~strike~~', '~2~']) + }) }) describe('stripInlineMarkup', () => { @@ -31,6 +56,11 @@ describe('stripInlineMarkup', () => { expect(stripInlineMarkup('browser_screenshot_ecc.png')).toBe('browser_screenshot_ecc.png') expect(stripInlineMarkup('__bold__ and foo__bar__')).toBe('bold and foo__bar__') }) + + it('leaves ~!/~? kaomoji alone and still handles real subscript', () => { + expect(stripInlineMarkup('Yay ~! nice work ~!')).toBe('Yay ~! nice work ~!') + expect(stripInlineMarkup('H~2~O and CO~2~')).toBe('H_2O and CO_2') + }) }) describe('protocol sentinels', () => { diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx index 28fd7b986f..e8b3f9b7c0 100644 --- a/ui-tui/src/components/markdown.tsx +++ b/ui-tui/src/components/markdown.tsx @@ -16,8 +16,15 @@ const MD_URL_RE = '((?:[^\\s()]|\\([^\\s()]*\\))+?)' export const MEDIA_LINE_RE = /^\s*[`"']?MEDIA:\s*(\S+?)[`"']?\s*$/ export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/ +// Subscript (`~x~`) is restricted to short alphanumeric runs so prose like +// `thing ~! more ~?` from Kimi / Qwen / GLM (kaomoji-style decorators) doesn't +// get parsed as a span that swallows everything between two stray tildes. Real +// Pandoc subscript is H~2~O / CO~2~ / X~n~ — always word-char content. Without +// this constraint the old pattern `~([^~\s][^~]*?)~` paired up `~!` openers +// with the next `~` anywhere on the line and rendered the interior as dim +// text with a `_` prefix. export const INLINE_RE = new RegExp( - `(!\\[(.*?)\\]\\(${MD_URL_RE}\\)|\\[(.+?)\\]\\(${MD_URL_RE}\\)|<((?:https?:\\/\\/|mailto:)[^>\\s]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,})>|~~(.+?)~~|\`([^\\\`]+)\`|\\*\\*(.+?)\\*\\*|(?\\s]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,})>|~~(.+?)~~|\`([^\\\`]+)\`|\\*\\*(.+?)\\*\\*|(? .replace(/==(.+?)==/g, '$1') .replace(/\[\^([^\]]+)\]/g, '[$1]') .replace(/\^([^^\s][^^]*?)\^/g, '^$1') - .replace(/~([^~\s][^~]*?)~/g, '_$1') + .replace(/~([A-Za-z0-9]{1,8})~/g, '_$1') const renderTable = (key: number, rows: string[][], t: Theme) => { const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length)))