From c3d39feb3ab8f0b2e891f1fd6f3bc0476a9845d8 Mon Sep 17 00:00:00 2001 From: Austin Pickett Date: Tue, 28 Apr 2026 19:08:11 -0400 Subject: [PATCH] feat(latex): latex in tui --- ui-tui/package-lock.json | 41 +- ui-tui/src/__tests__/markdown.test.ts | 60 ++ ui-tui/src/__tests__/mathUnicode.test.ts | 197 ++++++ .../src/__tests__/streamingMarkdown.test.ts | 42 ++ ui-tui/src/components/markdown.tsx | 112 +++- ui-tui/src/components/streamingMarkdown.tsx | 43 +- ui-tui/src/lib/mathUnicode.ts | 570 ++++++++++++++++++ 7 files changed, 1022 insertions(+), 43 deletions(-) create mode 100644 ui-tui/src/__tests__/mathUnicode.test.ts create mode 100644 ui-tui/src/lib/mathUnicode.ts diff --git a/ui-tui/package-lock.json b/ui-tui/package-lock.json index 017e9913bd..2efd64fe40 100644 --- a/ui-tui/package-lock.json +++ b/ui-tui/package-lock.json @@ -124,6 +124,7 @@ "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.29.0", "@babel/generator": "^7.29.0", @@ -501,31 +502,6 @@ "node": ">=6.9.0" } }, - "node_modules/@emnapi/core": { - "version": "1.10.0", - "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz", - "integrity": "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==", - "dev": true, - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "@emnapi/wasi-threads": "1.2.1", - "tslib": "^2.4.0" - } - }, - "node_modules/@emnapi/runtime": { - "version": "1.10.0", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz", - "integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==", - "dev": true, - "license": "MIT", - "optional": true, - "peer": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, "node_modules/@emnapi/wasi-threads": { "version": "1.2.1", "resolved": 
"https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz", @@ -1700,6 +1676,7 @@ "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "undici-types": "~7.19.0" } @@ -1710,6 +1687,7 @@ "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.2.2" } @@ -1720,6 +1698,7 @@ "integrity": "sha512-eSkwoemjo76bdXl2MYqtxg51HNwUSkWfODUOQ3PaTLZGh9uIWWFZIjyjaJnex7wXDu+TRx+ATsnSxdN9YWfRTQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.12.2", "@typescript-eslint/scope-manager": "8.58.1", @@ -1749,6 +1728,7 @@ "integrity": "sha512-gGkiNMPqerb2cJSVcruigx9eHBlLG14fSdPdqMoOcBfh+vvn4iCq2C8MzUB89PrxOXk0y3GZ1yIWb9aOzL93bw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.58.1", "@typescript-eslint/types": "8.58.1", @@ -2066,6 +2046,7 @@ "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "dev": true, "license": "MIT", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -2468,6 +2449,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", @@ -3203,6 +3185,7 @@ "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -3334,6 +3317,7 @@ "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "dev": true, "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ 
-4242,6 +4226,7 @@ "resolved": "https://registry.npmjs.org/ink-text-input/-/ink-text-input-6.0.0.tgz", "integrity": "sha512-Fw64n7Yha5deb1rHY137zHTAbSTNelUKuB5Kkk2HACXEtwIHBCf9OH2tP/LQ9fRYTl1F0dZgbW0zPnZk6FA9Lw==", "license": "MIT", + "peer": true, "dependencies": { "chalk": "^5.3.0", "type-fest": "^4.18.2" @@ -5678,6 +5663,7 @@ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -5787,6 +5773,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.2.5.tgz", "integrity": "sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -6611,6 +6598,7 @@ "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "esbuild": "~0.27.0", "get-tsconfig": "^4.7.5" @@ -6737,6 +6725,7 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -6846,6 +6835,7 @@ "integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "lightningcss": "^1.32.0", "picomatch": "^4.0.4", @@ -7261,6 +7251,7 @@ "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "dev": true, "license": "MIT", + "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/ui-tui/src/__tests__/markdown.test.ts b/ui-tui/src/__tests__/markdown.test.ts index 0e95ba6c0f..a415668f46 100644 --- a/ui-tui/src/__tests__/markdown.test.ts +++ b/ui-tui/src/__tests__/markdown.test.ts @@ -61,6 
+61,66 @@ describe('stripInlineMarkup', () => { expect(stripInlineMarkup('Yay ~! nice work ~!')).toBe('Yay ~! nice work ~!') expect(stripInlineMarkup('H~2~O and CO~2~')).toBe('H_2O and CO_2') }) + + it('strips inline math delimiters but keeps the formula text', () => { + expect(stripInlineMarkup('$\\mathbb{Z}$ is a ring')).toBe('\\mathbb{Z} is a ring') + expect(stripInlineMarkup('see \\(a + b\\) ok')).toBe('see a + b ok') + }) +}) + +describe('INLINE_RE inline math', () => { + it('matches single-dollar math and beats emphasis at the same start', () => { + // Without math handling, `*b*` would have matched as italics and + // corrupted the formula. With math added to INLINE_RE, the leftmost + // match at column 0 (`$P=a*b*c$`) wins. + expect(matches('$P=a*b*c$')).toEqual(['$P=a*b*c$']) + expect(matches('see $\\mathbb{Z}$ here')).toEqual(['$\\mathbb{Z}$']) + }) + + it('does not match currency-style prose', () => { + expect(matches('it costs $5 and $10')).toEqual([]) + expect(matches('paid $5')).toEqual([]) + }) + + it('does not let inline math swallow a $$ display fence', () => { + // `$$x$$` is a display block, not two abutting inline-math spans. + expect(matches('$$x$$')).toEqual([]) + }) + + it('matches \\(...\\) inline math', () => { + expect(matches('foo \\(x + y\\) bar')).toEqual(['\\(x + y\\)']) + }) + + it('does not corrupt subscripts/superscripts inside math', () => { + // `_n` and `^r` are markdown emphasis/superscript markers in prose, but + // inside a `$...$` span the entire formula is captured as a single + // inline-math token so the inner regexes never see those characters. 
+ expect(matches('$P=a_n x^n + a_0$')).toEqual(['$P=a_n x^n + a_0$']) + expect(matches('$\\beta_1,\\dots,\\beta_r$')).toEqual(['$\\beta_1,\\dots,\\beta_r$']) + }) + + it('places math content in the correct capture group (regression: m[16] is bare URL)', () => { + // When `m[16]` was the bare URL group AND the inline-math `$...$` + // group simultaneously (because the bare URL pattern lacked its own + // capturing parens), MdInline rendered `$\\mathbb{R}$` as an + // underlined autolink instead of italic amber math. Lock down the + // numbering: math goes in m[17] / m[18], URLs go in m[16]. + const url = [...'see https://example.com here'.matchAll(INLINE_RE)][0]! + const dollarMath = [...'$\\mathbb{R}$'.matchAll(INLINE_RE)][0]! + const parenMath = [...'\\(\\pi\\)'.matchAll(INLINE_RE)][0]! + + expect(url[16]).toBe('https://example.com') + expect(url[17]).toBeUndefined() + expect(url[18]).toBeUndefined() + + expect(dollarMath[16]).toBeUndefined() + expect(dollarMath[17]).toBe('\\mathbb{R}') + expect(dollarMath[18]).toBeUndefined() + + expect(parenMath[16]).toBeUndefined() + expect(parenMath[17]).toBeUndefined() + expect(parenMath[18]).toBe('\\pi') + }) }) describe('protocol sentinels', () => { diff --git a/ui-tui/src/__tests__/mathUnicode.test.ts b/ui-tui/src/__tests__/mathUnicode.test.ts new file mode 100644 index 0000000000..f395174bd7 --- /dev/null +++ b/ui-tui/src/__tests__/mathUnicode.test.ts @@ -0,0 +1,197 @@ +import { describe, expect, it } from 'vitest' + +import { texToUnicode } from '../lib/mathUnicode.js' + +describe('texToUnicode — symbols', () => { + it('substitutes lowercase Greek', () => { + expect(texToUnicode('\\alpha + \\beta + \\pi')).toBe('α + β + π') + expect(texToUnicode('\\omega')).toBe('ω') + }) + + it('substitutes uppercase Greek', () => { + expect(texToUnicode('\\Sigma \\Omega \\Pi')).toBe('Σ Ω Π') + }) + + it('substitutes set theory and logic operators', () => { + expect(texToUnicode('A \\cup B \\cap C')).toBe('A ∪ B ∩ C') + 
expect(texToUnicode('\\forall x \\in \\emptyset')).toBe('∀ x ∈ ∅') + expect(texToUnicode('p \\implies q \\iff r')).toBe('p ⟹ q ⟺ r') + }) + + it('substitutes relations and arrows', () => { + expect(texToUnicode('a \\le b \\ge c \\ne d')).toBe('a ≤ b ≥ c ≠ d') + expect(texToUnicode('f: A \\to B')).toBe('f: A → B') + }) + + it('uses longest-match-first so \\leq beats \\le', () => { + expect(texToUnicode('\\leq')).toBe('≤') + }) + + it('preserves unknown commands that share a prefix with known ones', () => { + // `\leqq` is a real LaTeX command (≦) we don't have in our table. + // The word-boundary lookahead prevents `\le` from matching, so the + // whole thing is preserved verbatim — much better than `≤qq`. + expect(texToUnicode('\\leqq')).toBe('\\leqq') + }) + + it('refuses to substitute a partial command (word boundary)', () => { + expect(texToUnicode('\\alphabet')).toBe('\\alphabet') + expect(texToUnicode('\\pin')).toBe('\\pin') + }) +}) + +describe('texToUnicode — blackboard / calligraphic / fraktur', () => { + it('renders \\mathbb capitals', () => { + expect(texToUnicode('\\mathbb{R}')).toBe('ℝ') + expect(texToUnicode('\\mathbb{N} \\subset \\mathbb{Z} \\subset \\mathbb{Q} \\subset \\mathbb{R}')).toBe('ℕ ⊂ ℤ ⊂ ℚ ⊂ ℝ') + }) + + it('renders \\mathcal and \\mathfrak', () => { + expect(texToUnicode('\\mathcal{F} \\subset \\mathfrak{A}')).toBe('ℱ ⊂ 𝔄') + }) + + it('preserves \\mathbb{...} when argument is multi-letter or non-letter', () => { + expect(texToUnicode('\\mathbb{NN}')).toBe('\\mathbb{NN}') + expect(texToUnicode('\\mathbb{1}')).toBe('\\mathbb{1}') + }) + + it('strips \\mathbf / \\mathit / \\mathrm / \\text wrappers (no Unicode bold/italic in monospace)', () => { + expect(texToUnicode('\\mathbf{x}')).toBe('x') + expect(texToUnicode('\\text{if } x > 0')).toBe('if x > 0') + expect(texToUnicode('\\operatorname{rank}(A)')).toBe('rank(A)') + }) +}) + +describe('texToUnicode — sub / superscripts', () => { + it('converts simple superscripts', () => { + 
expect(texToUnicode('x^2 + y^2')).toBe('x² + y²') + expect(texToUnicode('e^{n}')).toBe('eⁿ') + }) + + it('converts simple subscripts', () => { + expect(texToUnicode('a_1 + a_2 + a_n')).toBe('a₁ + a₂ + aₙ') + expect(texToUnicode('x_{0}')).toBe('x₀') + }) + + it('converts mixed-content scripts when every glyph has a Unicode form', () => { + // `+`, digits, and lowercase letters all have superscript glyphs, + // so `n+1` → `ⁿ⁺¹`. Comma has no subscript form, so `i,j` falls + // back to `_(i,j)` (parens) rather than partially substituting — + // parens read as ordinary grouping while braces look like leftover + // unrendered LaTeX. + expect(texToUnicode('x^{n+1}')).toBe('xⁿ⁺¹') + expect(texToUnicode('a_{i,j}')).toBe('a_(i,j)') + }) + + it('uses parens (not braces) when the body has Greek with no superscript form', () => { + // π has no Unicode superscript, so `e^{i\pi}` after symbol pass is + // `e^{iπ}` and the script fallback emits `e^(iπ)` — much more + // readable than the LaTeX-looking `e^{iπ}`. + expect(texToUnicode('e^{i\\pi}')).toBe('e^(iπ)') + }) + + it('strips braces on script fallback when body collapses to a single char', () => { + // `^{\infty}` → symbol pass produces `^{∞}` → convertScript can't + // find ∞ in SUPERSCRIPT, but the body is one char so we drop the + // braces and emit `^∞` (much more readable than `^{∞}`). 
+ expect(texToUnicode('e^{\\infty}')).toBe('e^∞') + }) + + it('handles a real-world sum', () => { + expect(texToUnicode('\\sum_{n=0}^{\\infty} \\frac{1}{n!}')).toBe('∑ₙ₌₀^∞ 1/n!') + }) +}) + +describe('texToUnicode — fractions', () => { + it('collapses \\frac to a/b', () => { + expect(texToUnicode('\\frac{1}{2}')).toBe('1/2') + expect(texToUnicode('\\frac{a}{b}')).toBe('a/b') + }) + + it('parenthesises multi-token numerator / denominator', () => { + expect(texToUnicode('\\frac{n+1}{2}')).toBe('(n+1)/2') + expect(texToUnicode('\\frac{a + b}{c - d}')).toBe('(a + b)/(c - d)') + }) + + it('handles nested fractions', () => { + expect(texToUnicode('\\frac{1}{\\frac{1}{x}}')).toBe('1/(1/x)') + }) +}) + +describe('texToUnicode — combining marks', () => { + it('applies \\overline / \\bar / \\hat / \\vec / \\tilde', () => { + expect(texToUnicode('\\overline{x}')).toBe('x\u0305') + expect(texToUnicode('\\hat{y}')).toBe('y\u0302') + expect(texToUnicode('\\vec{v}')).toBe('v\u20D7') + }) +}) + +describe('texToUnicode — left/right delimiters', () => { + it('strips \\left and \\right keeping the delimiter character', () => { + expect(texToUnicode('\\left( x + y \\right)')).toBe('( x + y )') + expect(texToUnicode('\\left| x \\right|')).toBe('| x |') + }) + + it('handles escaped delimiters \\left\\{ ... \\right\\}', () => { + expect(texToUnicode('\\left\\{p/q \\mid q \\neq 0\\right\\}')).toBe('{p/q ∣ q ≠ 0}') + }) + + it('handles named delimiters via \\left\\langle / \\right\\rangle', () => { + expect(texToUnicode('\\left\\langle u, v \\right\\rangle')).toBe('⟨ u, v ⟩') + }) + + it('drops \\left. and \\right. (which are explicit "no delimiter")', () => { + expect(texToUnicode('\\left. 
f \\right|')).toBe(' f |') + }) + + it('preserves \\leftarrow / \\rightarrow (word boundary blocks the strip)', () => { + expect(texToUnicode('A \\leftarrow B \\rightarrow C')).toBe('A ← B → C') + }) +}) + +describe('texToUnicode — labelled arrows', () => { + it('renders \\xrightarrow{label} as ─label→', () => { + expect(texToUnicode('a \\xrightarrow{x=1} b')).toBe('a ─x=1→ b') + }) + + it('renders \\xleftarrow{label} as ←label─', () => { + expect(texToUnicode('a \\xleftarrow{n} b')).toBe('a ←n─ b') + }) + + it('still applies symbol substitution inside the label', () => { + expect(texToUnicode('a \\xrightarrow{n \\to \\infty} L')).toBe('a ─n → ∞→ L') + }) +}) + +describe('texToUnicode — punctuation commands without lookahead', () => { + it('substitutes \\{ even when immediately followed by a letter', () => { + // Regression: with a global `(?![A-Za-z])` lookahead, `\{p` refused + // to substitute (because `p` is a letter) and rendered as `\{p`. + expect(texToUnicode('\\{p, q\\}')).toBe('{p, q}') + }) + + it('substitutes thin-space \\, before a letter', () => { + expect(texToUnicode('a\\,b')).toBe('a b') + }) +}) + +describe('texToUnicode — round-trip realism', () => { + it('renders a typical model-emitted formula', () => { + expect(texToUnicode('\\alpha \\in \\mathbb{R}, \\alpha \\notin \\mathbb{Q}')).toBe('α ∈ ℝ, α ∉ ℚ') + }) + + it('preserves unknown commands verbatim', () => { + expect(texToUnicode('\\bigtriangleup \\circledast')).toBe('\\bigtriangleup \\circledast') + }) + + it('handles commands without delimiters between', () => { + // Word-boundary lookahead means `\alpha\beta` doesn't accidentally + // match `\alphabeta` as one ungrouped token. 
+ expect(texToUnicode('\\alpha\\beta')).toBe('αβ') + }) + + it('leaves plain text alone', () => { + expect(texToUnicode('hello world')).toBe('hello world') + expect(texToUnicode('')).toBe('') + }) +}) diff --git a/ui-tui/src/__tests__/streamingMarkdown.test.ts b/ui-tui/src/__tests__/streamingMarkdown.test.ts index cd283d8a9e..0655cbba89 100644 --- a/ui-tui/src/__tests__/streamingMarkdown.test.ts +++ b/ui-tui/src/__tests__/streamingMarkdown.test.ts @@ -67,6 +67,48 @@ describe('findStableBoundary', () => { it('handles empty input', () => { expect(findStableBoundary('')).toBe(-1) }) + + it('refuses to split inside an open $$ math block', () => { + // Display math has been opened but not closed; the only blank line + // sits inside the open block, so there's no safe boundary yet. + const text = '$$\nx + y\n\nmore math' + + expect(findStableBoundary(text)).toBe(-1) + }) + + it('allows splitting after a $$ math block closes', () => { + const text = '$$\nx + y = z\n$$\n\nnarration continues' + const idx = findStableBoundary(text) + + expect(text.slice(0, idx)).toBe('$$\nx + y = z\n$$\n\n') + expect(text.slice(idx)).toBe('narration continues') + }) + + it('splits before an open $$ block but not inside', () => { + // Mirror of the existing fenced-code test: prose, then an unclosed + // math block. The only safe boundary is the blank line BEFORE `$$`. + const text = 'intro paragraph\n\n$$\nx + y\n\nmore' + const idx = findStableBoundary(text) + + expect(text.slice(0, idx)).toBe('intro paragraph\n\n') + expect(text.slice(idx).startsWith('$$')).toBe(true) + }) + + it('treats single-line $$x$$ as zero net toggle', () => { + // `$$x = y$$` opens AND closes on one line, so the stable boundary + // after it is allowed. 
+ const text = 'intro\n\n$$x = y$$\n\nnarration' + const idx = findStableBoundary(text) + + expect(text.slice(0, idx)).toBe('intro\n\n$$x = y$$\n\n') + expect(text.slice(idx)).toBe('narration') + }) + + it('refuses to split inside an open \\[ math block', () => { + const text = '\\[\nx + y\n\nmore' + + expect(findStableBoundary(text)).toBe(-1) + }) }) describe('streaming theme assumption', () => { diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx index d3b6710b9e..46e6297426 100644 --- a/ui-tui/src/components/markdown.tsx +++ b/ui-tui/src/components/markdown.tsx @@ -2,6 +2,7 @@ import { Box, Link, Text } from '@hermes/ink' import { memo, type ReactNode, useMemo } from 'react' import { ensureEmojiPresentation } from '../lib/emoji.js' +import { texToUnicode } from '../lib/mathUnicode.js' import { highlightLine, isHighlightable } from '../lib/syntax.js' import type { Theme } from '../theme.js' @@ -19,6 +20,15 @@ const QUOTE_RE = /^\s*(?:>\s*)+/ const TABLE_DIVIDER_CELL_RE = /^:?-{3,}:?$/ const MD_URL_RE = '((?:[^\\s()]|\\([^\\s()]*\\))+?)' +// Display math openers: `$$ ... $$` (TeX) and `\[ ... \]` (LaTeX). The +// opener is matched only when `$$` / `\[` appears at the very start of the +// trimmed line — `startsWith('$$')` used to fire on prose like +// `$$x+y$$ followed by more`, opening a block that never closed because the +// trailing `$$` on the same line was invisible to the close-scan loop. +const MATH_BLOCK_OPEN_RE = /^\s*(\$\$|\\\[)(.*)$/ +const MATH_BLOCK_CLOSE_DOLLAR_RE = /^(.*?)\$\$\s*$/ +const MATH_BLOCK_CLOSE_BRACKET_RE = /^(.*?)\\\]\s*$/ + export const MEDIA_LINE_RE = /^\s*[`"']?MEDIA:\s*(\S+?)[`"']?\s*$/ export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/ @@ -31,6 +41,13 @@ export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/ // `thing ~! 
more ~?` from Kimi / Qwen / GLM (kaomoji-style decorators) // doesn't pair up the first `~` with the next one on the line and swallow // the text between them as a dim `_`-prefixed span. +// +// Inline math (`$x$` and `\(x\)`) takes precedence over emphasis at the +// same start position because regex alternation is leftmost-first; a +// dollar-delimited span at column N wins over a `*` at column N+1, so +// `$P=a*b*c$` renders as math instead of having `*b*` corrupted into +// italics. Single-character minimums and "no space adjacent to delimiter" +// rules keep currency prose like `$5 to $10` from being swallowed. export const INLINE_RE = new RegExp( [ `!\\[(.*?)\\]\\(${MD_URL_RE}\\)`, // 1,2 image @@ -46,7 +63,13 @@ export const INLINE_RE = new RegExp( `\\[\\^([^\\]]+)\\]`, // 13 footnote ref `\\^([^^\\s][^^]*?)\\^`, // 14 superscript `~([A-Za-z0-9]{1,8})~`, // 15 subscript - `https?:\\/\\/[^\\s<]+` // 16 bare URL + `(https?:\\/\\/[^\\s<]+)`, // 16 bare URL — wrapped so it owns its own + // capture group; without this, the math + // spans below would land in m[16] and the + // MdInline dispatcher would treat them as + // bare URLs and render them as autolinks. + `(? .replace(/\[\^([^\]]+)\]/g, '[$1]') .replace(/\^([^^\s][^^]*?)\^/g, '^$1') .replace(/~([A-Za-z0-9]{1,8})~/g, '_$1') + .replace(/(? { const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length))) @@ -201,6 +226,19 @@ function MdInline({ t, text }: { t: Theme; text: string }) { if (url.length < m[16].length) { parts.push({m[16].slice(url.length)}) } + } else if (m[17] ?? m[18]) { + // Inline math is run through `texToUnicode` (Greek letters, ℕℤℚℝ, + // operators, sub/superscripts, fractions) and rendered in italic + // amber. Italic is the disambiguator — links use amber+underline, + // so without italic readers can't tell `\mathbb{R}` (math) from a + // hyperlinked word. 
Anything `texToUnicode` doesn't recognise is + // preserved verbatim, so unfamiliar commands just look like their + // raw LaTeX rather than vanishing. + parts.push( + + {texToUnicode(m[17] ?? m[18]!)} + + ) } last = i + m[0].length @@ -398,32 +436,84 @@ function MdImpl({ compact, t, text }: MdProps) { continue } - if (line.trim().startsWith('$$')) { - start('code') + const mathOpen = line.match(MATH_BLOCK_OPEN_RE) + if (mathOpen) { + const opener = mathOpen[1]! + const closeRe = opener === '$$' ? MATH_BLOCK_CLOSE_DOLLAR_RE : MATH_BLOCK_CLOSE_BRACKET_RE + const headRest = mathOpen[2] ?? '' const block: string[] = [] - for (i++; i < lines.length; i++) { - if (lines[i]!.trim().startsWith('$$')) { - i++ + // Single-line block: `$$x + y = z$$` or `\[x\]`. Capture inner content + // and emit the block immediately. Without this, the close-scan loop + // skips line `i` and treats the next opener as our closer, swallowing + // every paragraph in between. + const sameLineClose = headRest.match(closeRe) + + if (sameLineClose) { + const inner = sameLineClose[1]!.trim() + + start('code') + nodes.push( + + ─ math + + {inner ? {texToUnicode(inner)} : null} + + ) + i++ + + continue + } + + // Multi-line block: scan ahead for a real closer before committing. + // If none exists in the rest of the document, render this line as a + // paragraph instead of consuming everything that follows. + let closeIdx = -1 + + for (let j = i + 1; j < lines.length; j++) { + if (closeRe.test(lines[j]!)) { + closeIdx = j break } - - block.push(lines[i]!) } + if (closeIdx < 0) { + start('paragraph') + nodes.push() + i++ + + continue + } + + if (headRest.trim()) { + block.push(headRest) + } + + for (let j = i + 1; j < closeIdx; j++) { + block.push(lines[j]!) 
+ } + + const tail = lines[closeIdx]!.match(closeRe)![1]!.trimEnd() + + if (tail.trim()) { + block.push(tail) + } + + start('code') nodes.push( ─ math {block.map((l, j) => ( - {l} + {texToUnicode(l)} ))} ) + i = closeIdx + 1 continue } @@ -434,7 +524,7 @@ function MdImpl({ compact, t, text }: MdProps) { start('heading') nodes.push( - {heading} + ) i++ @@ -446,7 +536,7 @@ function MdImpl({ compact, t, text }: MdProps) { start('heading') nodes.push( - {line.trim()} + ) i += 2 diff --git a/ui-tui/src/components/streamingMarkdown.tsx b/ui-tui/src/components/streamingMarkdown.tsx index 111ed61e09..86dde930e5 100644 --- a/ui-tui/src/components/streamingMarkdown.tsx +++ b/ui-tui/src/components/streamingMarkdown.tsx @@ -35,19 +35,48 @@ import type { Theme } from '../theme.js' import { Md } from './markdown.js' -// Count ``` or ~~~ fence toggles in `s` up to `end`. Odd = currently inside -// a fenced block; we can't split the prefix there or we'd orphan the fence. +// Track ``` / ~~~ code fences AND `$$` / `\[…\]` math fences in `s` up +// to `end`. Returns true when `end` falls inside an open fence; +// splitting the prefix there would orphan the fence and let the +// unstable suffix re-render as broken markdown. Math fences are only +// tracked while no code fence is open, so ` ```\n$$x$$\n``` ` (math +// example inside a code block) is ignored. A `$$x$$` line that opens +// AND closes on its own (length >= 4, ends with `$$`) never opens one. const fenceOpenAt = (s: string, end: number) => { - let open = false + let codeOpen = false + let mathOpen = false + let mathOpener: '$$' | '\\[' | null = null let i = 0 while (i < end) { const nl = s.indexOf('\n', i) const lineEnd = nl < 0 || nl > end ? 
end : nl - const line = s.slice(i, lineEnd) + const line = s.slice(i, lineEnd).trim() - if (/^\s*(?:`{3,}|~{3,})/.test(line)) { - open = !open + if (/^(?:`{3,}|~{3,})/.test(line)) { + codeOpen = !codeOpen + } else if (!codeOpen) { + if (!mathOpen && /^\$\$/.test(line)) { + const isSingleLine = line.length >= 4 && /\$\$$/.test(line) + + if (!isSingleLine) { + mathOpen = true + mathOpener = '$$' + } + } else if (!mathOpen && /^\\\[/.test(line)) { + const isSingleLine = /\\\]$/.test(line) + + if (!isSingleLine) { + mathOpen = true + mathOpener = '\\[' + } + } else if (mathOpen && mathOpener === '$$' && /\$\$$/.test(line)) { + mathOpen = false + mathOpener = null + } else if (mathOpen && mathOpener === '\\[' && /\\\]$/.test(line)) { + mathOpen = false + mathOpener = null + } } if (nl < 0 || nl >= end) { @@ -57,7 +86,7 @@ const fenceOpenAt = (s: string, end: number) => { i = nl + 1 } - return open + return codeOpen || mathOpen } // Find the last "\n\n" boundary before `end` that is OUTSIDE a fenced code diff --git a/ui-tui/src/lib/mathUnicode.ts b/ui-tui/src/lib/mathUnicode.ts new file mode 100644 index 0000000000..162cc265a7 --- /dev/null +++ b/ui-tui/src/lib/mathUnicode.ts @@ -0,0 +1,570 @@ +// Best-effort LaTeX → Unicode for inline / display math captured by the +// markdown renderer. The terminal can't typeset LaTeX, but Unicode covers +// most of what models actually emit: Greek letters, blackboard / fraktur / +// calligraphic capitals, set theory + logic operators, common arrows, +// sub/superscripts, and `\frac{a}{b}` collapsed to `a/b`. +// +// Design rules: +// • Pure regex pipeline. Anything we don't recognise is preserved +// verbatim (so a `\foo{bar}` we've never heard of still survives). +// A real LaTeX parser would be more correct but throws on partial +// input — terminal users would rather see the raw command than a +// parse-error placeholder. 
+// • Longest-match-first ordering on commands so `\le` doesn't shadow +// `\leq`, `\sub` doesn't shadow `\subseteq`, etc. +// • Word-boundary lookahead `(?![A-Za-z])` after each letter-ending +// command so `\pix` (made-up) doesn't get partially substituted as `π`. +// • `\mathbb{X}`, `\mathcal{X}`, `\mathfrak{X}` only handle a single +// letter argument — multi-letter `\mathbb{NN}` is rare and would +// need a real parser to do correctly. +// • Sub/super scripts only convert if EVERY character has a Unicode +// equivalent (`^{n+1}` becomes `ⁿ⁺¹`). A body with any unmapped +// character is never partially converted: it is emitted after the +// sigil in parens (`a_{i,j}` becomes `a_(i,j)`), or bare if one char. + +const SYMBOLS: Record = { + // Greek lowercase + '\\alpha': 'α', + '\\beta': 'β', + '\\gamma': 'γ', + '\\delta': 'δ', + '\\epsilon': 'ε', + '\\varepsilon': 'ε', + '\\zeta': 'ζ', + '\\eta': 'η', + '\\theta': 'θ', + '\\vartheta': 'ϑ', + '\\iota': 'ι', + '\\kappa': 'κ', + '\\lambda': 'λ', + '\\mu': 'μ', + '\\nu': 'ν', + '\\xi': 'ξ', + '\\pi': 'π', + '\\varpi': 'ϖ', + '\\rho': 'ρ', + '\\varrho': 'ϱ', + '\\sigma': 'σ', + '\\varsigma': 'ς', + '\\tau': 'τ', + '\\upsilon': 'υ', + '\\phi': 'φ', + '\\varphi': 'φ', + '\\chi': 'χ', + '\\psi': 'ψ', + '\\omega': 'ω', + + // Greek uppercase + '\\Gamma': 'Γ', + '\\Delta': 'Δ', + '\\Theta': 'Θ', + '\\Lambda': 'Λ', + '\\Xi': 'Ξ', + '\\Pi': 'Π', + '\\Sigma': 'Σ', + '\\Upsilon': 'Υ', + '\\Phi': 'Φ', + '\\Psi': 'Ψ', + '\\Omega': 'Ω', + + // Big operators + '\\sum': '∑', + '\\prod': '∏', + '\\coprod': '∐', + '\\int': '∫', + '\\iint': '∬', + '\\iiint': '∭', + '\\oint': '∮', + '\\bigcup': '⋃', + '\\bigcap': '⋂', + '\\bigvee': '⋁', + '\\bigwedge': '⋀', + '\\bigoplus': '⨁', + '\\bigotimes': '⨂', + + // Calculus + '\\partial': '∂', + '\\nabla': '∇', + '\\sqrt': '√', + + // Sets + '\\emptyset': '∅', + '\\varnothing': '∅', + '\\infty': '∞', + '\\in': '∈', + '\\notin': '∉', + '\\ni': '∋', + '\\subset': '⊂', + '\\supset': '⊃', + '\\subseteq': '⊆', + 
'\\supseteq': '⊇', + '\\subsetneq': '⊊', + '\\supsetneq': '⊋', + '\\cup': '∪', + '\\cap': '∩', + '\\setminus': '∖', + '\\complement': '∁', + + // Logic + '\\forall': '∀', + '\\exists': '∃', + '\\nexists': '∄', + '\\land': '∧', + '\\lor': '∨', + '\\lnot': '¬', + '\\neg': '¬', + '\\therefore': '∴', + '\\because': '∵', + + // Relations + '\\le': '≤', + '\\leq': '≤', + '\\ge': '≥', + '\\geq': '≥', + '\\ne': '≠', + '\\neq': '≠', + '\\ll': '≪', + '\\gg': '≫', + '\\approx': '≈', + '\\equiv': '≡', + '\\cong': '≅', + '\\sim': '∼', + '\\simeq': '≃', + '\\propto': '∝', + '\\perp': '⊥', + '\\parallel': '∥', + '\\models': '⊨', + '\\vdash': '⊢', + '\\mid': '∣', + + // Brackets / fences (named delimiter commands; the `\left\X` / `\right\X` + // unwrapping below leaves these behind for the symbol pass to resolve). + '\\langle': '⟨', + '\\rangle': '⟩', + '\\lceil': '⌈', + '\\rceil': '⌉', + '\\lfloor': '⌊', + '\\rfloor': '⌋', + '\\|': '‖', + + // Arrows + '\\to': '→', + '\\rightarrow': '→', + '\\leftarrow': '←', + '\\leftrightarrow': '↔', + '\\Rightarrow': '⇒', + '\\Leftarrow': '⇐', + '\\Leftrightarrow': '⇔', + '\\implies': '⟹', + '\\impliedby': '⟸', + '\\iff': '⟺', + '\\mapsto': '↦', + '\\hookrightarrow': '↪', + '\\hookleftarrow': '↩', + '\\uparrow': '↑', + '\\downarrow': '↓', + '\\updownarrow': '↕', + + // Binary operators + '\\cdot': '⋅', + '\\cdots': '⋯', + '\\ldots': '…', + '\\dots': '…', + '\\dotsb': '…', + '\\dotsc': '…', + '\\vdots': '⋮', + '\\ddots': '⋱', + '\\times': '×', + '\\div': '÷', + '\\pm': '±', + '\\mp': '∓', + '\\circ': '∘', + '\\bullet': '•', + '\\star': '⋆', + '\\ast': '∗', + '\\oplus': '⊕', + '\\ominus': '⊖', + '\\otimes': '⊗', + '\\odot': '⊙', + '\\diamond': '⋄', + '\\angle': '∠', + '\\triangle': '△', + + // Spacing — collapse to varying widths of regular space + '\\,': ' ', + '\\;': ' ', + '\\:': ' ', + '\\!': '', + '\\ ': ' ', + '\\quad': ' ', + '\\qquad': ' ', + + // Functions (LaTeX renders these in roman; we just keep the name) + '\\sin': 'sin', + 
'\\cos': 'cos', + '\\tan': 'tan', + '\\cot': 'cot', + '\\sec': 'sec', + '\\csc': 'csc', + '\\arcsin': 'arcsin', + '\\arccos': 'arccos', + '\\arctan': 'arctan', + '\\sinh': 'sinh', + '\\cosh': 'cosh', + '\\tanh': 'tanh', + '\\log': 'log', + '\\ln': 'ln', + '\\exp': 'exp', + '\\det': 'det', + '\\dim': 'dim', + '\\ker': 'ker', + '\\lim': 'lim', + '\\liminf': 'liminf', + '\\limsup': 'limsup', + '\\sup': 'sup', + '\\inf': 'inf', + '\\max': 'max', + '\\min': 'min', + '\\arg': 'arg', + '\\gcd': 'gcd', + + // Escaped literals — model occasionally emits these for display + '\\&': '&', + '\\%': '%', + '\\$': '$', + '\\#': '#', + '\\_': '_', + '\\{': '{', + '\\}': '}' +} + +const BB: Record = { + A: '𝔸', + B: '𝔹', + C: 'ℂ', + D: '𝔻', + E: '𝔼', + F: '𝔽', + G: '𝔾', + H: 'ℍ', + I: '𝕀', + J: '𝕁', + K: '𝕂', + L: '𝕃', + M: '𝕄', + N: 'ℕ', + O: '𝕆', + P: 'ℙ', + Q: 'ℚ', + R: 'ℝ', + S: '𝕊', + T: '𝕋', + U: '𝕌', + V: '𝕍', + W: '𝕎', + X: '𝕏', + Y: '𝕐', + Z: 'ℤ' +} + +const CAL: Record = { + A: '𝒜', + B: 'ℬ', + C: '𝒞', + D: '𝒟', + E: 'ℰ', + F: 'ℱ', + G: '𝒢', + H: 'ℋ', + I: 'ℐ', + J: '𝒥', + K: '𝒦', + L: 'ℒ', + M: 'ℳ', + N: '𝒩', + O: '𝒪', + P: '𝒫', + Q: '𝒬', + R: 'ℛ', + S: '𝒮', + T: '𝒯', + U: '𝒰', + V: '𝒱', + W: '𝒲', + X: '𝒳', + Y: '𝒴', + Z: '𝒵' +} + +const FRAK: Record = { + A: '𝔄', + B: '𝔅', + C: 'ℭ', + D: '𝔇', + E: '𝔈', + F: '𝔉', + G: '𝔊', + H: 'ℌ', + I: 'ℑ', + J: '𝔍', + K: '𝔎', + L: '𝔏', + M: '𝔐', + N: '𝔑', + O: '𝔒', + P: '𝔓', + Q: '𝔔', + R: 'ℜ', + S: '𝔖', + T: '𝔗', + U: '𝔘', + V: '𝔙', + W: '𝔚', + X: '𝔛', + Y: '𝔜', + Z: 'ℨ' +} + +const SUPERSCRIPT: Record = { + '0': '⁰', + '1': '¹', + '2': '²', + '3': '³', + '4': '⁴', + '5': '⁵', + '6': '⁶', + '7': '⁷', + '8': '⁸', + '9': '⁹', + '+': '⁺', + '-': '⁻', + '=': '⁼', + '(': '⁽', + ')': '⁾', + a: 'ᵃ', + b: 'ᵇ', + c: 'ᶜ', + d: 'ᵈ', + e: 'ᵉ', + f: 'ᶠ', + g: 'ᵍ', + h: 'ʰ', + i: 'ⁱ', + j: 'ʲ', + k: 'ᵏ', + l: 'ˡ', + m: 'ᵐ', + n: 'ⁿ', + o: 'ᵒ', + p: 'ᵖ', + r: 'ʳ', + s: 'ˢ', + t: 'ᵗ', + u: 'ᵘ', + v: 'ᵛ', + w: 'ʷ', + x: 'ˣ', + y: 'ʸ', + z: 'ᶻ' +} + 
// Unicode subscript forms. Coverage is deliberately partial: Unicode only
// defines subscript variants for the letters listed here.
const SUBSCRIPT: Record<string, string> = {
  '0': '₀',
  '1': '₁',
  '2': '₂',
  '3': '₃',
  '4': '₄',
  '5': '₅',
  '6': '₆',
  '7': '₇',
  '8': '₈',
  '9': '₉',
  '+': '₊',
  '-': '₋',
  '=': '₌',
  '(': '₍',
  ')': '₎',
  a: 'ₐ',
  e: 'ₑ',
  h: 'ₕ',
  i: 'ᵢ',
  j: 'ⱼ',
  k: 'ₖ',
  l: 'ₗ',
  m: 'ₘ',
  n: 'ₙ',
  o: 'ₒ',
  p: 'ₚ',
  r: 'ᵣ',
  s: 'ₛ',
  t: 'ₜ',
  u: 'ᵤ',
  v: 'ᵥ',
  x: 'ₓ'
}

/** Escapes every regex metacharacter in `s` so it matches literally. */
const escapeRe = (s: string): string => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

// Pre-compile two symbol regexes: one for letter-ending commands (`\pi`,
// `\sum`) which need a `(?![A-Za-z])` lookahead so they don't partially
// match `\pix` or `\summa`, and one for punctuation-ending commands
// (`\{`, `\,`, `\|`) which must NOT have the lookahead — otherwise
// `\{p` would refuse to substitute because `p` is a letter.
//
// Longest commands first inside each group so `\leq` beats `\le`.
const splitByEnding = (keys: string[]) => {
  const letter: string[] = []
  const punct: string[] = []

  for (const k of keys) {
    if (/[A-Za-z]$/.test(k)) {
      letter.push(k)
    } else {
      punct.push(k)
    }
  }

  return { letter, punct }
}

/**
 * Builds a regex alternation body from `cmds`, longest command first.
 * Sorts a copy so the caller's array keeps its original order.
 */
const buildAlt = (cmds: string[]): string =>
  [...cmds]
    .sort((a, b) => b.length - a.length)
    .map(escapeRe)
    .join('|')

const { letter: LETTER_CMDS, punct: PUNCT_CMDS } = splitByEnding(Object.keys(SYMBOLS))

const SYMBOL_LETTER_RE = new RegExp('(?:' + buildAlt(LETTER_CMDS) + ')(?![A-Za-z])', 'g')
const SYMBOL_PUNCT_RE = new RegExp('(?:' + buildAlt(PUNCT_CMDS) + ')', 'g')

// Escaped literals whose replacement character is itself TeX syntax for the
// script pass (`_` starts a subscript, `{`/`}` delimit a script body). They
// are parked on Unicode noncharacters during the symbol pass and restored
// only after scripts have been converted, so `a\_e` stays `a_e` instead of
// turning into `aₑ`. Assumes the input never contains U+FDD0–U+FDD2.
const ESCAPED_TO_SENTINEL: Record<string, string> = {
  '\\_': '\uFDD0',
  '\\{': '\uFDD1',
  '\\}': '\uFDD2'
}

const SENTINEL_TO_LITERAL: Record<string, string> = {
  '\uFDD0': '_',
  '\uFDD1': '{',
  '\uFDD2': '}'
}

const SENTINEL_RE = /[\uFDD0-\uFDD2]/g

/**
 * Converts the body of a braced script (`^{...}` / `_{...}`).
 *
 * @param input - Script body without the surrounding braces.
 * @param table - Character map to the super-/subscript forms.
 * @param sigil - Script marker re-emitted when the body cannot be fully mapped.
 * @returns The fully mapped body, or a readable fallback (`^x` / `^(xy)`).
 */
const convertScript = (input: string, table: Record<string, string>, sigil: '^' | '_'): string => {
  let out = ''
  let allMapped = true

  for (const ch of input) {
    const mapped = table[ch]

    if (!mapped) {
      allMapped = false

      break
    }

    out += mapped
  }

  if (allMapped) {
    return out
  }

  // Fallback: if the body is a single visible character (e.g. `∞` after
  // earlier symbol substitution), render it without braces — `^∞` reads
  // far better than `^{∞}` in a terminal. Multi-char bodies that don't
  // fully convert use parens (`e^(iπ)`) instead of braces (`e^{iπ}`)
  // because parens are normal punctuation while braces look like
  // unrendered LaTeX.
  const trimmed = input.trim()

  if ([...trimmed].length === 1) {
    return `${sigil}${trimmed}`
  }

  return `${sigil}(${trimmed})`
}

/**
 * True when `expr` is one parenthesised group, i.e. its leading `(` is
 * closed by its final `)`. `(a+b)` qualifies; `(a)+(b)` does not.
 */
const isSingleParenGroup = (expr: string): boolean => {
  if (!expr.startsWith('(') || !expr.endsWith(')')) {
    return false
  }

  let depth = 0

  for (let i = 0; i < expr.length; i++) {
    const ch = expr[i]

    if (ch === '(') {
      depth++
    } else if (ch === ')') {
      depth--

      // The opening paren closed before the end: more than one group.
      if (depth === 0 && i < expr.length - 1) {
        return false
      }
    }
  }

  return depth === 0
}

// Wrap multi-token expressions in parens so `\frac{a+b}{c}` becomes
// `(a+b)/c` rather than `a+b/c`. We only wrap when the expression has
// loose precedence — additive operators or whitespace that would change
// meaning under inline `/`. Atomic factors like `n!`, `x^2`, `\sin x`
// don't need parens; wrapping them just clutters the output.
const wrapForFrac = (expr: string): string => {
  const trimmed = expr.trim()

  if (!trimmed) {
    return trimmed
  }

  // Already one parenthesised group — adding another pair is noise.
  if (isSingleParenGroup(trimmed)) {
    return trimmed
  }

  if (/[+\-/*]|\s/.test(trimmed)) {
    return `(${trimmed})`
  }

  return trimmed
}

/**
 * Renders a TeX math fragment as plain Unicode text.
 *
 * Passes run in a fixed order: font commands and accents, fractions,
 * arrows, `\left`/`\right` unwrapping, symbol substitution, then
 * super-/subscripts. Commands that are not recognised are left as-is.
 *
 * @param input - TeX source without the surrounding math delimiters.
 * @returns The Unicode approximation of `input`.
 */
export function texToUnicode(input: string): string {
  let s = input

  s = s.replace(/\\mathbb\s*\{([A-Za-z])\}/g, (raw, c: string) => BB[c] ?? raw)
  s = s.replace(/\\mathcal\s*\{([A-Za-z])\}/g, (raw, c: string) => CAL[c] ?? raw)
  s = s.replace(/\\mathfrak\s*\{([A-Za-z])\}/g, (raw, c: string) => FRAK[c] ?? raw)
  s = s.replace(/\\mathbf\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\mathit\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\mathrm\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\text\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\operatorname\s*\{([^{}]+)\}/g, (_, c: string) => c)

  // Accents become a combining mark appended after the body.
  s = s.replace(/\\overline\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0305`)
  s = s.replace(/\\hat\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0302`)
  s = s.replace(/\\bar\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0304`)
  s = s.replace(/\\tilde\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0303`)
  s = s.replace(/\\vec\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u20D7`)
  s = s.replace(/\\dot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0307`)
  s = s.replace(/\\ddot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0308`)

  // Apply \frac repeatedly so nested fractions resolve from the inside
  // out — `\frac{1}{1+\frac{1}{x}}` collapses cleanly. The guard caps the
  // number of passes.
  let prev = ''
  let guard = 0

  while (s !== prev && guard++ < 8) {
    prev = s
    s = s.replace(/\\frac\s*\{([^{}]*)\}\s*\{([^{}]*)\}/g, (_, num: string, den: string) => `${wrapForFrac(num)}/${wrapForFrac(den)}`)
  }

  // `\xrightarrow{label}` / `\xleftarrow{label}` collapse to an arrow with
  // the label inline. LaTeX renders the label above the arrow; in monospace
  // we put it adjacent — `─label→` is the closest readable approximation.
  // Run before the symbol pass so the label can still pick up Greek and
  // operator substitutions afterwards.
  s = s.replace(/\\xrightarrow\s*\{([^{}]*)\}/g, (_, label: string) => `─${label.trim()}→`)
  s = s.replace(/\\xleftarrow\s*\{([^{}]*)\}/g, (_, label: string) => `←${label.trim()}─`)
  s = s.replace(/\\Longrightarrow/g, '⟹')
  s = s.replace(/\\Longleftarrow/g, '⟸')
  s = s.replace(/\\Longleftrightarrow/g, '⟺')

  // `\left` and `\right` are sizing wrappers around any delimiter — bare
  // (`\left(`), escaped (`\left\{`), or named (`\left\langle`). Strip the
  // wrapper unconditionally and let the rest of the pipeline (or the
  // upcoming symbol pass) handle whatever delimiter follows. The optional
  // `.?` consumes `\left.` / `\right.` which mean "no delimiter".
  // Lookahead `(?![A-Za-z])` keeps `\leftarrow` / `\leftrightarrow` safe.
  s = s.replace(/\\left(?![A-Za-z])\.?/g, '')
  s = s.replace(/\\right(?![A-Za-z])\.?/g, '')

  // Run symbol substitution BEFORE scripts so a body like `^{\infty}`
  // becomes `^{∞}` first; convertScript can then either map ∞ to a
  // superscript (it can't — Unicode lacks one) or fall back to `^∞`
  // by stripping braces around the now-single-character body.
  //
  // Punctuation pass first — these can be followed by letters (`\{p`
  // is "open-brace then p"), so the letter pass's `(?![A-Za-z])` rule
  // would wrongly block them. Escaped `_`, `{`, `}` go to sentinels here
  // so the script pass below cannot mistake them for TeX syntax.
  s = s.replace(SYMBOL_PUNCT_RE, m => ESCAPED_TO_SENTINEL[m] ?? SYMBOLS[m] ?? m)
  s = s.replace(SYMBOL_LETTER_RE, m => SYMBOLS[m] ?? m)

  // Bare `^c` / `_c` handles ONLY alphanumerics and `+`/`-`/`=`. Parens
  // are intentionally excluded because the braced fallback in
  // convertScript can emit `(...)` and we don't want the bare pass to
  // greedily convert its opening paren into `⁽` and orphan the closing one.
  s = s.replace(/\^\s*\{([^{}]+)\}/g, (_, body: string) => convertScript(body, SUPERSCRIPT, '^'))
  s = s.replace(/\^([A-Za-z0-9+\-=])/g, (raw, ch: string) => SUPERSCRIPT[ch] ?? raw)
  s = s.replace(/_\s*\{([^{}]+)\}/g, (_, body: string) => convertScript(body, SUBSCRIPT, '_'))
  s = s.replace(/_([A-Za-z0-9+\-=])/g, (raw, ch: string) => SUBSCRIPT[ch] ?? raw)

  // Scripts are done; put the escaped literals back as plain characters.
  return s.replace(SENTINEL_RE, m => SENTINEL_TO_LITERAL[m] ?? m)
}