feat(latex): latex in tui

This commit is contained in:
Austin Pickett 2026-04-28 19:08:11 -04:00
parent 124da27767
commit c3d39feb3a
7 changed files with 1022 additions and 43 deletions

View file

@ -124,6 +124,7 @@
"integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@babel/code-frame": "^7.29.0", "@babel/code-frame": "^7.29.0",
"@babel/generator": "^7.29.0", "@babel/generator": "^7.29.0",
@ -501,31 +502,6 @@
"node": ">=6.9.0" "node": ">=6.9.0"
} }
}, },
"node_modules/@emnapi/core": {
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz",
"integrity": "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==",
"dev": true,
"license": "MIT",
"optional": true,
"peer": true,
"dependencies": {
"@emnapi/wasi-threads": "1.2.1",
"tslib": "^2.4.0"
}
},
"node_modules/@emnapi/runtime": {
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz",
"integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==",
"dev": true,
"license": "MIT",
"optional": true,
"peer": true,
"dependencies": {
"tslib": "^2.4.0"
}
},
"node_modules/@emnapi/wasi-threads": { "node_modules/@emnapi/wasi-threads": {
"version": "1.2.1", "version": "1.2.1",
"resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz", "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz",
@ -1700,6 +1676,7 @@
"integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"undici-types": "~7.19.0" "undici-types": "~7.19.0"
} }
@ -1710,6 +1687,7 @@
"integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==", "integrity": "sha512-ilcTH/UniCkMdtexkoCN0bI7pMcJDvmQFPvuPvmEaYA/NSfFTAgdUSLAoVjaRJm7+6PvcM+q1zYOwS4wTYMF9w==",
"devOptional": true, "devOptional": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"csstype": "^3.2.2" "csstype": "^3.2.2"
} }
@ -1720,6 +1698,7 @@
"integrity": "sha512-eSkwoemjo76bdXl2MYqtxg51HNwUSkWfODUOQ3PaTLZGh9uIWWFZIjyjaJnex7wXDu+TRx+ATsnSxdN9YWfRTQ==", "integrity": "sha512-eSkwoemjo76bdXl2MYqtxg51HNwUSkWfODUOQ3PaTLZGh9uIWWFZIjyjaJnex7wXDu+TRx+ATsnSxdN9YWfRTQ==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@eslint-community/regexpp": "^4.12.2", "@eslint-community/regexpp": "^4.12.2",
"@typescript-eslint/scope-manager": "8.58.1", "@typescript-eslint/scope-manager": "8.58.1",
@ -1749,6 +1728,7 @@
"integrity": "sha512-gGkiNMPqerb2cJSVcruigx9eHBlLG14fSdPdqMoOcBfh+vvn4iCq2C8MzUB89PrxOXk0y3GZ1yIWb9aOzL93bw==", "integrity": "sha512-gGkiNMPqerb2cJSVcruigx9eHBlLG14fSdPdqMoOcBfh+vvn4iCq2C8MzUB89PrxOXk0y3GZ1yIWb9aOzL93bw==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@typescript-eslint/scope-manager": "8.58.1", "@typescript-eslint/scope-manager": "8.58.1",
"@typescript-eslint/types": "8.58.1", "@typescript-eslint/types": "8.58.1",
@ -2066,6 +2046,7 @@
"integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==", "integrity": "sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"bin": { "bin": {
"acorn": "bin/acorn" "acorn": "bin/acorn"
}, },
@ -2468,6 +2449,7 @@
} }
], ],
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"baseline-browser-mapping": "^2.10.12", "baseline-browser-mapping": "^2.10.12",
"caniuse-lite": "^1.0.30001782", "caniuse-lite": "^1.0.30001782",
@ -3203,6 +3185,7 @@
"integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==", "integrity": "sha512-XoMjdBOwe/esVgEvLmNsD3IRHkm7fbKIUGvrleloJXUZgDHig2IPWNniv+GwjyJXzuNqVjlr5+4yVUZjycJwfQ==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/eslint-utils": "^4.8.0",
"@eslint-community/regexpp": "^4.12.1", "@eslint-community/regexpp": "^4.12.1",
@ -3334,6 +3317,7 @@
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"funding": { "funding": {
"url": "https://github.com/sponsors/colinhacks" "url": "https://github.com/sponsors/colinhacks"
} }
@ -4242,6 +4226,7 @@
"resolved": "https://registry.npmjs.org/ink-text-input/-/ink-text-input-6.0.0.tgz", "resolved": "https://registry.npmjs.org/ink-text-input/-/ink-text-input-6.0.0.tgz",
"integrity": "sha512-Fw64n7Yha5deb1rHY137zHTAbSTNelUKuB5Kkk2HACXEtwIHBCf9OH2tP/LQ9fRYTl1F0dZgbW0zPnZk6FA9Lw==", "integrity": "sha512-Fw64n7Yha5deb1rHY137zHTAbSTNelUKuB5Kkk2HACXEtwIHBCf9OH2tP/LQ9fRYTl1F0dZgbW0zPnZk6FA9Lw==",
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"chalk": "^5.3.0", "chalk": "^5.3.0",
"type-fest": "^4.18.2" "type-fest": "^4.18.2"
@ -5678,6 +5663,7 @@
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"engines": { "engines": {
"node": ">=12" "node": ">=12"
}, },
@ -5787,6 +5773,7 @@
"resolved": "https://registry.npmjs.org/react/-/react-19.2.5.tgz", "resolved": "https://registry.npmjs.org/react/-/react-19.2.5.tgz",
"integrity": "sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA==", "integrity": "sha512-llUJLzz1zTUBrskt2pwZgLq59AemifIftw4aB7JxOqf1HY2FDaGDxgwpAPVzHU1kdWabH7FauP4i1oEeer2WCA==",
"license": "MIT", "license": "MIT",
"peer": true,
"engines": { "engines": {
"node": ">=0.10.0" "node": ">=0.10.0"
} }
@ -6611,6 +6598,7 @@
"integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"esbuild": "~0.27.0", "esbuild": "~0.27.0",
"get-tsconfig": "^4.7.5" "get-tsconfig": "^4.7.5"
@ -6737,6 +6725,7 @@
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"dev": true, "dev": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"peer": true,
"bin": { "bin": {
"tsc": "bin/tsc", "tsc": "bin/tsc",
"tsserver": "bin/tsserver" "tsserver": "bin/tsserver"
@ -6846,6 +6835,7 @@
"integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==", "integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"dependencies": { "dependencies": {
"lightningcss": "^1.32.0", "lightningcss": "^1.32.0",
"picomatch": "^4.0.4", "picomatch": "^4.0.4",
@ -7261,6 +7251,7 @@
"integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==",
"dev": true, "dev": true,
"license": "MIT", "license": "MIT",
"peer": true,
"funding": { "funding": {
"url": "https://github.com/sponsors/colinhacks" "url": "https://github.com/sponsors/colinhacks"
} }

View file

@ -61,6 +61,66 @@ describe('stripInlineMarkup', () => {
expect(stripInlineMarkup('Yay ~! nice work ~!')).toBe('Yay ~! nice work ~!') expect(stripInlineMarkup('Yay ~! nice work ~!')).toBe('Yay ~! nice work ~!')
expect(stripInlineMarkup('H~2~O and CO~2~')).toBe('H_2O and CO_2') expect(stripInlineMarkup('H~2~O and CO~2~')).toBe('H_2O and CO_2')
}) })
it('strips inline math delimiters but keeps the formula text', () => {
expect(stripInlineMarkup('$\\mathbb{Z}$ is a ring')).toBe('\\mathbb{Z} is a ring')
expect(stripInlineMarkup('see \\(a + b\\) ok')).toBe('see a + b ok')
})
})
describe('INLINE_RE inline math', () => {
it('matches single-dollar math and beats emphasis at the same start', () => {
// Without math handling, `*b*` would have matched as italics and
// corrupted the formula. With math added to INLINE_RE, the leftmost
// match at column 0 (`$P=a*b*c$`) wins.
expect(matches('$P=a*b*c$')).toEqual(['$P=a*b*c$'])
expect(matches('see $\\mathbb{Z}$ here')).toEqual(['$\\mathbb{Z}$'])
})
it('does not match currency-style prose', () => {
expect(matches('it costs $5 and $10')).toEqual([])
expect(matches('paid $5')).toEqual([])
})
it('does not let inline math swallow a $$ display fence', () => {
// `$$x$$` is a display block, not two abutting inline-math spans.
expect(matches('$$x$$')).toEqual([])
})
it('matches \\(...\\) inline math', () => {
expect(matches('foo \\(x + y\\) bar')).toEqual(['\\(x + y\\)'])
})
it('does not corrupt subscripts/superscripts inside math', () => {
// `_n` and `^r` are markdown emphasis/superscript markers in prose, but
// inside a `$...$` span the entire formula is captured as a single
// inline-math token so the inner regexes never see those characters.
expect(matches('$P=a_n x^n + a_0$')).toEqual(['$P=a_n x^n + a_0$'])
expect(matches('$\\beta_1,\\dots,\\beta_r$')).toEqual(['$\\beta_1,\\dots,\\beta_r$'])
})
it('places math content in the correct capture group (regression: m[16] is bare URL)', () => {
// When `m[16]` was the bare URL group AND the inline-math `$...$`
// group simultaneously (because the bare URL pattern lacked its own
// capturing parens), MdInline rendered `$\\mathbb{R}$` as an
// underlined autolink instead of italic amber math. Lock down the
// numbering: math goes in m[17] / m[18], URLs go in m[16].
const url = [...'see https://example.com here'.matchAll(INLINE_RE)][0]!
const dollarMath = [...'$\\mathbb{R}$'.matchAll(INLINE_RE)][0]!
const parenMath = [...'\\(\\pi\\)'.matchAll(INLINE_RE)][0]!
expect(url[16]).toBe('https://example.com')
expect(url[17]).toBeUndefined()
expect(url[18]).toBeUndefined()
expect(dollarMath[16]).toBeUndefined()
expect(dollarMath[17]).toBe('\\mathbb{R}')
expect(dollarMath[18]).toBeUndefined()
expect(parenMath[16]).toBeUndefined()
expect(parenMath[17]).toBeUndefined()
expect(parenMath[18]).toBe('\\pi')
})
}) })
describe('protocol sentinels', () => { describe('protocol sentinels', () => {

View file

@ -0,0 +1,197 @@
import { describe, expect, it } from 'vitest'
import { texToUnicode } from '../lib/mathUnicode.js'
// Symbol-table substitutions: Greek letters, set / logic operators,
// relations and arrows, plus the matching rules (longest command first,
// no partial match inside a longer command name) exercised below.
describe('texToUnicode — symbols', () => {
  it('substitutes lowercase Greek', () => {
    expect(texToUnicode('\\alpha + \\beta + \\pi')).toBe('α + β + π')
    expect(texToUnicode('\\omega')).toBe('ω')
  })

  it('substitutes uppercase Greek', () => {
    expect(texToUnicode('\\Sigma \\Omega \\Pi')).toBe('Σ Ω Π')
  })

  it('substitutes set theory and logic operators', () => {
    // `\cup` is U+222A UNION. It is spelled as an escape because the
    // literal glyph is easily lost in transit, which silently turns the
    // expectation into `A B ∩ C`.
    expect(texToUnicode('A \\cup B \\cap C')).toBe('A \u222A B ∩ C')
    expect(texToUnicode('\\forall x \\in \\emptyset')).toBe('∀ x ∈ ∅')
    expect(texToUnicode('p \\implies q \\iff r')).toBe('p ⟹ q ⟺ r')
  })

  it('substitutes relations and arrows', () => {
    expect(texToUnicode('a \\le b \\ge c \\ne d')).toBe('a ≤ b ≥ c ≠ d')
    expect(texToUnicode('f: A \\to B')).toBe('f: A → B')
  })

  it('uses longest-match-first so \\leq beats \\le', () => {
    expect(texToUnicode('\\leq')).toBe('≤')
  })

  it('preserves unknown commands that share a prefix with known ones', () => {
    // `\leqq` is a real LaTeX command (≦) we don't have in our table.
    // The word-boundary lookahead prevents `\le` from matching, so the
    // whole thing is preserved verbatim — much better than `≤qq`.
    expect(texToUnicode('\\leqq')).toBe('\\leqq')
  })

  it('refuses to substitute a partial command (word boundary)', () => {
    expect(texToUnicode('\\alphabet')).toBe('\\alphabet')
    expect(texToUnicode('\\pin')).toBe('\\pin')
  })
})
// Alphabet commands: `\mathbb`, `\mathcal` and `\mathfrak` turn a single
// capital into its dedicated Unicode letter, while plain font wrappers
// (`\mathbf`, `\text`, `\operatorname`, …) are unwrapped to their argument.
//
// The double-struck / script capitals and the subset sign are written as
// `\u` escapes: the literal glyphs are easily dropped in transit, which
// leaves expectations such as `toBe('')` that can never pass.
describe('texToUnicode — blackboard / calligraphic / fraktur', () => {
  it('renders \\mathbb capitals', () => {
    // U+211D DOUBLE-STRUCK CAPITAL R
    expect(texToUnicode('\\mathbb{R}')).toBe('\u211D')
    // N, Z, Q, R double-struck capitals joined by U+2282 SUBSET OF
    expect(texToUnicode('\\mathbb{N} \\subset \\mathbb{Z} \\subset \\mathbb{Q} \\subset \\mathbb{R}')).toBe(
      '\u2115 \u2282 \u2124 \u2282 \u211A \u2282 \u211D'
    )
  })

  it('renders \\mathcal and \\mathfrak', () => {
    // U+2131 SCRIPT CAPITAL F, U+2282 SUBSET OF, then fraktur capital A.
    expect(texToUnicode('\\mathcal{F} \\subset \\mathfrak{A}')).toBe('\u2131 \u2282 𝔄')
  })

  it('preserves \\mathbb{...} when argument is multi-letter or non-letter', () => {
    expect(texToUnicode('\\mathbb{NN}')).toBe('\\mathbb{NN}')
    expect(texToUnicode('\\mathbb{1}')).toBe('\\mathbb{1}')
  })

  it('strips \\mathbf / \\mathit / \\mathrm / \\text wrappers (no Unicode bold/italic in monospace)', () => {
    expect(texToUnicode('\\mathbf{x}')).toBe('x')
    expect(texToUnicode('\\text{if } x > 0')).toBe('if x > 0')
    expect(texToUnicode('\\operatorname{rank}(A)')).toBe('rank(A)')
  })
})
// Script handling: `^` / `_` bodies become Unicode super/subscripts when
// every character has such a glyph, and otherwise fall back to a readable
// plain-text form.
describe('texToUnicode — sub / superscripts', () => {
  // Runs each [tex, rendered] pair through the converter, in order.
  const expectAll = (pairs: ReadonlyArray<readonly [string, string]>) => {
    for (const [tex, rendered] of pairs) {
      expect(texToUnicode(tex)).toBe(rendered)
    }
  }

  it('converts simple superscripts', () => {
    expectAll([
      ['x^2 + y^2', 'x² + y²'],
      ['e^{n}', 'eⁿ']
    ])
  })

  it('converts simple subscripts', () => {
    expectAll([
      ['a_1 + a_2 + a_n', 'a₁ + a₂ + aₙ'],
      ['x_{0}', 'x₀']
    ])
  })

  it('converts mixed-content scripts when every glyph has a Unicode form', () => {
    // Superscript glyphs exist for `+`, digits and lowercase letters, so
    // `n+1` converts fully. A comma has no subscript glyph, so `i,j` is
    // not partially substituted; it falls back to `_(i,j)`. Parens are
    // used because they read as grouping, whereas braces would look like
    // unrendered LaTeX.
    expectAll([
      ['x^{n+1}', 'xⁿ⁺¹'],
      ['a_{i,j}', 'a_(i,j)']
    ])
  })

  it('uses parens (not braces) when the body has Greek with no superscript form', () => {
    // The symbol pass turns `e^{i\pi}` into `e^{iπ}`; π has no superscript
    // glyph, so the script fallback produces `e^(iπ)`.
    expectAll([['e^{i\\pi}', 'e^(iπ)']])
  })

  it('strips braces on script fallback when body collapses to a single char', () => {
    // `^{\infty}` becomes `^{∞}` after the symbol pass. ∞ has no
    // superscript glyph, but a one-character body needs no grouping, so
    // the braces are dropped and the result is `^∞`.
    expectAll([['e^{\\infty}', 'e^∞']])
  })

  it('handles a real-world sum', () => {
    expectAll([['\\sum_{n=0}^{\\infty} \\frac{1}{n!}', '∑ₙ₌₀^∞ 1/n!']])
  })
})
// `\frac{a}{b}` is flattened to `a/b`, with parentheses added around any
// side that is more than a single token.
describe('texToUnicode — fractions', () => {
  // Asserts that one TeX snippet renders to the given text.
  const renders = (tex: string, rendered: string) => {
    expect(texToUnicode(tex)).toBe(rendered)
  }

  it('collapses \\frac to a/b', () => {
    renders('\\frac{1}{2}', '1/2')
    renders('\\frac{a}{b}', 'a/b')
  })

  it('parenthesises multi-token numerator / denominator', () => {
    renders('\\frac{n+1}{2}', '(n+1)/2')
    renders('\\frac{a + b}{c - d}', '(a + b)/(c - d)')
  })

  it('handles nested fractions', () => {
    renders('\\frac{1}{\\frac{1}{x}}', '1/(1/x)')
  })
})
// Accent commands append a Unicode combining mark to their argument.
describe('texToUnicode — combining marks', () => {
  it('applies \\overline / \\bar / \\hat / \\vec / \\tilde', () => {
    // [tex, base letter followed by its combining mark]
    const accented: ReadonlyArray<readonly [string, string]> = [
      ['\\overline{x}', 'x\u0305'],
      ['\\hat{y}', 'y\u0302'],
      ['\\vec{v}', 'v\u20D7']
    ]

    for (const [tex, rendered] of accented) {
      expect(texToUnicode(tex)).toBe(rendered)
    }
  })
})
// `\left` / `\right` sizing prefixes are removed while the delimiter that
// follows them is kept (or dropped, for the explicit "no delimiter" dot).
describe('texToUnicode — left/right delimiters', () => {
  it('strips \\left and \\right keeping the delimiter character', () => {
    expect(texToUnicode('\\left( x + y \\right)')).toBe('( x + y )')
    expect(texToUnicode('\\left| x \\right|')).toBe('| x |')
  })

  it('handles escaped delimiters \\left\\{ ... \\right\\}', () => {
    // `\mid` is U+2223 DIVIDES. It is spelled as an escape because the
    // literal glyph is easily lost in transit, which silently turns the
    // expectation into `{p/q q ≠ 0}`.
    expect(texToUnicode('\\left\\{p/q \\mid q \\neq 0\\right\\}')).toBe('{p/q \u2223 q ≠ 0}')
  })

  it('handles named delimiters via \\left\\langle / \\right\\rangle', () => {
    expect(texToUnicode('\\left\\langle u, v \\right\\rangle')).toBe('⟨ u, v ⟩')
  })

  it('drops \\left. and \\right. (which are explicit "no delimiter")', () => {
    expect(texToUnicode('\\left. f \\right|')).toBe(' f |')
  })

  it('preserves \\leftarrow / \\rightarrow (word boundary blocks the strip)', () => {
    expect(texToUnicode('A \\leftarrow B \\rightarrow C')).toBe('A ← B → C')
  })
})
// Extensible arrows carry their label inline, between a rule and the
// arrowhead.
describe('texToUnicode — labelled arrows', () => {
  // Asserts that one TeX snippet renders to the given text.
  const renders = (tex: string, rendered: string) => {
    expect(texToUnicode(tex)).toBe(rendered)
  }

  it('renders \\xrightarrow{label} as ─label→', () => {
    renders('a \\xrightarrow{x=1} b', 'a ─x=1→ b')
  })

  it('renders \\xleftarrow{label} as ←label─', () => {
    renders('a \\xleftarrow{n} b', 'a ←n─ b')
  })

  it('still applies symbol substitution inside the label', () => {
    renders('a \\xrightarrow{n \\to \\infty} L', 'a ─n → ∞→ L')
  })
})
// Punctuation commands (`\{`, `\}`, `\,`) must substitute even when a
// letter follows directly.
describe('texToUnicode — punctuation commands without lookahead', () => {
  it('substitutes \\{ even when immediately followed by a letter', () => {
    // Regression guard: a `(?![A-Za-z])` lookahead applied to every
    // command made `\{p` fail to match (the next char, `p`, is a letter),
    // so it was rendered unchanged as `\{p`.
    const rendered = texToUnicode('\\{p, q\\}')
    expect(rendered).toBe('{p, q}')
  })

  it('substitutes thin-space \\, before a letter', () => {
    const rendered = texToUnicode('a\\,b')
    expect(rendered).toBe('a b')
  })
})
// End-to-end sanity checks on formulas shaped like real model output:
// mixed known commands, unknown commands, adjacent commands, plain text.
describe('texToUnicode — round-trip realism', () => {
  it('renders a typical model-emitted formula', () => {
    // Expected: alpha, U+2208 ELEMENT OF, double-struck R, then alpha,
    // U+2209 NOT AN ELEMENT OF, double-struck Q. The non-Greek glyphs are
    // spelled as escapes because the literals are easily lost in transit,
    // which silently turns the expectation into `α, α`.
    expect(texToUnicode('\\alpha \\in \\mathbb{R}, \\alpha \\notin \\mathbb{Q}')).toBe(
      'α \u2208 \u211D, α \u2209 \u211A'
    )
  })

  it('preserves unknown commands verbatim', () => {
    expect(texToUnicode('\\bigtriangleup \\circledast')).toBe('\\bigtriangleup \\circledast')
  })

  it('handles commands without delimiters between', () => {
    // Word-boundary lookahead means `\alpha\beta` doesn't accidentally
    // match `\alphabeta` as one ungrouped token.
    expect(texToUnicode('\\alpha\\beta')).toBe('αβ')
  })

  it('leaves plain text alone', () => {
    expect(texToUnicode('hello world')).toBe('hello world')
    expect(texToUnicode('')).toBe('')
  })
})

View file

@ -67,6 +67,48 @@ describe('findStableBoundary', () => {
it('handles empty input', () => { it('handles empty input', () => {
expect(findStableBoundary('')).toBe(-1) expect(findStableBoundary('')).toBe(-1)
}) })
it('refuses to split inside an open $$ math block', () => {
// Display math has been opened but not closed; the only blank line
// sits inside the open block, so there's no safe boundary yet.
const text = '$$\nx + y\n\nmore math'
expect(findStableBoundary(text)).toBe(-1)
})
it('allows splitting after a $$ math block closes', () => {
const text = '$$\nx + y = z\n$$\n\nnarration continues'
const idx = findStableBoundary(text)
expect(text.slice(0, idx)).toBe('$$\nx + y = z\n$$\n\n')
expect(text.slice(idx)).toBe('narration continues')
})
it('splits before an open $$ block but not inside', () => {
// Mirror of the existing fenced-code test: prose, then an unclosed
// math block. The only safe boundary is the blank line BEFORE `$$`.
const text = 'intro paragraph\n\n$$\nx + y\n\nmore'
const idx = findStableBoundary(text)
expect(text.slice(0, idx)).toBe('intro paragraph\n\n')
expect(text.slice(idx).startsWith('$$')).toBe(true)
})
it('treats single-line $$x$$ as zero net toggle', () => {
// `$$x = y$$` opens AND closes on one line, so the stable boundary
// after it is allowed.
const text = 'intro\n\n$$x = y$$\n\nnarration'
const idx = findStableBoundary(text)
expect(text.slice(0, idx)).toBe('intro\n\n$$x = y$$\n\n')
expect(text.slice(idx)).toBe('narration')
})
it('refuses to split inside an open \\[ math block', () => {
const text = '\\[\nx + y\n\nmore'
expect(findStableBoundary(text)).toBe(-1)
})
}) })
describe('streaming theme assumption', () => { describe('streaming theme assumption', () => {

View file

@ -2,6 +2,7 @@ import { Box, Link, Text } from '@hermes/ink'
import { memo, type ReactNode, useMemo } from 'react' import { memo, type ReactNode, useMemo } from 'react'
import { ensureEmojiPresentation } from '../lib/emoji.js' import { ensureEmojiPresentation } from '../lib/emoji.js'
import { texToUnicode } from '../lib/mathUnicode.js'
import { highlightLine, isHighlightable } from '../lib/syntax.js' import { highlightLine, isHighlightable } from '../lib/syntax.js'
import type { Theme } from '../theme.js' import type { Theme } from '../theme.js'
@ -19,6 +20,15 @@ const QUOTE_RE = /^\s*(?:>\s*)+/
const TABLE_DIVIDER_CELL_RE = /^:?-{3,}:?$/ const TABLE_DIVIDER_CELL_RE = /^:?-{3,}:?$/
const MD_URL_RE = '((?:[^\\s()]|\\([^\\s()]*\\))+?)' const MD_URL_RE = '((?:[^\\s()]|\\([^\\s()]*\\))+?)'
// Display math openers: `$$ ... $$` (TeX) and `\[ ... \]` (LaTeX). The
// opener is matched only when `$$` / `\[` appears at the very start of the
// trimmed line — `startsWith('$$')` used to fire on prose like
// `$$x+y$$ followed by more`, opening a block that never closed because the
// trailing `$$` on the same line was invisible to the close-scan loop.
const MATH_BLOCK_OPEN_RE = /^\s*(\$\$|\\\[)(.*)$/
const MATH_BLOCK_CLOSE_DOLLAR_RE = /^(.*?)\$\$\s*$/
const MATH_BLOCK_CLOSE_BRACKET_RE = /^(.*?)\\\]\s*$/
export const MEDIA_LINE_RE = /^\s*[`"']?MEDIA:\s*(\S+?)[`"']?\s*$/ export const MEDIA_LINE_RE = /^\s*[`"']?MEDIA:\s*(\S+?)[`"']?\s*$/
export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/ export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/
@ -31,6 +41,13 @@ export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/
// `thing ~! more ~?` from Kimi / Qwen / GLM (kaomoji-style decorators) // `thing ~! more ~?` from Kimi / Qwen / GLM (kaomoji-style decorators)
// doesn't pair up the first `~` with the next one on the line and swallow // doesn't pair up the first `~` with the next one on the line and swallow
// the text between them as a dim `_`-prefixed span. // the text between them as a dim `_`-prefixed span.
//
// Inline math (`$x$` and `\(x\)`) takes precedence over emphasis at the
// same start position because regex alternation is leftmost-first; a
// dollar-delimited span at column N wins over a `*` at column N+1, so
// `$P=a*b*c$` renders as math instead of having `*b*` corrupted into
// italics. Single-character minimums and "no space adjacent to delimiter"
// rules keep currency prose like `$5 to $10` from being swallowed.
export const INLINE_RE = new RegExp( export const INLINE_RE = new RegExp(
[ [
`!\\[(.*?)\\]\\(${MD_URL_RE}\\)`, // 1,2 image `!\\[(.*?)\\]\\(${MD_URL_RE}\\)`, // 1,2 image
@ -46,7 +63,13 @@ export const INLINE_RE = new RegExp(
`\\[\\^([^\\]]+)\\]`, // 13 footnote ref `\\[\\^([^\\]]+)\\]`, // 13 footnote ref
`\\^([^^\\s][^^]*?)\\^`, // 14 superscript `\\^([^^\\s][^^]*?)\\^`, // 14 superscript
`~([A-Za-z0-9]{1,8})~`, // 15 subscript `~([A-Za-z0-9]{1,8})~`, // 15 subscript
`https?:\\/\\/[^\\s<]+` // 16 bare URL `(https?:\\/\\/[^\\s<]+)`, // 16 bare URL — wrapped so it owns its own
// capture group; without this, the math
// spans below would land in m[16] and the
// MdInline dispatcher would treat them as
// bare URLs and render them as autolinks.
`(?<!\\$)\\$([^\\s$](?:[^$\\n]*?[^\\s$])?)\\$(?!\\$)`, // 17 inline math $...$
`\\\\\\(([^\\n]+?)\\\\\\)` // 18 inline math \(...\)
].join('|'), ].join('|'),
'g' 'g'
) )
@ -93,6 +116,8 @@ export const stripInlineMarkup = (v: string) =>
.replace(/\[\^([^\]]+)\]/g, '[$1]') .replace(/\[\^([^\]]+)\]/g, '[$1]')
.replace(/\^([^^\s][^^]*?)\^/g, '^$1') .replace(/\^([^^\s][^^]*?)\^/g, '^$1')
.replace(/~([A-Za-z0-9]{1,8})~/g, '_$1') .replace(/~([A-Za-z0-9]{1,8})~/g, '_$1')
.replace(/(?<!\$)\$([^\s$](?:[^$\n]*?[^\s$])?)\$(?!\$)/g, '$1')
.replace(/\\\(([^\n]+?)\\\)/g, '$1')
const renderTable = (k: number, rows: string[][], t: Theme) => { const renderTable = (k: number, rows: string[][], t: Theme) => {
const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length))) const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length)))
@ -201,6 +226,19 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
if (url.length < m[16].length) { if (url.length < m[16].length) {
parts.push(<Text key={parts.length}>{m[16].slice(url.length)}</Text>) parts.push(<Text key={parts.length}>{m[16].slice(url.length)}</Text>)
} }
} else if (m[17] ?? m[18]) {
// Inline math is run through `texToUnicode` (Greek letters, blackboard capitals,
// operators, sub/superscripts, fractions) and rendered in italic
// amber. Italic is the disambiguator — links use amber+underline,
// so without italic readers can't tell `\mathbb{R}` (math) from a
// hyperlinked word. Anything `texToUnicode` doesn't recognise is
// preserved verbatim, so unfamiliar commands just look like their
// raw LaTeX rather than vanishing.
parts.push(
<Text color={t.color.amber} italic key={parts.length}>
{texToUnicode(m[17] ?? m[18]!)}
</Text>
)
} }
last = i + m[0].length last = i + m[0].length
@ -398,32 +436,84 @@ function MdImpl({ compact, t, text }: MdProps) {
continue continue
} }
if (line.trim().startsWith('$$')) { const mathOpen = line.match(MATH_BLOCK_OPEN_RE)
start('code')
if (mathOpen) {
const opener = mathOpen[1]!
const closeRe = opener === '$$' ? MATH_BLOCK_CLOSE_DOLLAR_RE : MATH_BLOCK_CLOSE_BRACKET_RE
const headRest = mathOpen[2] ?? ''
const block: string[] = [] const block: string[] = []
for (i++; i < lines.length; i++) { // Single-line block: `$$x + y = z$$` or `\[x\]`. Capture inner content
if (lines[i]!.trim().startsWith('$$')) { // and emit the block immediately. Without this, the close-scan loop
// skips line `i` and treats the next opener as our closer, swallowing
// every paragraph in between.
const sameLineClose = headRest.match(closeRe)
if (sameLineClose) {
const inner = sameLineClose[1]!.trim()
start('code')
nodes.push(
<Box flexDirection="column" key={key} paddingLeft={2}>
<Text color={t.color.dim}> math</Text>
{inner ? <Text color={t.color.amber}>{texToUnicode(inner)}</Text> : null}
</Box>
)
i++ i++
continue
}
// Multi-line block: scan ahead for a real closer before committing.
// If none exists in the rest of the document, render this line as a
// paragraph instead of consuming everything that follows.
let closeIdx = -1
for (let j = i + 1; j < lines.length; j++) {
if (closeRe.test(lines[j]!)) {
closeIdx = j
break break
} }
block.push(lines[i]!)
} }
if (closeIdx < 0) {
start('paragraph')
nodes.push(<MdInline key={key} t={t} text={line} />)
i++
continue
}
if (headRest.trim()) {
block.push(headRest)
}
for (let j = i + 1; j < closeIdx; j++) {
block.push(lines[j]!)
}
const tail = lines[closeIdx]!.match(closeRe)![1]!.trimEnd()
if (tail.trim()) {
block.push(tail)
}
start('code')
nodes.push( nodes.push(
<Box flexDirection="column" key={key} paddingLeft={2}> <Box flexDirection="column" key={key} paddingLeft={2}>
<Text color={t.color.dim}> math</Text> <Text color={t.color.dim}> math</Text>
{block.map((l, j) => ( {block.map((l, j) => (
<Text color={t.color.amber} key={j}> <Text color={t.color.amber} key={j}>
{l} {texToUnicode(l)}
</Text> </Text>
))} ))}
</Box> </Box>
) )
i = closeIdx + 1
continue continue
} }
@ -434,7 +524,7 @@ function MdImpl({ compact, t, text }: MdProps) {
start('heading') start('heading')
nodes.push( nodes.push(
<Text bold color={t.color.amber} key={key}> <Text bold color={t.color.amber} key={key}>
{heading} <MdInline t={t} text={heading} />
</Text> </Text>
) )
i++ i++
@ -446,7 +536,7 @@ function MdImpl({ compact, t, text }: MdProps) {
start('heading') start('heading')
nodes.push( nodes.push(
<Text bold color={t.color.amber} key={key}> <Text bold color={t.color.amber} key={key}>
{line.trim()} <MdInline t={t} text={line.trim()} />
</Text> </Text>
) )
i += 2 i += 2

View file

@ -35,19 +35,48 @@ import type { Theme } from '../theme.js'
import { Md } from './markdown.js' import { Md } from './markdown.js'
// Count ``` or ~~~ fence toggles in `s` up to `end`. Odd = currently inside // Count ``` / ~~~ AND `$$` / `\[…\]` fence toggles in `s` up to `end`. Odd
// a fenced block; we can't split the prefix there or we'd orphan the fence. // = currently inside a fenced block; splitting the prefix there would
// orphan the fence and let the unstable suffix re-render as broken
// markdown. Math fences only toggle when the code fence is closed so
// snippets like ` ```\n$$x$$\n``` ` (math example inside a code block)
// don't double-count. A `$$x$$` line that opens AND closes on its own
// produces zero net toggles; that's the `line.length >= 4` check plus the trailing-`$$` test.
const fenceOpenAt = (s: string, end: number) => { const fenceOpenAt = (s: string, end: number) => {
let open = false let codeOpen = false
let mathOpen = false
let mathOpener: '$$' | '\\[' | null = null
let i = 0 let i = 0
while (i < end) { while (i < end) {
const nl = s.indexOf('\n', i) const nl = s.indexOf('\n', i)
const lineEnd = nl < 0 || nl > end ? end : nl const lineEnd = nl < 0 || nl > end ? end : nl
const line = s.slice(i, lineEnd) const line = s.slice(i, lineEnd).trim()
if (/^\s*(?:`{3,}|~{3,})/.test(line)) { if (/^(?:`{3,}|~{3,})/.test(line)) {
open = !open codeOpen = !codeOpen
} else if (!codeOpen) {
if (!mathOpen && /^\$\$/.test(line)) {
const isSingleLine = line.length >= 4 && /\$\$$/.test(line)
if (!isSingleLine) {
mathOpen = true
mathOpener = '$$'
}
} else if (!mathOpen && /^\\\[/.test(line)) {
const isSingleLine = /\\\]$/.test(line)
if (!isSingleLine) {
mathOpen = true
mathOpener = '\\['
}
} else if (mathOpen && mathOpener === '$$' && /\$\$$/.test(line)) {
mathOpen = false
mathOpener = null
} else if (mathOpen && mathOpener === '\\[' && /\\\]$/.test(line)) {
mathOpen = false
mathOpener = null
}
} }
if (nl < 0 || nl >= end) { if (nl < 0 || nl >= end) {
@ -57,7 +86,7 @@ const fenceOpenAt = (s: string, end: number) => {
i = nl + 1 i = nl + 1
} }
return open return codeOpen || mathOpen
} }
// Find the last "\n\n" boundary before `end` that is OUTSIDE a fenced code // Find the last "\n\n" boundary before `end` that is OUTSIDE a fenced code

View file

@ -0,0 +1,570 @@
// Best-effort LaTeX → Unicode for inline / display math captured by the
// markdown renderer. The terminal can't typeset LaTeX, but Unicode covers
// most of what models actually emit: Greek letters, blackboard / fraktur /
// calligraphic capitals, set theory + logic operators, common arrows,
// sub/superscripts, and `\frac{a}{b}` collapsed to `a/b`.
//
// Design rules:
// • Pure regex pipeline. Anything we don't recognise is preserved
// verbatim (so a `\foo{bar}` we've never heard of still survives).
// A real LaTeX parser would be more correct but throws on partial
// input — terminal users would rather see the raw command than a
// parse-error placeholder.
// • Longest-match-first ordering on commands so `\le` doesn't shadow
// `\leq`, `\sub` doesn't shadow `\subseteq`, etc.
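// • Word-boundary lookahead `(?![A-Za-z])` after each alphabetic command
// so `\pix` (made-up command) doesn't get partially substituted as `π`.
// Punctuation commands such as `\{` and `\,` skip the lookahead, so
// they still substitute when a letter follows immediately.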
// • `\mathbb{X}`, `\mathcal{X}`, `\mathfrak{X}` only handle a single
// letter argument — multi-letter `\mathbb{NN}` is rare and would
// need a real parser to do correctly.
// • Sub/super scripts only convert if EVERY character has a Unicode
// equivalent, so `^{n+1}` becomes `ⁿ⁺¹`. Otherwise nothing in the
// script is substituted: a multi-character body such as `_{i,j}` is
// wrapped in parens (`_(i,j)`) and a single-character body drops its
// braces (`^∞`), because leftover braces read as unrendered LaTeX.
// Command → replacement table for the symbol pass. Keys are the literal
// LaTeX command including its leading backslash; values are the text that
// replaces it. Every entry must carry a non-empty replacement except `\!`
// (negative thin space), which is intentionally dropped: an accidentally
// blank value is not nullish, so a `SYMBOLS[m] ?? m` lookup would silently
// delete the command from the output instead of preserving it.
const SYMBOLS: Record<string, string> = {
  // Greek lowercase
  '\\alpha': 'α',
  '\\beta': 'β',
  '\\gamma': 'γ',
  '\\delta': 'δ',
  '\\epsilon': 'ε',
  '\\varepsilon': 'ε',
  '\\zeta': 'ζ',
  '\\eta': 'η',
  '\\theta': 'θ',
  '\\vartheta': 'ϑ',
  '\\iota': 'ι',
  '\\kappa': 'κ',
  '\\lambda': 'λ',
  '\\mu': 'μ',
  '\\nu': 'ν',
  '\\xi': 'ξ',
  '\\pi': 'π',
  '\\varpi': 'ϖ',
  '\\rho': 'ρ',
  '\\varrho': 'ϱ',
  '\\sigma': 'σ',
  '\\varsigma': 'ς',
  '\\tau': 'τ',
  '\\upsilon': 'υ',
  '\\phi': 'φ',
  '\\varphi': 'φ',
  '\\chi': 'χ',
  '\\psi': 'ψ',
  '\\omega': 'ω',
  // Greek uppercase
  '\\Gamma': 'Γ',
  '\\Delta': 'Δ',
  '\\Theta': 'Θ',
  '\\Lambda': 'Λ',
  '\\Xi': 'Ξ',
  '\\Pi': 'Π',
  '\\Sigma': 'Σ',
  '\\Upsilon': 'Υ',
  '\\Phi': 'Φ',
  '\\Psi': 'Ψ',
  '\\Omega': 'Ω',
  // Big operators
  '\\sum': '∑',
  '\\prod': '∏',
  '\\coprod': '∐',
  '\\int': '∫',
  '\\iint': '∬',
  '\\iiint': '∭',
  '\\oint': '∮',
  '\\bigcup': '⋃',
  '\\bigcap': '⋂',
  '\\bigvee': '⋁',
  '\\bigwedge': '⋀',
  '\\bigoplus': '⨁',
  '\\bigotimes': '⨂',
  // Calculus
  '\\partial': '∂',
  '\\nabla': '∇',
  '\\sqrt': '√',
  // Sets
  '\\emptyset': '∅',
  '\\varnothing': '∅',
  '\\infty': '∞',
  '\\in': '∈',
  '\\notin': '∉',
  '\\ni': '∋',
  '\\subset': '⊂',
  '\\supset': '⊃',
  '\\subseteq': '⊆',
  '\\supseteq': '⊇',
  '\\subsetneq': '⊊',
  '\\supsetneq': '⊋',
  '\\cup': '∪',
  '\\cap': '∩',
  '\\setminus': '∖',
  '\\complement': '∁',
  // Logic
  '\\forall': '∀',
  '\\exists': '∃',
  '\\nexists': '∄',
  '\\land': '∧',
  '\\lor': '∨',
  '\\lnot': '¬',
  '\\neg': '¬',
  '\\therefore': '∴',
  '\\because': '∵',
  // Relations
  '\\le': '≤',
  '\\leq': '≤',
  '\\ge': '≥',
  '\\geq': '≥',
  '\\ne': '≠',
  '\\neq': '≠',
  '\\ll': '≪',
  '\\gg': '≫',
  '\\approx': '≈',
  '\\equiv': '≡',
  '\\cong': '≅',
  '\\sim': '∼',
  '\\simeq': '≃',
  '\\propto': '∝',
  '\\perp': '⊥',
  '\\parallel': '∥',
  '\\models': '⊨',
  '\\vdash': '⊢',
  '\\mid': '∣',
  // Brackets / fences (named delimiter commands; the `\left\X` / `\right\X`
  // unwrapping in texToUnicode leaves these behind for the symbol pass).
  '\\langle': '⟨',
  '\\rangle': '⟩',
  '\\lceil': '⌈',
  '\\rceil': '⌉',
  '\\lfloor': '⌊',
  '\\rfloor': '⌋',
  '\\|': '‖',
  // Arrows
  '\\to': '→',
  '\\rightarrow': '→',
  '\\leftarrow': '←',
  '\\leftrightarrow': '↔',
  '\\Rightarrow': '⇒',
  '\\Leftarrow': '⇐',
  '\\Leftrightarrow': '⇔',
  '\\implies': '⟹',
  '\\impliedby': '⟸',
  '\\iff': '⟺',
  '\\mapsto': '↦',
  '\\hookrightarrow': '↪',
  '\\hookleftarrow': '↩',
  '\\uparrow': '↑',
  '\\downarrow': '↓',
  '\\updownarrow': '↕',
  // Binary operators
  '\\cdot': '⋅',
  '\\cdots': '⋯',
  '\\ldots': '…',
  '\\dots': '…',
  '\\dotsb': '…',
  '\\dotsc': '…',
  '\\vdots': '⋮',
  '\\ddots': '⋱',
  '\\times': '×',
  '\\div': '÷',
  '\\pm': '±',
  '\\mp': '∓',
  '\\circ': '∘',
  '\\bullet': '•',
  '\\star': '⋆',
  '\\ast': '∗',
  '\\oplus': '⊕',
  '\\ominus': '⊖',
  '\\otimes': '⊗',
  '\\odot': '⊙',
  '\\diamond': '⋄',
  '\\angle': '∠',
  '\\triangle': '△',
  // Spacing — collapse to varying widths of regular space: the thin/medium
  // spaces become one space, `\quad` two, `\qquad` four, and the negative
  // thin space `\!` is removed outright.
  '\\,': ' ',
  '\\;': ' ',
  '\\:': ' ',
  '\\!': '',
  '\\ ': ' ',
  '\\quad': '  ',
  '\\qquad': '    ',
  // Functions (LaTeX renders these in roman; we just keep the name)
  '\\sin': 'sin',
  '\\cos': 'cos',
  '\\tan': 'tan',
  '\\cot': 'cot',
  '\\sec': 'sec',
  '\\csc': 'csc',
  '\\arcsin': 'arcsin',
  '\\arccos': 'arccos',
  '\\arctan': 'arctan',
  '\\sinh': 'sinh',
  '\\cosh': 'cosh',
  '\\tanh': 'tanh',
  '\\log': 'log',
  '\\ln': 'ln',
  '\\exp': 'exp',
  '\\det': 'det',
  '\\dim': 'dim',
  '\\ker': 'ker',
  '\\lim': 'lim',
  '\\liminf': 'liminf',
  '\\limsup': 'limsup',
  '\\sup': 'sup',
  '\\inf': 'inf',
  '\\max': 'max',
  '\\min': 'min',
  '\\arg': 'arg',
  '\\gcd': 'gcd',
  // Escaped literals — model occasionally emits these for display
  '\\&': '&',
  '\\%': '%',
  '\\$': '$',
  '\\#': '#',
  '\\_': '_',
  '\\{': '{',
  '\\}': '}'
}
// Blackboard-bold capitals for `\mathbb{X}`. Seven letters (C, H, N, P, Q,
// R, Z) were encoded early in the Letterlike Symbols block, so they are
// single BMP code points rather than members of the Mathematical
// Alphanumeric Symbols block that holds the rest. Every capital must map to
// a non-empty glyph: a blank entry is not nullish and would make a
// `BB[c] ?? raw` lookup erase the command entirely.
const BB: Record<string, string> = {
  A: '𝔸',
  B: '𝔹',
  C: 'ℂ',
  D: '𝔻',
  E: '𝔼',
  F: '𝔽',
  G: '𝔾',
  H: 'ℍ',
  I: '𝕀',
  J: '𝕁',
  K: '𝕂',
  L: '𝕃',
  M: '𝕄',
  N: 'ℕ',
  O: '𝕆',
  P: 'ℙ',
  Q: 'ℚ',
  R: 'ℝ',
  S: '𝕊',
  T: '𝕋',
  U: '𝕌',
  V: '𝕍',
  W: '𝕎',
  X: '𝕏',
  Y: '𝕐',
  Z: 'ℤ'
}
// Script (calligraphic) capitals for `\mathcal{X}`. Eight letters (B, E, F,
// H, I, L, M, R) live in the Letterlike Symbols block as BMP code points;
// the remainder come from Mathematical Alphanumeric Symbols. Every capital
// must map to a non-empty glyph: a blank entry is not nullish and would make
// a `CAL[c] ?? raw` lookup erase the command entirely.
const CAL: Record<string, string> = {
  A: '𝒜',
  B: 'ℬ',
  C: '𝒞',
  D: '𝒟',
  E: 'ℰ',
  F: 'ℱ',
  G: '𝒢',
  H: 'ℋ',
  I: 'ℐ',
  J: '𝒥',
  K: '𝒦',
  L: 'ℒ',
  M: 'ℳ',
  N: '𝒩',
  O: '𝒪',
  P: '𝒫',
  Q: '𝒬',
  R: 'ℛ',
  S: '𝒮',
  T: '𝒯',
  U: '𝒰',
  V: '𝒱',
  W: '𝒲',
  X: '𝒳',
  Y: '𝒴',
  Z: '𝒵'
}
// Fraktur capitals for `\mathfrak{X}`. Five letters (C, H, I, R, Z) live in
// the Letterlike Symbols block as BMP code points; the remainder come from
// Mathematical Alphanumeric Symbols. Every capital must map to a non-empty
// glyph: a blank entry is not nullish and would make a `FRAK[c] ?? raw`
// lookup erase the command entirely.
const FRAK: Record<string, string> = {
  A: '𝔄',
  B: '𝔅',
  C: 'ℭ',
  D: '𝔇',
  E: '𝔈',
  F: '𝔉',
  G: '𝔊',
  H: 'ℌ',
  I: 'ℑ',
  J: '𝔍',
  K: '𝔎',
  L: '𝔏',
  M: '𝔐',
  N: '𝔑',
  O: '𝔒',
  P: '𝔓',
  Q: '𝔔',
  R: 'ℜ',
  S: '𝔖',
  T: '𝔗',
  U: '𝔘',
  V: '𝔙',
  W: '𝔚',
  X: '𝔛',
  Y: '𝔜',
  Z: 'ℨ'
}
// Plain character → superscript glyph. Built by zipping each run of source
// characters with the equally long run of raised glyphs beside it, so the
// two strings in a pair must stay the same length and in the same order.
// There is no entry for `q`.
const SUPERSCRIPT: Record<string, string> = Object.fromEntries(
  (
    [
      ['0123456789', '⁰¹²³⁴⁵⁶⁷⁸⁹'],
      ['+-=()', '⁺⁻⁼⁽⁾'],
      ['abcdefghijklmnoprstuvwxyz', 'ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖʳˢᵗᵘᵛʷˣʸᶻ']
    ] as ReadonlyArray<readonly [string, string]>
  ).flatMap(([plain, raised]) => {
    const glyphs = [...raised]

    return [...plain].map((ch, i): [string, string] => [ch, glyphs[i] ?? ''])
  })
)
// Plain character → subscript glyph. Built by zipping each run of source
// characters with the equally long run of lowered glyphs beside it, so the
// two strings in a pair must stay the same length and in the same order.
// Only the letters listed here have an entry.
const SUBSCRIPT: Record<string, string> = Object.fromEntries(
  (
    [
      ['0123456789', '₀₁₂₃₄₅₆₇₈₉'],
      ['+-=()', '₊₋₌₍₎'],
      ['aehijklmnoprstuvx', 'ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓ']
    ] as ReadonlyArray<readonly [string, string]>
  ).flatMap(([plain, lowered]) => {
    const glyphs = [...lowered]

    return [...plain].map((ch, i): [string, string] => [ch, glyphs[i] ?? ''])
  })
)
/**
 * Backslash-escapes every regex metacharacter in `s` so the result can be
 * embedded in a `RegExp` source and match the original text literally.
 */
function escapeRe(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}
// Pre-compile two symbol regexes: one for letter-ending commands (`\pi`,
// `\sum`) which need a `(?![A-Za-z])` lookahead so they don't partially
// match `\pix` or `\summa`, and one for punctuation-ending commands
// (`\{`, `\,`, `\|`) which must NOT have the lookahead — otherwise
// `\{p` would refuse to substitute because `p` is a letter.
//
// Longest commands first inside each group so `\leq` beats `\le`.
/**
 * Partitions command names by their final character: `letter` holds the
 * ones ending in an ASCII letter, `punct` holds everything else. Relative
 * order within each group follows the order of `keys`.
 */
const splitByEnding = (keys: string[]) => {
  const endsInLetter = (cmd: string) => /[A-Za-z]$/.test(cmd)

  return {
    letter: keys.filter(endsInLetter),
    punct: keys.filter(cmd => !endsInLetter(cmd))
  }
}
/**
 * Builds a regex alternation (`a|b|c`) from literal command names, each one
 * regex-escaped. Longer commands come first so the regex engine tries
 * `\leq` before its prefix `\le`. Sorts a copy: `Array.prototype.sort`
 * works in place and would otherwise reorder the caller's array.
 */
const buildAlt = (cmds: string[]) =>
  [...cmds]
    .sort((a, b) => b.length - a.length)
    .map(escapeRe)
    .join('|')
// Compile the two symbol matchers once at module load. Commands that end in
// a letter get a trailing `(?![A-Za-z])` so they only match as a whole
// command name; punctuation-ending commands are matched without it.
const { letter: LETTER_CMDS, punct: PUNCT_CMDS } = splitByEnding(Object.keys(SYMBOLS))
const SYMBOL_LETTER_RE = new RegExp(`(?:${buildAlt(LETTER_CMDS)})(?![A-Za-z])`, 'g')
const SYMBOL_PUNCT_RE = new RegExp(`(?:${buildAlt(PUNCT_CMDS)})`, 'g')
/**
 * Renders the body of a braced script (`^{…}` / `_{…}`).
 *
 * @param input body text between the braces
 * @param table character → script-glyph lookup
 * @param sigil the script marker to re-emit when conversion is not possible
 * @returns the glyph string when every character of `input` has a non-empty
 *   entry in `table`; otherwise the sigil followed by the trimmed body —
 *   bare when it is exactly one character, wrapped in parentheses if not.
 */
const convertScript = (input: string, table: Record<string, string>, sigil: '^' | '_'): string => {
  const glyphs = [...input].map(ch => table[ch])

  if (glyphs.every(Boolean)) {
    return glyphs.join('')
  }

  // Not fully convertible. A lone visible character (say `∞`, produced by an
  // earlier symbol substitution) reads best with no wrapper at all: `^∞`.
  // Longer bodies get parentheses (`e^(iπ)`) because they look like ordinary
  // punctuation, whereas braces would look like unrendered LaTeX.
  const body = input.trim()
  const isSingleChar = Array.from(body).length === 1

  return isSingleChar ? `${sigil}${body}` : `${sigil}(${body})`
}
// True when `expr` is one parenthesised group — i.e. the `(` at index 0 is
// closed by the final `)`. `(a+b)` qualifies; `(a)+(b)` does not, because
// its first group closes before the end of the string.
const isSingleParenGroup = (expr: string): boolean => {
  if (!expr.startsWith('(') || !expr.endsWith(')')) {
    return false
  }

  let depth = 0

  for (let i = 0; i < expr.length; i++) {
    const ch = expr[i]

    if (ch === '(') {
      depth++
    } else if (ch === ')') {
      depth--

      if (depth === 0) {
        // The opening paren just closed; it wraps everything only if this
        // is also the last character.
        return i === expr.length - 1
      }
    }
  }

  return false
}

// Prepare one side of a `\frac` for inline `/` rendering. The trimmed
// expression is returned unchanged when it is empty, when it is already a
// single parenthesised group, or when it contains none of `+ - / *` and no
// whitespace. Anything else is wrapped in parentheses so `\frac{a+b}{c}`
// becomes `(a+b)/c` rather than `a+b/c`. Compact factors such as `n!` or
// `x^2` therefore stay bare; wrapping them would only clutter the output.
const wrapForFrac = (expr: string) => {
  const trimmed = expr.trim()

  if (!trimmed || isSingleParenGroup(trimmed)) {
    return trimmed
  }

  if (/[+\-/*]|\s/.test(trimmed)) {
    return `(${trimmed})`
  }

  return trimmed
}
/**
 * Best-effort conversion of a LaTeX math body to Unicode text.
 *
 * The input runs through a fixed sequence of regex passes: single-letter
 * alphabets, style wrappers and accents, `\frac`, extensible and long
 * arrows, `\left` / `\right` stripping, the symbol tables, and finally
 * super/subscripts. Anything no pass recognises is left in the output as
 * written.
 *
 * @param input LaTeX source of one math span, without its delimiters
 * @returns the converted text
 */
export function texToUnicode(input: string): string {
  // Placeholder (a private-use code point) that stands in for an escaped
  // `\_` while the subscript passes run, so `a\_n` ends up as `a_n` rather
  // than `aₙ`. It is swapped back to a literal `_` just before returning.
  const ESCAPED_UNDERSCORE = '\uE000'

  let s = input

  // Single-letter alphabets. `||` rather than `??` so that a missing OR
  // blank table entry keeps the raw command visible instead of erasing it.
  s = s.replace(/\\mathbb\s*\{([A-Za-z])\}/g, (raw, c: string) => BB[c] || raw)
  s = s.replace(/\\mathcal\s*\{([A-Za-z])\}/g, (raw, c: string) => CAL[c] || raw)
  s = s.replace(/\\mathfrak\s*\{([A-Za-z])\}/g, (raw, c: string) => FRAK[c] || raw)

  // Style wrappers have no terminal equivalent — keep only the content.
  s = s.replace(/\\mathbf\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\mathit\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\mathrm\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\text\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\operatorname\s*\{([^{}]+)\}/g, (_, c: string) => c)

  // Accents: append the matching combining mark after the content.
  s = s.replace(/\\overline\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0305`)
  s = s.replace(/\\hat\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0302`)
  s = s.replace(/\\bar\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0304`)
  s = s.replace(/\\tilde\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0303`)
  s = s.replace(/\\vec\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u20D7`)
  s = s.replace(/\\dot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0307`)
  s = s.replace(/\\ddot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0308`)

  // Apply \frac repeatedly so nested fractions resolve from the inside
  // out — `\frac{1}{1+\frac{1}{x}}` collapses cleanly. The guard caps the
  // number of passes at 8.
  let prev = ''
  let guard = 0

  while (s !== prev && guard++ < 8) {
    prev = s
    s = s.replace(/\\frac\s*\{([^{}]*)\}\s*\{([^{}]*)\}/g, (_, num: string, den: string) => `${wrapForFrac(num)}/${wrapForFrac(den)}`)
  }

  // `\xrightarrow{label}` / `\xleftarrow{label}` collapse to an arrow with
  // the label inline. LaTeX renders the label above the arrow; in monospace
  // we put it adjacent — `─label→` / `←label─` is the closest readable
  // approximation. Run before the symbol pass so the label can still pick
  // up Greek and operator substitutions afterwards.
  s = s.replace(/\\xrightarrow\s*\{([^{}]*)\}/g, (_, label: string) => `─${label.trim()}→`)
  s = s.replace(/\\xleftarrow\s*\{([^{}]*)\}/g, (_, label: string) => `←${label.trim()}─`)

  // Long arrows. The lookahead matches the rule used for every other
  // letter-ending command: never substitute a prefix of a longer name.
  s = s.replace(/\\Longrightarrow(?![A-Za-z])/g, '⟹')
  s = s.replace(/\\Longleftarrow(?![A-Za-z])/g, '⟸')
  s = s.replace(/\\Longleftrightarrow(?![A-Za-z])/g, '⟺')

  // `\left` and `\right` are sizing wrappers around any delimiter — bare
  // (`\left(`), escaped (`\left\{`), or named (`\left\langle`). Strip the
  // wrapper unconditionally and let the symbol pass handle whatever
  // delimiter follows. The optional `.` consumes `\left.` / `\right.`,
  // which mean "no delimiter". Lookahead `(?![A-Za-z])` keeps
  // `\leftarrow` / `\leftrightarrow` safe.
  s = s.replace(/\\left(?![A-Za-z])\.?/g, '')
  s = s.replace(/\\right(?![A-Za-z])\.?/g, '')

  // Hide escaped underscores before any pass can turn them into a bare `_`.
  s = s.replace(/\\_/g, ESCAPED_UNDERSCORE)

  // Run symbol substitution BEFORE scripts so a body like `^{\infty}`
  // becomes `^{∞}` first; convertScript then falls back to `^∞` because
  // its table has no entry for `∞`.
  //
  // Punctuation pass first — these can be followed by letters (`\{p` is
  // "open-brace then p"), so the letter pass's `(?![A-Za-z])` rule would
  // wrongly block them.
  s = s.replace(SYMBOL_PUNCT_RE, m => SYMBOLS[m] ?? m)
  s = s.replace(SYMBOL_LETTER_RE, m => SYMBOLS[m] ?? m)

  // Braced scripts first, then bare single-character scripts. The bare
  // form handles ONLY alphanumerics and `+`/`-`/`=`. Parens are
  // intentionally excluded because the braced fallback can emit `^(...)`
  // and we don't want the bare pass to convert its opening paren into `⁽`
  // and orphan the closing one.
  s = s.replace(/\^\s*\{([^{}]+)\}/g, (_, body: string) => convertScript(body, SUPERSCRIPT, '^'))
  s = s.replace(/\^([A-Za-z0-9+\-=])/g, (raw, ch: string) => SUPERSCRIPT[ch] ?? raw)
  s = s.replace(/_\s*\{([^{}]+)\}/g, (_, body: string) => convertScript(body, SUBSCRIPT, '_'))
  s = s.replace(/_([A-Za-z0-9+\-=])/g, (raw, ch: string) => SUBSCRIPT[ch] ?? raw)

  // Escaped underscores are literal text; restore them now that the
  // subscript passes are done.
  return s.split(ESCAPED_UNDERSCORE).join('_')
}