hermes-agent/ui-tui/src/lib/mathUnicode.ts
Austin Pickett 3379f88ea4 docs: clarify wrapForFrac and streaming math-fence rationale
Address two Copilot review comments on PR #17175.

- `wrapForFrac` doc said "additive operators or whitespace" but the
  implementation also matches `*` and `/`. The wider behaviour is the
  one we want (nested products and fractions need parens to disambiguate
  inline `/`), so the doc is updated to match instead of tightening the
  regex.

- `fenceOpenAt` was flagged as "overly conservative" vs. `markdown.tsx`,
  which falls back to paragraph rendering for unclosed `$$` openers.
  Mirroring that fallback in the streaming chunker would prematurely
  commit a paragraph rendering of the unclosed opener to the monotonic
  stable prefix, where it would be frozen and become wrong the moment
  the closer streams in. The asymmetry is deliberate; document why so
  it isn't "fixed" again later.

Made-with: Cursor
2026-04-28 21:43:32 -04:00

770 lines
20 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Best-effort LaTeX → Unicode for inline / display math captured by the
// markdown renderer. The terminal can't typeset LaTeX, but Unicode covers
// most of what models actually emit: Greek letters, blackboard / fraktur /
// calligraphic capitals, set theory + logic operators, common arrows,
// sub/superscripts, and `\frac{a}{b}` collapsed to `a/b`.
//
// Design rules:
// • Pure regex pipeline. Anything we don't recognise is preserved
// verbatim (so a `\foo{bar}` we've never heard of still survives).
// A real LaTeX parser would be more correct but throws on partial
// input — terminal users would rather see the raw command than a
// parse-error placeholder.
// • Longest-match-first ordering on commands so `\le` doesn't shadow
// `\leq`, `\subset` doesn't shadow `\subseteq`, etc.
// • Word-boundary lookahead `(?![A-Za-z])` after each command so
// `\pix` (made-up command) doesn't get partially substituted as `π`.
// • `\mathbb{X}`, `\mathcal{X}`, `\mathfrak{X}` only handle a single
// letter argument — multi-letter `\mathbb{NN}` is rare and would
// need a real parser to do correctly.
// • Sub/super scripts only convert if EVERY character has a Unicode
// equivalent. Mixed content like `^{i\pi}` falls back to a
// parenthesised `^(iπ)` so we don't emit a half-converted `ⁱπ`
// (which reads worse than the source).
// LaTeX command → replacement text for the plain symbol pass. Keys include
// the leading backslash; values are emitted verbatim. Every command must
// map to a non-empty string except `\!` (negative thin space), which is
// deliberately dropped: an accidental '' makes the command vanish from the
// output instead of being rendered or preserved.
//
// Glyphs that are easily confused with ASCII (∪ vs U, ∨ vs v, ∼ vs ~,
// ∣ vs |, ∗ vs *, ∖ vs \) are written as `\u` escapes with the glyph in a
// trailing comment so tooling that hides or strips "ambiguous" characters
// cannot silently blank the mapping.
const SYMBOLS: Record<string, string> = {
  // Greek lowercase
  '\\alpha': 'α',
  '\\beta': 'β',
  '\\gamma': 'γ',
  '\\delta': 'δ',
  '\\epsilon': 'ε',
  '\\varepsilon': 'ε',
  '\\zeta': 'ζ',
  '\\eta': 'η',
  '\\theta': 'θ',
  '\\vartheta': 'ϑ',
  '\\iota': 'ι',
  '\\kappa': 'κ',
  '\\lambda': 'λ',
  '\\mu': 'μ',
  '\\nu': 'ν',
  '\\xi': 'ξ',
  '\\pi': 'π',
  '\\varpi': 'ϖ',
  '\\rho': 'ρ',
  '\\varrho': 'ϱ',
  '\\sigma': 'σ',
  '\\varsigma': 'ς',
  '\\tau': 'τ',
  '\\upsilon': 'υ',
  '\\phi': 'φ',
  '\\varphi': 'φ',
  '\\chi': 'χ',
  '\\psi': 'ψ',
  '\\omega': 'ω',
  // Greek uppercase
  '\\Gamma': 'Γ',
  '\\Delta': 'Δ',
  '\\Theta': 'Θ',
  '\\Lambda': 'Λ',
  '\\Xi': 'Ξ',
  '\\Pi': 'Π',
  '\\Sigma': 'Σ',
  '\\Upsilon': 'Υ',
  '\\Phi': 'Φ',
  '\\Psi': 'Ψ',
  '\\Omega': 'Ω',
  // Big operators
  '\\sum': '∑',
  '\\prod': '∏',
  '\\coprod': '∐',
  '\\int': '∫',
  '\\iint': '∬',
  '\\iiint': '∭',
  '\\oint': '∮',
  '\\bigcup': '\u22C3', // ⋃
  '\\bigcap': '⋂',
  '\\bigvee': '\u22C1', // ⋁
  '\\bigwedge': '⋀',
  '\\bigoplus': '⨁',
  '\\bigotimes': '⨂',
  // Calculus
  '\\partial': '∂',
  '\\nabla': '∇',
  '\\sqrt': '√',
  // Sets
  '\\emptyset': '∅',
  '\\varnothing': '∅',
  '\\infty': '∞',
  '\\in': '∈',
  '\\notin': '∉',
  '\\ni': '∋',
  '\\subset': '⊂',
  '\\supset': '⊃',
  '\\subseteq': '⊆',
  '\\supseteq': '⊇',
  '\\subsetneq': '⊊',
  '\\supsetneq': '⊋',
  '\\cup': '\u222A', // ∪
  '\\cap': '∩',
  '\\setminus': '\u2216', // ∖
  '\\complement': '∁',
  // Logic
  '\\forall': '∀',
  '\\exists': '∃',
  '\\nexists': '∄',
  '\\land': '∧',
  '\\lor': '\u2228', // ∨
  '\\lnot': '¬',
  '\\neg': '¬',
  '\\therefore': '∴',
  '\\because': '∵',
  // Relations
  '\\le': '≤',
  '\\leq': '≤',
  '\\ge': '≥',
  '\\geq': '≥',
  '\\ne': '≠',
  '\\neq': '≠',
  '\\ll': '≪',
  '\\gg': '≫',
  '\\approx': '≈',
  '\\equiv': '≡',
  '\\cong': '≅',
  '\\sim': '\u223C', // ∼
  '\\simeq': '≃',
  '\\propto': '∝',
  '\\perp': '⊥',
  '\\parallel': '∥',
  '\\models': '⊨',
  '\\vdash': '⊢',
  '\\mid': '\u2223', // ∣
  '\\nmid': '∤',
  '\\divides': '\u2223', // ∣
  // Common standalone glyphs
  '\\blacksquare': '■',
  '\\square': '□',
  '\\Box': '□',
  '\\qed': '∎',
  '\\bigstar': '★',
  // Modular arithmetic — the `\pmod{p}` form (with arg) is handled by its
  // own regex in the conversion pipeline; the bare `\bmod` / `\mod`
  // commands are simple text substitutions.
  '\\bmod': 'mod',
  '\\mod': 'mod',
  // Brackets / fences (named delimiter commands; stripping `\left` /
  // `\right` leaves these behind for the symbol pass to resolve).
  '\\langle': '⟨',
  '\\rangle': '⟩',
  '\\lceil': '⌈',
  '\\rceil': '⌉',
  '\\lfloor': '⌊',
  '\\rfloor': '⌋',
  '\\|': '‖',
  // Arrows
  '\\to': '→',
  '\\rightarrow': '→',
  '\\leftarrow': '←',
  '\\leftrightarrow': '↔',
  '\\Rightarrow': '⇒',
  '\\Leftarrow': '⇐',
  '\\Leftrightarrow': '⇔',
  '\\implies': '⟹',
  '\\impliedby': '⟸',
  '\\iff': '⟺',
  '\\mapsto': '↦',
  '\\hookrightarrow': '↪',
  '\\hookleftarrow': '↩',
  '\\uparrow': '↑',
  '\\downarrow': '↓',
  '\\updownarrow': '↕',
  // Binary operators
  '\\cdot': '⋅',
  '\\cdots': '⋯',
  '\\ldots': '…',
  '\\dots': '…',
  '\\dotsb': '…',
  '\\dotsc': '…',
  '\\vdots': '⋮',
  '\\ddots': '⋱',
  '\\times': '×',
  '\\div': '÷',
  '\\pm': '±',
  '\\mp': '∓',
  '\\circ': '∘',
  '\\bullet': '•',
  '\\star': '⋆',
  '\\ast': '\u2217', // ∗
  '\\oplus': '⊕',
  '\\ominus': '⊖',
  '\\otimes': '⊗',
  '\\odot': '⊙',
  '\\diamond': '⋄',
  '\\angle': '∠',
  '\\triangle': '△',
  // Spacing — collapse to varying widths of regular space: the thin /
  // medium / thick spaces become one cell, `\quad` two, `\qquad` four,
  // and the negative thin space `\!` disappears.
  '\\,': ' ',
  '\\;': ' ',
  '\\:': ' ',
  '\\!': '',
  '\\ ': ' ',
  '\\quad': '  ',
  '\\qquad': '    ',
  // Functions (LaTeX renders these in roman; we just keep the name)
  '\\sin': 'sin',
  '\\cos': 'cos',
  '\\tan': 'tan',
  '\\cot': 'cot',
  '\\sec': 'sec',
  '\\csc': 'csc',
  '\\arcsin': 'arcsin',
  '\\arccos': 'arccos',
  '\\arctan': 'arctan',
  '\\sinh': 'sinh',
  '\\cosh': 'cosh',
  '\\tanh': 'tanh',
  '\\log': 'log',
  '\\ln': 'ln',
  '\\exp': 'exp',
  '\\det': 'det',
  '\\dim': 'dim',
  '\\ker': 'ker',
  '\\lim': 'lim',
  '\\liminf': 'liminf',
  '\\limsup': 'limsup',
  '\\sup': 'sup',
  '\\inf': 'inf',
  '\\max': 'max',
  '\\min': 'min',
  '\\arg': 'arg',
  '\\gcd': 'gcd',
  // Escaped literals — model occasionally emits these for display
  '\\&': '&',
  '\\%': '%',
  '\\$': '$',
  '\\#': '#',
  '\\_': '_',
  '\\{': '{',
  '\\}': '}'
}
// `\mathbb{X}` → double-struck capital. Most letters live in the astral
// Mathematical Alphanumeric Symbols block, but C, H, N, P, Q, R and Z were
// encoded earlier in the BMP Letterlike Symbols block (their astral slots
// are reserved), so those seven use the letterlike code points. They are
// written as `\u` escapes because viewers that flag "ambiguous" characters
// can hide or strip the literal glyphs, and an empty value here would make
// `\mathbb{R}` disappear from the output (the lookup's `??` fallback only
// triggers on a missing key, not on '').
const BB: Record<string, string> = {
  A: '𝔸',
  B: '𝔹',
  C: '\u2102', // ℂ
  D: '𝔻',
  E: '𝔼',
  F: '𝔽',
  G: '𝔾',
  H: '\u210D', // ℍ
  I: '𝕀',
  J: '𝕁',
  K: '𝕂',
  L: '𝕃',
  M: '𝕄',
  N: '\u2115', // ℕ
  O: '𝕆',
  P: '\u2119', // ℙ
  Q: '\u211A', // ℚ
  R: '\u211D', // ℝ
  S: '𝕊',
  T: '𝕋',
  U: '𝕌',
  V: '𝕍',
  W: '𝕎',
  X: '𝕏',
  Y: '𝕐',
  Z: '\u2124' // ℤ
}
// `\mathcal{X}` → script capital. B, E, F, H, I, L, M and R were encoded in
// the BMP Letterlike Symbols block before the astral Mathematical
// Alphanumeric Symbols block existed (their astral slots are reserved), so
// those eight use the letterlike code points. They are written as `\u`
// escapes so tooling that hides or strips confusable glyphs cannot blank
// the mapping — an empty value would make `\mathcal{L}` vanish because the
// lookup's `??` fallback only triggers on a missing key, not on ''.
const CAL: Record<string, string> = {
  A: '𝒜',
  B: '\u212C', // ℬ
  C: '𝒞',
  D: '𝒟',
  E: '\u2130', // ℰ
  F: '\u2131', // ℱ
  G: '𝒢',
  H: '\u210B', // ℋ
  I: '\u2110', // ℐ
  J: '𝒥',
  K: '𝒦',
  L: '\u2112', // ℒ
  M: '\u2133', // ℳ
  N: '𝒩',
  O: '𝒪',
  P: '𝒫',
  Q: '𝒬',
  R: '\u211B', // ℛ
  S: '𝒮',
  T: '𝒯',
  U: '𝒰',
  V: '𝒱',
  W: '𝒲',
  X: '𝒳',
  Y: '𝒴',
  Z: '𝒵'
}
// `\mathfrak{X}` → Fraktur capital. C, H, I, R and Z were encoded in the
// BMP Letterlike Symbols block (their slots in the astral Mathematical
// Alphanumeric Symbols block are reserved), so those five use the
// letterlike code points. They are written as `\u` escapes so tooling that
// hides or strips confusable glyphs cannot blank the mapping — an empty
// value would make `\mathfrak{R}` vanish because the lookup's `??`
// fallback only triggers on a missing key, not on ''.
const FRAK: Record<string, string> = {
  A: '𝔄',
  B: '𝔅',
  C: '\u212D', // ℭ
  D: '𝔇',
  E: '𝔈',
  F: '𝔉',
  G: '𝔊',
  H: '\u210C', // ℌ
  I: '\u2111', // ℑ
  J: '𝔍',
  K: '𝔎',
  L: '𝔏',
  M: '𝔐',
  N: '𝔑',
  O: '𝔒',
  P: '𝔓',
  Q: '𝔔',
  R: '\u211C', // ℜ
  S: '𝔖',
  T: '𝔗',
  U: '𝔘',
  V: '𝔙',
  W: '𝔚',
  X: '𝔛',
  Y: '𝔜',
  Z: '\u2128' // ℨ
}
// Plain character → superscript glyph. Each row pairs a run of source
// characters with the equally long run of raised glyphs, matched by
// position. There is no entry for `q`, so any script body containing it
// takes the non-converting fallback path.
const SUPERSCRIPT: Record<string, string> = Object.fromEntries(
  (
    [
      ['0123456789', '⁰¹²³⁴⁵⁶⁷⁸⁹'],
      ['+-=()', '⁺⁻⁼⁽⁾'],
      ['abcdefghijklmnoprstuvwxyz', 'ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖʳˢᵗᵘᵛʷˣʸᶻ']
    ] as const
  ).flatMap(([plain, raised]) => {
    const glyphs = [...raised]
    return [...plain].map((ch, i): [string, string] => [ch, glyphs[i]])
  })
)
// Plain character → subscript glyph. Each row pairs a run of source
// characters with the equally long run of lowered glyphs, matched by
// position. Only the letters listed here have an entry; a script body
// containing any other letter takes the non-converting fallback path.
const SUBSCRIPT: Record<string, string> = Object.fromEntries(
  (
    [
      ['0123456789', '₀₁₂₃₄₅₆₇₈₉'],
      ['+-=()', '₊₋₌₍₎'],
      ['aehijklmnoprstuvx', 'ₐₑₕᵢⱼₖₗₘₙₒₚᵣₛₜᵤᵥₓ']
    ] as const
  ).flatMap(([plain, lowered]) => {
    const glyphs = [...lowered]
    return [...plain].map((ch, i): [string, string] => [ch, glyphs[i]])
  })
)
// Sentinel control characters used to mark `\boxed` / `\fbox` regions in
// the converted output. The renderer splits on these to apply a highlight
// style; consumers that don't want highlighting can strip them with the
// exported `BOX_RE` below.
//
// BOX_OPEN (U+0001) is emitted immediately before the boxed content.
export const BOX_OPEN = '\u0001'
// BOX_CLOSE (U+0002) is emitted immediately after the boxed content.
export const BOX_CLOSE = '\u0002'
// Matches one BOX_OPEN…BOX_CLOSE region; capture group 1 is the content
// between the sentinels. The body class excludes both sentinels, so with
// nested boxes only an innermost region matches. The regex carries the
// `g` flag, so it keeps `lastIndex` state when used with `exec` / `test`.
export const BOX_RE = /\u0001([^\u0001\u0002]*)\u0002/g
// Backslash-escape every RegExp metacharacter in `s` so the result matches
// `s` literally when spliced into a larger pattern.
const escapeRe = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, meta => `\\${meta}`)
// The symbol pass uses two regexes. Commands that END in a letter (`\pi`,
// `\sum`) need a trailing `(?![A-Za-z])` lookahead so they don't eat the
// front of a longer command such as `\pix` or `\summa`. Commands that end
// in punctuation (`\{`, `\,`, `\|`) must NOT get that lookahead, otherwise
// `\{p` would be left alone just because `p` is a letter.
//
// This helper partitions the command list accordingly, preserving the
// input order inside each group (longest-first ordering is applied later,
// when the alternation is built).
const splitByEnding = (keys: string[]) => {
  const endsWithLetter = (cmd: string) => /[A-Za-z]$/.test(cmd)
  return {
    letter: keys.filter(endsWithLetter),
    punct: keys.filter(cmd => !endsWithLetter(cmd))
  }
}
// Build the body of a regex alternation (`a|b|c`) from literal commands.
// Longer commands come first so `\leq` is tried before `\le`; each command
// is regex-escaped. The sort runs on a copy — `Array.prototype.sort` works
// in place and would otherwise reorder the caller's array as a side
// effect.
const buildAlt = (cmds: string[]) =>
  [...cmds]
    .sort((a, b) => b.length - a.length)
    .map(escapeRe)
    .join('|')
// Compiled matchers for the symbol pass, built once at module load from
// the keys of SYMBOLS. Letter-ending commands get a `(?![A-Za-z])` guard
// so they never match a prefix of a longer command; punctuation-ending
// commands are matched without a guard.
const { letter: LETTER_CMDS, punct: PUNCT_CMDS } = splitByEnding(Object.keys(SYMBOLS))
const SYMBOL_LETTER_RE = new RegExp(`(?:${buildAlt(LETTER_CMDS)})(?![A-Za-z])`, 'g')
const SYMBOL_PUNCT_RE = new RegExp(`(?:${buildAlt(PUNCT_CMDS)})`, 'g')
// Render the body of a `^{...}` / `_{...}` script.
//
//   input — the text between the braces
//   table — per-character glyph map (one code point → script glyph)
//   sigil — '^' or '_', used only when the body cannot be converted
//
// If every code point of `input` has a glyph in `table`, the glyphs are
// returned concatenated (no sigil). Otherwise nothing is converted and the
// trimmed body is returned behind the sigil: bare when it is a single
// visible character (`^∞` reads far better than `^{∞}`), in parentheses
// otherwise (`e^(iπ)`), because parens are ordinary punctuation while
// braces look like unrendered LaTeX.
const convertScript = (input: string, table: Record<string, string>, sigil: '^' | '_'): string => {
  // Array.from walks code points, so astral characters count as one.
  const glyphs = Array.from(input, ch => table[ch])
  if (glyphs.every(Boolean)) {
    return glyphs.join('')
  }
  const body = input.trim()
  const isSingleChar = Array.from(body).length === 1
  return isSingleChar ? `${sigil}${body}` : `${sigil}(${body})`
}
// Parse one balanced `{...}` group beginning at `s[start]`, honouring
// nested braces. A flat `\{[^{}]*\}` regex cannot handle input such as
// `\frac{|t|^{p-1}|P(t)|^p}{...}`, where the numerator carries its own
// braces from a superscript.
//
// Returns the text between the outer braces plus the index just past the
// closing `}`; returns null when `s[start]` is not `{` or the group never
// closes.
const readBraced = (s: string, start: number): { content: string; end: number } | null => {
  if (s[start] !== '{') {
    return null
  }
  let depth = 1
  for (let pos = start + 1; pos < s.length; pos++) {
    const ch = s[pos]
    if (ch === '\\' && pos + 1 < s.length) {
      // Backslash escape: `\{` and `\}` are literal braces, so step over
      // the escaped character without touching the depth counter.
      pos++
      continue
    }
    if (ch === '{') {
      depth++
    } else if (ch === '}' && --depth === 0) {
      return { content: s.slice(start + 1, pos), end: pos + 1 }
    }
  }
  return null
}
// Replace every occurrence of `\command{arg}` using balanced-brace parsing
// (so `\boxed{x^{n+1}}` works where a `[^{}]*` regex would fail).
//
//   input   — text to scan
//   command — literal command including its backslash, e.g. `\boxed`
//   render  — maps the argument text to its replacement; it receives the
//             argument AFTER nested occurrences of the same command have
//             been replaced, so `\boxed{\boxed{x}}` resolves cleanly
//
// Blanks (spaces / tabs) between the command and its `{` are allowed.
// An occurrence that is really a longer command (`\boxedfoo`) or that has
// no balanced `{...}` argument is preserved verbatim, and scanning resumes
// right where argument parsing stopped, so a well-formed occurrence that
// begins at that very position (`\boxed\boxed{x}`) is still rewritten
// instead of having its leading backslash swallowed.
const replaceBracedCommand = (input: string, command: string, render: (content: string) => string): string => {
  const cmdLen = command.length
  if (cmdLen === 0) {
    // An empty needle matches everywhere and could never make progress.
    return input
  }
  let out = ''
  let i = 0
  while (i < input.length) {
    const idx = input.indexOf(command, i)
    if (idx < 0) {
      break
    }
    out += input.slice(i, idx)
    const afterCmd = idx + cmdLen
    const next = input[afterCmd]
    // `(?![A-Za-z])` — a letter right after the name means this is a
    // different, longer command; keep it as-is.
    if (next && /[A-Za-z]/.test(next)) {
      out += command
      i = afterCmd
      continue
    }
    let p = afterCmd
    while (input[p] === ' ' || input[p] === '\t') p++
    const arg = readBraced(input, p)
    if (!arg) {
      // No usable argument: emit the command (and any blanks skipped)
      // verbatim and resume AT `p` rather than past it. `p > i` always
      // holds here, so progress is still guaranteed.
      out += input.slice(idx, p)
      i = p
      continue
    }
    out += render(replaceBracedCommand(arg.content, command, render))
    i = arg.end
  }
  return out + input.slice(i)
}
// Replace every `\frac{num}{den}` with `num/den`, parenthesising either
// side when its precedence demands it (see `wrapForFrac`). Both arguments
// are parsed with balanced braces and converted recursively before the
// parenthesisation decision, so `\frac{1}{\frac{1}{x}}` collapses to
// `1/(1/x)`.
//
// Blanks (spaces / tabs) are allowed before each argument. A `\frac` that
// is really a longer command (`\fraction`) or that lacks two balanced
// `{...}` arguments is preserved verbatim, and scanning resumes right
// where argument parsing stopped, so a well-formed `\frac` beginning at
// that very position is still converted instead of losing its leading
// backslash to the failure path.
const replaceFracs = (input: string): string => {
  const command = '\\frac'
  let out = ''
  let i = 0
  while (i < input.length) {
    const idx = input.indexOf(command, i)
    if (idx < 0) {
      break
    }
    out += input.slice(i, idx)
    const afterCmd = idx + command.length
    const next = input[afterCmd]
    // `(?![A-Za-z])` — protect hypothetical commands like `\fraction`.
    if (next && /[A-Za-z]/.test(next)) {
      out += command
      i = afterCmd
      continue
    }
    let p = afterCmd
    while (input[p] === ' ' || input[p] === '\t') p++
    const num = readBraced(input, p)
    let den: ReturnType<typeof readBraced> = null
    if (num) {
      p = num.end
      while (input[p] === ' ' || input[p] === '\t') p++
      den = readBraced(input, p)
    }
    if (!num || !den) {
      // Incomplete fraction: keep everything consumed so far verbatim and
      // resume AT `p` rather than past it. `p > i` always holds here, so
      // progress is still guaranteed.
      out += input.slice(idx, p)
      i = p
      continue
    }
    out += `${wrapForFrac(replaceFracs(num.content))}/${wrapForFrac(replaceFracs(den.content))}`
    i = den.end
  }
  return out + input.slice(i)
}
// Decide whether one side of a collapsed fraction needs parentheses, so
// `\frac{a+b}{c}` becomes `(a+b)/c` rather than `a+b/c`.
//
// The trimmed expression is returned
//   • unchanged when it is empty,
//   • unchanged when it is already ONE parenthesised group, i.e. the `(`
//     at the start is closed by the `)` at the very end. Merely starting
//     with `(` and ending with `)` is not enough: `(a)+(b)` must still be
//     wrapped, otherwise `\frac{(a)+(b)}{c}` would read `(a)+(b)/c`,
//   • wrapped in parens when it contains a binary operator (`+`, `-`,
//     `*`, `/`) or whitespace separating tokens — `*` and `/` matter
//     because `\frac{a*b}{c}` and `\frac{1/x}{y}` would otherwise read as
//     `a*b/c` and `1/x/y`; whitespace means `\sin x` is wrapped too,
//   • unchanged otherwise: atomic factors like `n!` or `x^2` stay bare
//     because wrapping them just clutters the output.
const wrapForFrac = (expr: string) => {
  const trimmed = expr.trim()
  if (!trimmed) {
    return trimmed
  }
  if (trimmed.startsWith('(') && trimmed.endsWith(')')) {
    // Track paren depth; if it returns to zero before the last character
    // the leading `(` closed early and the expression is several groups.
    let depth = 0
    let closesAtEnd = true
    for (let i = 0; i < trimmed.length; i++) {
      const ch = trimmed[i]
      if (ch === '(') {
        depth++
      } else if (ch === ')' && --depth === 0 && i < trimmed.length - 1) {
        closesAtEnd = false
        break
      }
    }
    if (closesAtEnd && depth === 0) {
      return trimmed
    }
  }
  if (/[+\-/*]|\s/.test(trimmed)) {
    return `(${trimmed})`
  }
  return trimmed
}
// Convert a LaTeX math fragment to a best-effort Unicode string.
//
// The conversion is an ordered pipeline of string rewrites; the order is
// significant (each stage's comment says why it sits where it does).
// Anything the pipeline does not recognise is left in the output
// untouched. The result may contain the BOX_OPEN / BOX_CLOSE sentinel
// characters around `\boxed` / `\fbox` content.
export function texToUnicode(input: string): string {
  let s = input

  // Alphabet commands with a single-letter argument. A letter with no
  // table entry (e.g. lowercase) leaves the command untouched.
  s = s.replace(/\\mathbb\s*\{([A-Za-z])\}/g, (raw, c: string) => BB[c] ?? raw)
  s = s.replace(/\\mathcal\s*\{([A-Za-z])\}/g, (raw, c: string) => CAL[c] ?? raw)
  s = s.replace(/\\mathfrak\s*\{([A-Za-z])\}/g, (raw, c: string) => FRAK[c] ?? raw)

  // Font / text wrappers carry no glyph of their own: keep the argument.
  // These only match brace-free arguments (`[^{}]+`).
  s = s.replace(/\\mathbf\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\mathit\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\mathrm\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\text\s*\{([^{}]+)\}/g, (_, c: string) => c)
  s = s.replace(/\\operatorname\s*\{([^{}]+)\}/g, (_, c: string) => c)

  // Accents: append the matching combining mark after the argument.
  s = s.replace(/\\overline\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0305`)
  s = s.replace(/\\hat\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0302`)
  s = s.replace(/\\bar\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0304`)
  s = s.replace(/\\tilde\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0303`)
  s = s.replace(/\\vec\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u20D7`)
  s = s.replace(/\\dot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0307`)
  s = s.replace(/\\ddot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0308`)

  s = replaceFracs(s)

  // `\boxed{X}` / `\fbox{X}` highlight a final answer. Terminals can't
  // draw a real box, so the content is wrapped in the U+0001 / U+0002
  // sentinels and the renderer splits on them to apply a highlight style.
  // This keeps `texToUnicode` pure-string while the UI layer does the
  // visual emphasis. The argument is parsed with balanced braces so nested
  // `{...}` from superscripts inside the box survive.
  s = replaceBracedCommand(s, '\\boxed', body => `${BOX_OPEN}${body.trim()}${BOX_CLOSE}`)
  s = replaceBracedCommand(s, '\\fbox', body => `${BOX_OPEN}${body.trim()}${BOX_CLOSE}`)

  // `\xrightarrow{label}` / `\xleftarrow{label}` collapse to an arrow with
  // the label inline. LaTeX renders the label above the arrow; in
  // monospace we put it adjacent — `─label→` / `←label─` is the closest
  // readable approximation. The shaft (U+2500) and heads (U+2192 / U+2190)
  // are written as escapes so they cannot be lost to tooling that hides
  // confusable glyphs, which would leave only the bare label and drop the
  // arrow entirely. Runs before the symbol pass so the label can still
  // pick up Greek and operator substitutions afterwards.
  s = s.replace(/\\xrightarrow\s*\{([^{}]*)\}/g, (_, label: string) => `\u2500${label.trim()}\u2192`)
  s = s.replace(/\\xleftarrow\s*\{([^{}]*)\}/g, (_, label: string) => `\u2190${label.trim()}\u2500`)
  s = s.replace(/\\Longrightarrow/g, '⟹')
  s = s.replace(/\\Longleftarrow/g, '⟸')
  s = s.replace(/\\Longleftrightarrow/g, '⟺')

  // `\pmod{p}` → ` (mod p)` (LaTeX adds parens automatically); `\pod{p}`
  // is the variant without the word "mod"; `\tag{n}` is the
  // equation-number annotation. Each collapses to a single-space-prefixed
  // parenthesised form. The leading `\s*` absorbs whitespace already in
  // the source so `b \pmod{p}` does not become `b  (mod p)` with a double
  // space.
  s = s.replace(/\s*\\pmod\s*\{([^{}]*)\}/g, (_, p: string) => ` (mod ${p.trim()})`)
  s = s.replace(/\s*\\pod\s*\{([^{}]*)\}/g, (_, p: string) => ` (${p.trim()})`)
  s = s.replace(/\s*\\tag\s*\{([^{}]*)\}/g, (_, n: string) => ` (${n.trim()})`)

  // `\big`, `\Big`, `\bigg`, `\Bigg` (with optional `l`/`r`/`m` suffix)
  // are sizing wrappers: strip them and leave whatever delimiter follows.
  // The trailing `(?![A-Za-z])` protects `\bigcup`, `\bigtriangleup` and
  // any other letter-continuation command from being shaved.
  s = s.replace(/\\(?:Bigg|bigg|Big|big)[lrm]?(?![A-Za-z])/g, '')

  // Style / size hints typeset no glyph; drop them together with any
  // trailing whitespace so they don't leak through as raw
  // `\displaystyle` in the output.
  s = s.replace(/\\(?:scriptscriptstyle|displaystyle|scriptstyle|textstyle|nolimits|limits)(?![A-Za-z])\s*/g, '')

  // `\left` and `\right` are sizing wrappers around any delimiter — bare
  // (`\left(`), escaped (`\left\{`), or named (`\left\langle`). Strip the
  // wrapper and let the symbol pass handle whatever delimiter follows.
  // The optional `\.?` consumes `\left.` / `\right.` ("no delimiter");
  // the lookahead keeps `\leftarrow` / `\rightarrow` safe.
  s = s.replace(/\\left(?![A-Za-z])\.?/g, '')
  s = s.replace(/\\right(?![A-Za-z])\.?/g, '')

  // Symbol substitution runs BEFORE scripts so a body like `^{\infty}`
  // becomes `^{∞}` first and the script stage can then emit `^∞`.
  // Punctuation-ending commands go first: they may legitimately be
  // followed by a letter (`\{p`), which the letter pass's lookahead would
  // reject.
  s = s.replace(SYMBOL_PUNCT_RE, m => SYMBOLS[m] ?? m)
  s = s.replace(SYMBOL_LETTER_RE, m => SYMBOLS[m] ?? m)

  // Scripts: braced form first, then the bare single-character form. The
  // bare form accepts ONLY alphanumerics and `+` / `-` / `=`; parens are
  // excluded because the braced fallback can emit `^(...)` and converting
  // its opening paren to `⁽` would orphan the closing one.
  s = s.replace(/\^\s*\{([^{}]+)\}/g, (_, body: string) => convertScript(body, SUPERSCRIPT, '^'))
  s = s.replace(/\^([A-Za-z0-9+\-=])/g, (raw, ch: string) => SUPERSCRIPT[ch] ?? raw)
  s = s.replace(/_\s*\{([^{}]+)\}/g, (_, body: string) => convertScript(body, SUBSCRIPT, '_'))
  s = s.replace(/_([A-Za-z0-9+\-=])/g, (raw, ch: string) => SUBSCRIPT[ch] ?? raw)

  return s
}