mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix: account for latex
This commit is contained in:
parent
c3d39feb3a
commit
cb039ac000
3 changed files with 367 additions and 21 deletions
|
|
@ -1,6 +1,8 @@
|
||||||
import { describe, expect, it } from 'vitest'
|
import { describe, expect, it } from 'vitest'
|
||||||
|
|
||||||
import { texToUnicode } from '../lib/mathUnicode.js'
|
import { BOX_CLOSE, BOX_OPEN, BOX_RE, texToUnicode } from '../lib/mathUnicode.js'
|
||||||
|
|
||||||
|
// Test helper: remove the box sentinels from converted output, keeping only
// the text that sat between each OPEN/CLOSE pair.
const stripBox = (s: string) => s.replace(BOX_RE, (_match: string, boxed: string) => boxed)
|
||||||
|
|
||||||
describe('texToUnicode — symbols', () => {
|
describe('texToUnicode — symbols', () => {
|
||||||
it('substitutes lowercase Greek', () => {
|
it('substitutes lowercase Greek', () => {
|
||||||
|
|
@ -116,6 +118,100 @@ describe('texToUnicode — fractions', () => {
|
||||||
it('handles nested fractions', () => {
|
it('handles nested fractions', () => {
|
||||||
expect(texToUnicode('\\frac{1}{\\frac{1}{x}}')).toBe('1/(1/x)')
|
expect(texToUnicode('\\frac{1}{\\frac{1}{x}}')).toBe('1/(1/x)')
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('handles braces inside numerator / denominator (regression: regex \\frac couldn\'t)', () => {
|
||||||
|
// The regex-only `\frac` matcher used `[^{}]*` for each arg, which
|
||||||
|
// failed the moment a numerator contained its own braces (here the
|
||||||
|
// `{p-1}` from a superscript). The balanced-brace parser handles it.
|
||||||
|
expect(texToUnicode('\\frac{|t|^{p-1}|P(t)|^p}{(p-1)!}')).toBe('(|t|ᵖ⁻¹|P(t)|ᵖ)/((p-1)!)')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('preserves \\frac when arguments are malformed', () => {
|
||||||
|
expect(texToUnicode('\\frac{a}')).toBe('\\frac{a}')
|
||||||
|
expect(texToUnicode('\\fraction{a}{b}')).toBe('\\fraction{a}{b}')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
// Style / limit-placement hints typeset no glyph of their own, so the
// converter is expected to drop them without disturbing what follows.
describe('texToUnicode — typography no-ops', () => {
  it('strips \\displaystyle / \\textstyle / \\scriptstyle / \\scriptscriptstyle', () => {
    expect(texToUnicode('\\displaystyle\\sum_{i=1}^n x_i')).toBe('∑ᵢ₌₁ⁿ xᵢ')
    expect(texToUnicode('f(x) = \\displaystyle \\frac{1}{2}')).toBe('f(x) = 1/2')
    expect(texToUnicode('\\textstyle x + y')).toBe('x + y')
  })

  it('strips \\limits / \\nolimits which only affect bound positioning', () => {
    expect(texToUnicode('\\sum\\limits_{k=1}^n a_k')).toBe('∑ₖ₌₁ⁿ aₖ')
    expect(texToUnicode('\\int\\nolimits_0^1 f(x) dx')).toBe('∫₀¹ f(x) dx')
  })

  it('does not eat letter-continuation commands like \\limitinf', () => {
    // Only the exact names (`\limits`, `\displaystyle`, …) are stripped.
    // A longer or different command that merely shares a prefix with one
    // of them, such as `\limitinf`, must come through verbatim.
    expect(texToUnicode('\\limitinf x')).toBe('\\limitinf x')
  })
})
|
||||||
|
|
||||||
|
// `\big`-family commands only change delimiter size in real LaTeX; the
// converter should remove them and keep the delimiter that follows.
describe('texToUnicode — sizing wrappers', () => {
  it('strips \\big / \\Big / \\bigg / \\Bigg before delimiters', () => {
    const cases: Array<[string, string]> = [
      ['\\bigl[ x \\bigr]', '[ x ]'],
      ['\\Big( y \\Big)', '( y )'],
      ['\\bigg| z \\bigg|', '| z |'],
      ['\\Biggl\\{ a \\Biggr\\}', '{ a }']
    ]

    for (const [tex, rendered] of cases) {
      expect(texToUnicode(tex)).toBe(rendered)
    }
  })

  it('does not eat \\bigtriangleup or other letter-continuations', () => {
    const tex = 'A \\bigtriangleup B'

    expect(texToUnicode(tex)).toBe('A \\bigtriangleup B')
  })
})
|
||||||
|
|
||||||
|
// `\pmod`, `\bmod` and `\tag` all collapse to plain inline text.
describe('texToUnicode — modular arithmetic and tags', () => {
  it('renders \\pmod{p} as " (mod p)"', () => {
    const rendered = texToUnicode('a \\equiv b \\pmod{p}')

    expect(rendered).toBe('a ≡ b (mod p)')
  })

  it('renders \\bmod / \\mod inline', () => {
    const rendered = texToUnicode('a \\bmod n')

    expect(rendered).toBe('a mod n')
  })

  it('collapses \\tag{n} to " (n)"', () => {
    const rendered = texToUnicode('x = y \\tag{24}')

    expect(rendered).toBe('x = y (24)')
  })
})
|
||||||
|
|
||||||
|
// Spot-checks for entries recently added to the symbol table.
describe('texToUnicode — newly added symbols', () => {
  it('renders \\nmid, \\blacksquare, \\qed', () => {
    const cases: Array<[string, string]> = [
      ['p \\nmid q', 'p ∤ q'],
      ['Therefore \\blacksquare', 'Therefore ■'],
      ['done \\qed', 'done ∎']
    ]

    for (const [tex, rendered] of cases) {
      expect(texToUnicode(tex)).toBe(rendered)
    }
  })
})
|
||||||
|
|
||||||
|
describe('texToUnicode — \\boxed / \\fbox', () => {
  // The converter marks a `\boxed` region by surrounding its content with
  // the non-printable U+0001 / U+0002 sentinels; the markdown renderer
  // turns those into highlight styling. The cases below check the raw
  // sentinel form as well as the readable text left after stripping the
  // sentinels with BOX_RE.
  const unboxed = (tex: string) => stripBox(texToUnicode(tex))

  it('wraps simple boxed content in BOX_OPEN/BOX_CLOSE sentinels', () => {
    expect(texToUnicode('\\boxed{x = 0}')).toBe(`${BOX_OPEN}x = 0${BOX_CLOSE}`)
    expect(unboxed('\\boxed{x = 0}')).toBe('x = 0')
    expect(unboxed('\\fbox{answer}')).toBe('answer')
  })

  it('handles boxed expressions with nested braces (regression: regex couldn\'t)', () => {
    // A `[^{}]*` pattern gives up at the first inner `{`; the
    // balanced-brace parser keeps going until the matching `}`.
    expect(unboxed('\\boxed{x^{n+1}}')).toBe('xⁿ⁺¹')
    expect(unboxed('\\boxed{\\frac{a}{b}}')).toBe('a/b')
  })

  it('handles real-world boxed final answer', () => {
    expect(unboxed('\\boxed{J = -\\sum_{k=0}^n a_k F(k)}')).toBe('J = -∑ₖ₌₀ⁿ aₖ F(k)')
  })

  it('preserves \\boxed without a brace argument', () => {
    expect(texToUnicode('\\boxed something')).toBe('\\boxed something')
  })
})
|
||||||
|
|
||||||
describe('texToUnicode — combining marks', () => {
|
describe('texToUnicode — combining marks', () => {
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,59 @@ import { Box, Link, Text } from '@hermes/ink'
|
||||||
import { memo, type ReactNode, useMemo } from 'react'
|
import { memo, type ReactNode, useMemo } from 'react'
|
||||||
|
|
||||||
import { ensureEmojiPresentation } from '../lib/emoji.js'
|
import { ensureEmojiPresentation } from '../lib/emoji.js'
|
||||||
import { texToUnicode } from '../lib/mathUnicode.js'
|
import { BOX_CLOSE, BOX_OPEN, texToUnicode } from '../lib/mathUnicode.js'
|
||||||
import { highlightLine, isHighlightable } from '../lib/syntax.js'
|
import { highlightLine, isHighlightable } from '../lib/syntax.js'
|
||||||
import type { Theme } from '../theme.js'
|
import type { Theme } from '../theme.js'
|
||||||
|
|
||||||
|
// `\boxed{X}` regions in `texToUnicode` output are delimited by the
// non-printable BOX_OPEN / BOX_CLOSE sentinels. This splits the string on
// them and renders each boxed segment with `inverse + bold`, so it reads
// as highlighter-pen emphasis on top of whatever color the parent `<Text>`
// uses. One space is added on each side of the highlighted content so the
// highlight reads as a block rather than hugging the glyphs.
//
// Returns the input string unchanged when it contains no BOX_OPEN;
// otherwise returns an array of plain strings and highlighted `<Text>`
// elements in source order.
//
// Sentinels never reach the terminal: a nested box (`\boxed{\boxed{x}}`
// produces OPEN OPEN x CLOSE CLOSE) is matched by depth and rendered as a
// single highlight, and any unbalanced sentinel is dropped from the text
// instead of being emitted as a raw control character.
const renderMath = (text: string): ReactNode => {
  if (!text.includes(BOX_OPEN)) {
    return text
  }

  // Remove every sentinel from a slice that is about to become visible text.
  const stripSentinels = (s: string): string => s.split(BOX_OPEN).join('').split(BOX_CLOSE).join('')

  const out: ReactNode[] = []
  let i = 0
  let key = 0

  while (i < text.length) {
    const start = text.indexOf(BOX_OPEN, i)

    if (start < 0) {
      // No further boxes — the remainder is plain text.
      out.push(stripSentinels(text.slice(i)))

      break
    }

    if (start > i) {
      out.push(stripSentinels(text.slice(i, start)))
    }

    // Locate the CLOSE that balances this OPEN, counting nested pairs.
    let depth = 1
    let end = start + 1

    while (end < text.length) {
      const c = text[end]

      if (c === BOX_OPEN) {
        depth++
      } else if (c === BOX_CLOSE) {
        depth--

        if (depth === 0) {
          break
        }
      }

      end++
    }

    if (depth !== 0) {
      // Unbalanced OPEN: show the remainder as plain text, without the
      // control characters.
      out.push(stripSentinels(text.slice(start)))

      break
    }

    out.push(
      <Text bold inverse key={key++}>
        {' '}
        {stripSentinels(text.slice(start + 1, end))}{' '}
      </Text>
    )

    i = end + 1
  }

  return out
}
|
||||||
|
|
||||||
const FENCE_RE = /^\s*(`{3,}|~{3,})(.*)$/
|
const FENCE_RE = /^\s*(`{3,}|~{3,})(.*)$/
|
||||||
const FENCE_CLOSE_RE = /^\s*(`{3,}|~{3,})\s*$/
|
const FENCE_CLOSE_RE = /^\s*(`{3,}|~{3,})\s*$/
|
||||||
const HR_RE = /^ {0,3}([-*_])(?:\s*\1){2,}\s*$/
|
const HR_RE = /^ {0,3}([-*_])(?:\s*\1){2,}\s*$/
|
||||||
|
|
@ -171,31 +220,39 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
|
||||||
} else if (m[6]) {
|
} else if (m[6]) {
|
||||||
parts.push(
|
parts.push(
|
||||||
<Text key={parts.length} strikethrough>
|
<Text key={parts.length} strikethrough>
|
||||||
{m[6]}
|
<MdInline t={t} text={m[6]} />
|
||||||
</Text>
|
</Text>
|
||||||
)
|
)
|
||||||
} else if (m[7]) {
|
} else if (m[7]) {
|
||||||
|
// Code is the one wrap that does NOT recurse — inline `code` spans
|
||||||
|
// are verbatim by definition. Letting MdInline reprocess them
|
||||||
|
// would corrupt regex examples and shell snippets.
|
||||||
parts.push(
|
parts.push(
|
||||||
<Text color={t.color.amber} dimColor key={parts.length}>
|
<Text color={t.color.amber} dimColor key={parts.length}>
|
||||||
{m[7]}
|
{m[7]}
|
||||||
</Text>
|
</Text>
|
||||||
)
|
)
|
||||||
} else if (m[8] ?? m[9]) {
|
} else if (m[8] ?? m[9]) {
|
||||||
|
// Recurse into bold / italic / strike / highlight so nested
|
||||||
|
// `$...$` math (and other inline tokens) inside a `**bolded
|
||||||
|
// statement with $\mathbb{Z}$ math**` actually render. Without
|
||||||
|
// this the inner content is dropped into a single `<Text bold>`
|
||||||
|
// verbatim and the math renderer never sees it.
|
||||||
parts.push(
|
parts.push(
|
||||||
<Text bold key={parts.length}>
|
<Text bold key={parts.length}>
|
||||||
{m[8] ?? m[9]}
|
<MdInline t={t} text={m[8] ?? m[9]!} />
|
||||||
</Text>
|
</Text>
|
||||||
)
|
)
|
||||||
} else if (m[10] ?? m[11]) {
|
} else if (m[10] ?? m[11]) {
|
||||||
parts.push(
|
parts.push(
|
||||||
<Text italic key={parts.length}>
|
<Text italic key={parts.length}>
|
||||||
{m[10] ?? m[11]}
|
<MdInline t={t} text={m[10] ?? m[11]!} />
|
||||||
</Text>
|
</Text>
|
||||||
)
|
)
|
||||||
} else if (m[12]) {
|
} else if (m[12]) {
|
||||||
parts.push(
|
parts.push(
|
||||||
<Text backgroundColor={t.color.diffAdded} color={t.color.diffAddedWord} key={parts.length}>
|
<Text backgroundColor={t.color.diffAdded} color={t.color.diffAddedWord} key={parts.length}>
|
||||||
{m[12]}
|
<MdInline t={t} text={m[12]} />
|
||||||
</Text>
|
</Text>
|
||||||
)
|
)
|
||||||
} else if (m[13]) {
|
} else if (m[13]) {
|
||||||
|
|
@ -236,7 +293,7 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
|
||||||
// raw LaTeX rather than vanishing.
|
// raw LaTeX rather than vanishing.
|
||||||
parts.push(
|
parts.push(
|
||||||
<Text color={t.color.amber} italic key={parts.length}>
|
<Text color={t.color.amber} italic key={parts.length}>
|
||||||
{texToUnicode(m[17] ?? m[18]!)}
|
{renderMath(texToUnicode(m[17] ?? m[18]!))}
|
||||||
</Text>
|
</Text>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
@ -456,9 +513,7 @@ function MdImpl({ compact, t, text }: MdProps) {
|
||||||
start('code')
|
start('code')
|
||||||
nodes.push(
|
nodes.push(
|
||||||
<Box flexDirection="column" key={key} paddingLeft={2}>
|
<Box flexDirection="column" key={key} paddingLeft={2}>
|
||||||
<Text color={t.color.dim}>─ math</Text>
|
{inner ? <Text color={t.color.amber}>{renderMath(texToUnicode(inner))}</Text> : null}
|
||||||
|
|
||||||
{inner ? <Text color={t.color.amber}>{texToUnicode(inner)}</Text> : null}
|
|
||||||
</Box>
|
</Box>
|
||||||
)
|
)
|
||||||
i++
|
i++
|
||||||
|
|
@ -504,11 +559,9 @@ function MdImpl({ compact, t, text }: MdProps) {
|
||||||
start('code')
|
start('code')
|
||||||
nodes.push(
|
nodes.push(
|
||||||
<Box flexDirection="column" key={key} paddingLeft={2}>
|
<Box flexDirection="column" key={key} paddingLeft={2}>
|
||||||
<Text color={t.color.dim}>─ math</Text>
|
|
||||||
|
|
||||||
{block.map((l, j) => (
|
{block.map((l, j) => (
|
||||||
<Text color={t.color.amber} key={j}>
|
<Text color={t.color.amber} key={j}>
|
||||||
{texToUnicode(l)}
|
{renderMath(texToUnicode(l))}
|
||||||
</Text>
|
</Text>
|
||||||
))}
|
))}
|
||||||
</Box>
|
</Box>
|
||||||
|
|
|
||||||
|
|
@ -136,6 +136,20 @@ const SYMBOLS: Record<string, string> = {
|
||||||
'\\models': '⊨',
|
'\\models': '⊨',
|
||||||
'\\vdash': '⊢',
|
'\\vdash': '⊢',
|
||||||
'\\mid': '∣',
|
'\\mid': '∣',
|
||||||
|
'\\nmid': '∤',
|
||||||
|
'\\divides': '∣',
|
||||||
|
|
||||||
|
// Common standalone glyphs
|
||||||
|
'\\blacksquare': '■',
|
||||||
|
'\\square': '□',
|
||||||
|
'\\Box': '□',
|
||||||
|
'\\qed': '∎',
|
||||||
|
'\\bigstar': '★',
|
||||||
|
|
||||||
|
// Modular arithmetic — the `\pmod{p}` form (with arg) is handled below;
|
||||||
|
// the bare `\bmod` / `\mod` commands are simple text substitutions.
|
||||||
|
'\\bmod': 'mod',
|
||||||
|
'\\mod': 'mod',
|
||||||
|
|
||||||
// Brackets / fences (named delimiter commands; the `\left\X` / `\right\X`
|
// Brackets / fences (named delimiter commands; the `\left\X` / `\right\X`
|
||||||
// unwrapping below leaves these behind for the symbol pass to resolve).
|
// unwrapping below leaves these behind for the symbol pass to resolve).
|
||||||
|
|
@ -403,6 +417,14 @@ const SUBSCRIPT: Record<string, string> = {
|
||||||
x: 'ₓ'
|
x: 'ₓ'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sentinel control characters used to mark `\boxed` / `\fbox` regions in
|
||||||
|
// the converted output. The renderer splits on these to apply a highlight
|
||||||
|
// style; consumers that don't want highlighting can strip them with the
|
||||||
|
// exported `BOX_RE` below.
|
||||||
|
/** Sentinel (U+0001) placed immediately before the content of a boxed region. */
export const BOX_OPEN = '\u0001'
|
||||||
|
/** Sentinel (U+0002) placed immediately after the content of a boxed region. */
export const BOX_CLOSE = '\u0002'
|
||||||
|
/**
 * Global pattern matching one OPEN…CLOSE pair whose content contains no
 * further sentinel; capture group 1 is the content. Because the character
 * class excludes both sentinels, only an innermost pair matches per pass.
 */
export const BOX_RE = /\u0001([^\u0001\u0002]*)\u0002/g
|
||||||
|
|
||||||
const escapeRe = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
|
const escapeRe = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
|
||||||
|
|
||||||
// Pre-compile two symbol regexes: one for letter-ending commands (`\pi`,
|
// Pre-compile two symbol regexes: one for letter-ending commands (`\pi`,
|
||||||
|
|
@ -473,6 +495,154 @@ const convertScript = (input: string, table: Record<string, string>, sigil: '^'
|
||||||
return `${sigil}(${trimmed})`
|
return `${sigil}(${trimmed})`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse a single `{...}` group that begins exactly at `start`, counting
// nested braces so that `{|t|^{p-1}}` is read as one group (a flat
// `\{[^{}]*\}` regex cannot do this). A backslash escapes the character
// after it, so `\{` and `\}` inside the body never affect the nesting
// count. On success returns the text between the outer braces together
// with the offset of the first character after the closing `}`; returns
// null when `s[start]` is not `{` or the group is never closed.
const readBraced = (s: string, start: number): { content: string; end: number } | null => {
  if (s.charAt(start) !== '{') {
    return null
  }

  let open = 1

  for (let pos = start + 1; pos < s.length; pos++) {
    const ch = s.charAt(pos)

    if (ch === '\\') {
      // Step over the escaped character as well (when there is one); the
      // loop increment then moves past it.
      if (pos + 1 < s.length) {
        pos++
      }

      continue
    }

    if (ch === '{') {
      open++
    } else if (ch === '}') {
      open--

      if (open === 0) {
        return { content: s.slice(start + 1, pos), end: pos + 1 }
      }
    }
  }

  // Ran out of input before the outer brace was closed.
  return null
}
|
||||||
|
|
||||||
|
// Replace every occurrence of `\command{arg}` using balanced-brace parsing
// (so `\boxed{x^{n+1}}` works where a `[^{}]*` regex would fail).
//
//   input   — text to scan.
//   command — literal command name including the backslash, e.g. `\boxed`.
//   render  — maps an argument's content to its replacement text.
//
// Nested uses are resolved inside-out: the argument is first run through
// this function again, and `render` receives that already-processed
// content. Spaces and tabs between the command and its `{` are allowed.
//
// An occurrence is left verbatim when it is followed by a letter (it is
// then a longer command such as `\boxedfoo`) or when no balanced `{...}`
// follows. In both cases scanning resumes immediately after the command
// name, so nothing that follows — including another occurrence of the same
// command — is skipped.
const replaceBracedCommand = (input: string, command: string, render: (content: string) => string): string => {
  const cmdLen = command.length

  // An empty command matches at every offset and could never advance.
  if (cmdLen === 0) {
    return input
  }

  let out = ''
  let i = 0

  while (i < input.length) {
    const idx = input.indexOf(command, i)

    if (idx < 0) {
      out += input.slice(i)

      return out
    }

    // Text before the match is always copied through unchanged.
    out += input.slice(i, idx)

    const nameEnd = idx + cmdLen
    const after = input[nameEnd]

    // `(?![A-Za-z])` — a letter here means this is a different, longer command.
    if (after && /[A-Za-z]/.test(after)) {
      out += command
      i = nameEnd

      continue
    }

    let p = nameEnd

    while (input[p] === ' ' || input[p] === '\t') p++

    const arg = readBraced(input, p)

    if (!arg) {
      // No usable argument: keep the command name and rescan everything
      // after it (the skipped whitespace is copied by a later iteration).
      out += command
      i = nameEnd

      continue
    }

    out += render(replaceBracedCommand(arg.content, command, render))
    i = arg.end
  }

  return out
}
|
||||||
|
|
||||||
|
// Replace every `\frac{num}{den}` with `num/den`, passing each side through
// `wrapForFrac` so it can be parenthesised when needed. Both arguments are
// parsed with balanced braces and converted recursively before wrapping,
// so `\frac{1}{\frac{1}{x}}` collapses to `1/(1/x)`. Spaces and tabs are
// allowed before each argument.
//
// A `\frac` is left verbatim when it is followed by a letter (a longer
// command such as `\fraction`) or when either argument is missing or
// unbalanced. In those cases scanning resumes immediately after the
// command name, so well-formed fractions later in the text — including
// ones nested inside the malformed fraction's numerator — are still
// converted.
const replaceFracs = (input: string): string => {
  const FRAC = '\\frac'
  let out = ''
  let i = 0

  while (i < input.length) {
    const idx = input.indexOf(FRAC, i)

    if (idx < 0) {
      out += input.slice(i)

      return out
    }

    // Text before the match is always copied through unchanged.
    out += input.slice(i, idx)

    const nameEnd = idx + FRAC.length
    const after = input[nameEnd]

    // `(?![A-Za-z])` — protect hypothetical commands like `\fraction`.
    if (after && /[A-Za-z]/.test(after)) {
      out += FRAC
      i = nameEnd

      continue
    }

    let p = nameEnd

    while (input[p] === ' ' || input[p] === '\t') p++

    const num = readBraced(input, p)
    let den: { content: string; end: number } | null = null

    if (num) {
      p = num.end

      while (input[p] === ' ' || input[p] === '\t') p++

      den = readBraced(input, p)
    }

    if (!num || !den) {
      // Malformed: keep the command name and rescan what follows it.
      out += FRAC
      i = nameEnd

      continue
    }

    out += `${wrapForFrac(replaceFracs(num.content))}/${wrapForFrac(replaceFracs(den.content))}`
    i = den.end
  }

  return out
}
|
||||||
|
|
||||||
// Wrap multi-token expressions in parens so `\frac{a+b}{c}` becomes
|
// Wrap multi-token expressions in parens so `\frac{a+b}{c}` becomes
|
||||||
// `(a+b)/c` rather than `a+b/c`. We only wrap when the expression has
|
// `(a+b)/c` rather than `a+b/c`. We only wrap when the expression has
|
||||||
// loose precedence — additive operators or whitespace that would change
|
// loose precedence — additive operators or whitespace that would change
|
||||||
|
|
@ -516,15 +686,18 @@ export function texToUnicode(input: string): string {
|
||||||
s = s.replace(/\\dot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0307`)
|
s = s.replace(/\\dot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0307`)
|
||||||
s = s.replace(/\\ddot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0308`)
|
s = s.replace(/\\ddot\s*\{([^{}]+)\}/g, (_, c: string) => `${c}\u0308`)
|
||||||
|
|
||||||
// Apply \frac repeatedly so nested fractions resolve from the inside
|
s = replaceFracs(s)
|
||||||
// out — `\frac{1}{1+\frac{1}{x}}` collapses cleanly.
|
|
||||||
let prev = ''
|
|
||||||
let guard = 0
|
|
||||||
|
|
||||||
while (s !== prev && guard++ < 8) {
|
// `\boxed{X}` / `\fbox{X}` highlight a final answer. Terminals can't
|
||||||
prev = s
|
// draw a real box, so we wrap the content in U+0001 / U+0002 control
|
||||||
s = s.replace(/\\frac\s*\{([^{}]*)\}\s*\{([^{}]*)\}/g, (_, num: string, den: string) => `${wrapForFrac(num)}/${wrapForFrac(den)}`)
|
// characters — non-printable, never present in real text — and let the
|
||||||
}
|
// markdown renderer split on them and apply a highlight style (inverse
|
||||||
|
// video) to the bracketed region. This keeps `texToUnicode` pure-string
|
||||||
|
// while letting the React layer do the actual visual emphasis.
|
||||||
|
// Argument is parsed with balanced braces so nested `{...}` from
|
||||||
|
// superscripts / fractions inside the box survive.
|
||||||
|
s = replaceBracedCommand(s, '\\boxed', body => `${BOX_OPEN}${body.trim()}${BOX_CLOSE}`)
|
||||||
|
s = replaceBracedCommand(s, '\\fbox', body => `${BOX_OPEN}${body.trim()}${BOX_CLOSE}`)
|
||||||
|
|
||||||
// `\xrightarrow{label}` / `\xleftarrow{label}` collapse to an arrow with
|
// `\xrightarrow{label}` / `\xleftarrow{label}` collapse to an arrow with
|
||||||
// the label inline. LaTeX renders the label above the arrow; in monospace
|
// the label inline. LaTeX renders the label above the arrow; in monospace
|
||||||
|
|
@ -537,6 +710,30 @@ export function texToUnicode(input: string): string {
|
||||||
s = s.replace(/\\Longleftarrow/g, '⟸')
|
s = s.replace(/\\Longleftarrow/g, '⟸')
|
||||||
s = s.replace(/\\Longleftrightarrow/g, '⟺')
|
s = s.replace(/\\Longleftrightarrow/g, '⟺')
|
||||||
|
|
||||||
|
// `\pmod{p}` → ` (mod p)` (LaTeX adds parens automatically); `\pod{p}`
|
||||||
|
// is the variant that drops the word "mod" but keeps the parens; `\tag{n}` is the equation-number annotation
|
||||||
|
// shown to the right of an equation. Collapse to a single-space-prefixed
|
||||||
|
// bracketed form. The leading `\s*` in the pattern absorbs any whitespace
|
||||||
|
// already in the source so we don't end up with `b  (mod p)` (double
|
||||||
|
// space) when the user wrote `b \pmod{p}`.
|
||||||
|
s = s.replace(/\s*\\pmod\s*\{([^{}]*)\}/g, (_, p: string) => ` (mod ${p.trim()})`)
|
||||||
|
s = s.replace(/\s*\\pod\s*\{([^{}]*)\}/g, (_, p: string) => ` (${p.trim()})`)
|
||||||
|
s = s.replace(/\s*\\tag\s*\{([^{}]*)\}/g, (_, n: string) => ` (${n.trim()})`)
|
||||||
|
|
||||||
|
// `\big`, `\Big`, `\bigg`, `\Bigg` (with optional `l`/`r`/`m` suffix)
|
||||||
|
// are sizing wrappers analogous to `\left`/`\right` but without the
|
||||||
|
// automatic-pairing semantics. Strip them and leave whatever delimiter
|
||||||
|
// follows. The trailing `(?![A-Za-z])` protects `\bigtriangleup` and
|
||||||
|
// any other letter-continuation command from being shaved.
|
||||||
|
s = s.replace(/\\(?:Bigg|bigg|Big|big)[lrm]?(?![A-Za-z])/g, '')
|
||||||
|
|
||||||
|
// Style / size hints that don't typeset any glyph and only affect how
|
||||||
|
// things would be sized in a real LaTeX engine. In a terminal every
|
||||||
|
// glyph is one monospace cell, so there's nothing to do — drop them
|
||||||
|
// (with any trailing whitespace) so they don't leak through as raw
|
||||||
|
// `\displaystyle` in the output.
|
||||||
|
s = s.replace(/\\(?:scriptscriptstyle|displaystyle|scriptstyle|textstyle|nolimits|limits)(?![A-Za-z])\s*/g, '')
|
||||||
|
|
||||||
// `\left` and `\right` are sizing wrappers around any delimiter — bare
|
// `\left` and `\right` are sizing wrappers around any delimiter — bare
|
||||||
// (`\left(`), escaped (`\left\{`), or named (`\left\langle`). Strip the
|
// (`\left(`), escaped (`\left\{`), or named (`\left\langle`). Strip the
|
||||||
// wrapper unconditionally and let the rest of the pipeline (or the
|
// wrapper unconditionally and let the rest of the pipeline (or the
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue