mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
Merge pull request #17175 from NousResearch/fix/markdown
feat(latex): latex in tui
This commit is contained in:
commit
430302c197
7 changed files with 1391 additions and 50 deletions
|
|
@ -2,9 +2,60 @@ import { Box, Link, Text } from '@hermes/ink'
|
|||
import { Fragment, memo, type ReactNode, useMemo } from 'react'
|
||||
|
||||
import { ensureEmojiPresentation } from '../lib/emoji.js'
|
||||
import { BOX_CLOSE, BOX_OPEN, texToUnicode } from '../lib/mathUnicode.js'
|
||||
import { highlightLine, isHighlightable } from '../lib/syntax.js'
|
||||
import type { Theme } from '../theme.js'
|
||||
|
||||
/**
 * Render `texToUnicode` output, highlighting `\boxed{X}` regions.
 *
 * Boxed regions arrive wrapped in the non-printable `BOX_OPEN` /
 * `BOX_CLOSE` sentinels. Each region is rendered with `inverse + bold` so
 * it reads as a highlighter-pen emphasis on top of whatever color the
 * parent `<Text>` is using (the theme accent for math). The leading /
 * trailing space inside the highlight gives a one-cell visual margin so
 * the highlight reads as a block, not a hug.
 *
 * The sentinels are markers only and must never reach the terminal, so
 * every emitted segment is scrubbed of stray sentinels. This covers an
 * opener with no closer, a closer with no opener, and nested boxes (where
 * the first closer ends the outer box and leaves orphans behind).
 *
 * @param text - Math text, possibly containing sentinel-delimited boxes.
 * @returns The input string itself when it holds no sentinels; a scrubbed
 *   string when it holds only stray closers; otherwise an array of plain
 *   strings and highlighted `<Text>` nodes in source order.
 */
const renderMath = (text: string): ReactNode => {
  // split/join instead of a regex so the sentinels never need escaping.
  const scrub = (s: string): string => s.split(BOX_OPEN).join('').split(BOX_CLOSE).join('')

  if (!text.includes(BOX_OPEN)) {
    // Fast path: nothing to highlight. Only pay for a scrub when a stray
    // closer is actually present.
    return text.includes(BOX_CLOSE) ? scrub(text) : text
  }

  const out: ReactNode[] = []
  let i = 0
  let key = 0

  while (i < text.length) {
    const start = text.indexOf(BOX_OPEN, i)

    if (start < 0) {
      // No more boxes: flush the remaining plain text.
      const rest = scrub(text.slice(i))

      if (rest) {
        out.push(rest)
      }

      break
    }

    if (start > i) {
      const plain = scrub(text.slice(i, start))

      if (plain) {
        out.push(plain)
      }
    }

    // Offsets use the sentinel lengths rather than a hard-coded 1 so the
    // slicing stays correct if the sentinels ever change width.
    const innerStart = start + BOX_OPEN.length
    const end = text.indexOf(BOX_CLOSE, innerStart)

    if (end < 0) {
      // Unterminated box: drop the orphan opener and emit the remainder
      // as plain text instead of leaking the control character.
      const rest = scrub(text.slice(innerStart))

      if (rest) {
        out.push(rest)
      }

      break
    }

    out.push(
      <Text bold inverse key={key++}>
        {' '}
        {scrub(text.slice(innerStart, end))}{' '}
      </Text>
    )

    i = end + BOX_CLOSE.length
  }

  return out
}
|
||||
|
||||
const FENCE_RE = /^\s*(`{3,}|~{3,})(.*)$/
|
||||
const FENCE_CLOSE_RE = /^\s*(`{3,}|~{3,})\s*$/
|
||||
const HR_RE = /^ {0,3}([-*_])(?:\s*\1){2,}\s*$/
|
||||
|
|
@ -19,6 +70,15 @@ const QUOTE_RE = /^\s*(?:>\s*)+/
|
|||
const TABLE_DIVIDER_CELL_RE = /^:?-{3,}:?$/
|
||||
const MD_URL_RE = '((?:[^\\s()]|\\([^\\s()]*\\))+?)'
|
||||
|
||||
// Display math openers: `$$ ... $$` (TeX) and `\[ ... \]` (LaTeX). The
|
||||
// opener is matched only when `$$` / `\[` appears at the very start of the
|
||||
// trimmed line — `startsWith('$$')` used to fire on prose like
|
||||
// `$$x+y$$ followed by more`, opening a block that never closed because the
|
||||
// trailing `$$` on the same line was invisible to the close-scan loop.
|
||||
const MATH_BLOCK_OPEN_RE = /^\s*(\$\$|\\\[)(.*)$/
|
||||
const MATH_BLOCK_CLOSE_DOLLAR_RE = /^(.*?)\$\$\s*$/
|
||||
const MATH_BLOCK_CLOSE_BRACKET_RE = /^(.*?)\\\]\s*$/
|
||||
|
||||
export const MEDIA_LINE_RE = /^\s*[`"']?MEDIA:\s*(\S+?)[`"']?\s*$/
|
||||
export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/
|
||||
|
||||
|
|
@ -31,6 +91,13 @@ export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/
|
|||
// `thing ~! more ~?` from Kimi / Qwen / GLM (kaomoji-style decorators)
|
||||
// doesn't pair up the first `~` with the next one on the line and swallow
|
||||
// the text between them as a dim `_`-prefixed span.
|
||||
//
|
||||
// Inline math (`$x$` and `\(x\)`) takes precedence over emphasis at the
|
||||
// same start position because regex alternation is leftmost-first; a
|
||||
// dollar-delimited span at column N wins over a `*` at column N+1, so
|
||||
// `$P=a*b*c$` renders as math instead of having `*b*` corrupted into
|
||||
// italics. Single-character minimums and "no space adjacent to delimiter"
|
||||
// rules keep currency prose like `$5 to $10` from being swallowed.
|
||||
export const INLINE_RE = new RegExp(
|
||||
[
|
||||
`!\\[(.*?)\\]\\(${MD_URL_RE}\\)`, // 1,2 image
|
||||
|
|
@ -46,7 +113,13 @@ export const INLINE_RE = new RegExp(
|
|||
`\\[\\^([^\\]]+)\\]`, // 13 footnote ref
|
||||
`\\^([^^\\s][^^]*?)\\^`, // 14 superscript
|
||||
`~([A-Za-z0-9]{1,8})~`, // 15 subscript
|
||||
`https?:\\/\\/[^\\s<]+` // 16 bare URL
|
||||
`(https?:\\/\\/[^\\s<]+)`, // 16 bare URL — wrapped so it owns its own
|
||||
// capture group; without this, the math
|
||||
// spans below would land in m[16] and the
|
||||
// MdInline dispatcher would treat them as
|
||||
// bare URLs and render them as autolinks.
|
||||
`(?<!\\$)\\$([^\\s$](?:[^$\\n]*?[^\\s$])?)\\$(?!\\$)`, // 17 inline math $...$
|
||||
`\\\\\\(([^\\n]+?)\\\\\\)` // 18 inline math \(...\)
|
||||
].join('|'),
|
||||
'g'
|
||||
)
|
||||
|
|
@ -93,12 +166,14 @@ export const stripInlineMarkup = (v: string) =>
|
|||
.replace(/\[\^([^\]]+)\]/g, '[$1]')
|
||||
.replace(/\^([^^\s][^^]*?)\^/g, '^$1')
|
||||
.replace(/~([A-Za-z0-9]{1,8})~/g, '_$1')
|
||||
.replace(/(?<!\$)\$([^\s$](?:[^$\n]*?[^\s$])?)\$(?!\$)/g, '$1')
|
||||
.replace(/\\\(([^\n]+?)\\\)/g, '$1')
|
||||
|
||||
const renderTable = (k: number, rows: string[][], t: Theme) => {
|
||||
const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length)))
|
||||
|
||||
// Thin divider under the header. Without it tables look like prose
|
||||
// with extra spacing because the header is just amber-coloured text
|
||||
// with extra spacing because the header is just accent-coloured text
|
||||
// (#15534). We avoid full borders on purpose — column widths come
|
||||
// from `stripInlineMarkup(...).length` (UTF-16 code units, not
|
||||
// display width), so a real outline often misaligns on emoji and
|
||||
|
|
@ -163,31 +238,39 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
|
|||
} else if (m[6]) {
|
||||
parts.push(
|
||||
<Text key={parts.length} strikethrough>
|
||||
{m[6]}
|
||||
<MdInline t={t} text={m[6]} />
|
||||
</Text>
|
||||
)
|
||||
} else if (m[7]) {
|
||||
// Code is the one wrap that does NOT recurse — inline `code` spans
|
||||
// are verbatim by definition. Letting MdInline reprocess them
|
||||
// would corrupt regex examples and shell snippets.
|
||||
parts.push(
|
||||
<Text color={t.color.accent} dimColor key={parts.length}>
|
||||
{m[7]}
|
||||
</Text>
|
||||
)
|
||||
} else if (m[8] ?? m[9]) {
|
||||
// Recurse into bold / italic / strike / highlight so nested
|
||||
// `$...$` math (and other inline tokens) inside a `**bolded
|
||||
// statement with $\mathbb{Z}$ math**` actually render. Without
|
||||
// this the inner content is dropped into a single `<Text bold>`
|
||||
// verbatim and the math renderer never sees it.
|
||||
parts.push(
|
||||
<Text bold key={parts.length}>
|
||||
{m[8] ?? m[9]}
|
||||
<MdInline t={t} text={m[8] ?? m[9]!} />
|
||||
</Text>
|
||||
)
|
||||
} else if (m[10] ?? m[11]) {
|
||||
parts.push(
|
||||
<Text italic key={parts.length}>
|
||||
{m[10] ?? m[11]}
|
||||
<MdInline t={t} text={m[10] ?? m[11]!} />
|
||||
</Text>
|
||||
)
|
||||
} else if (m[12]) {
|
||||
parts.push(
|
||||
<Text backgroundColor={t.color.diffAdded} color={t.color.diffAddedWord} key={parts.length}>
|
||||
{m[12]}
|
||||
<MdInline t={t} text={m[12]} />
|
||||
</Text>
|
||||
)
|
||||
} else if (m[13]) {
|
||||
|
|
@ -218,6 +301,19 @@ function MdInline({ t, text }: { t: Theme; text: string }) {
|
|||
if (url.length < m[16].length) {
|
||||
parts.push(<Text key={parts.length}>{m[16].slice(url.length)}</Text>)
|
||||
}
|
||||
} else if (m[17] ?? m[18]) {
|
||||
// Inline math is run through `texToUnicode` (Greek letters, ℕℤℚℝ,
|
||||
// operators, sub/superscripts, fractions) and rendered in italic
|
||||
// accent. Italic is the disambiguator — links use accent+underline,
|
||||
// so without italic readers can't tell `\mathbb{R}` (math) from a
|
||||
// hyperlinked word. Anything `texToUnicode` doesn't recognise is
|
||||
// preserved verbatim, so unfamiliar commands just look like their
|
||||
// raw LaTeX rather than vanishing.
|
||||
parts.push(
|
||||
<Text color={t.color.accent} italic key={parts.length}>
|
||||
{renderMath(texToUnicode(m[17] ?? m[18]!))}
|
||||
</Text>
|
||||
)
|
||||
}
|
||||
|
||||
last = i + m[0].length
|
||||
|
|
@ -415,32 +511,80 @@ function MdImpl({ compact, t, text }: MdProps) {
|
|||
continue
|
||||
}
|
||||
|
||||
if (line.trim().startsWith('$$')) {
|
||||
start('code')
|
||||
const mathOpen = line.match(MATH_BLOCK_OPEN_RE)
|
||||
|
||||
if (mathOpen) {
|
||||
const opener = mathOpen[1]!
|
||||
const closeRe = opener === '$$' ? MATH_BLOCK_CLOSE_DOLLAR_RE : MATH_BLOCK_CLOSE_BRACKET_RE
|
||||
const headRest = mathOpen[2] ?? ''
|
||||
const block: string[] = []
|
||||
|
||||
for (i++; i < lines.length; i++) {
|
||||
if (lines[i]!.trim().startsWith('$$')) {
|
||||
i++
|
||||
// Single-line block: `$$x + y = z$$` or `\[x\]`. Capture inner content
|
||||
// and emit the block immediately. Without this, the close-scan loop
|
||||
// skips line `i` and treats the next opener as our closer, swallowing
|
||||
// every paragraph in between.
|
||||
const sameLineClose = headRest.match(closeRe)
|
||||
|
||||
if (sameLineClose) {
|
||||
const inner = sameLineClose[1]!.trim()
|
||||
|
||||
start('code')
|
||||
nodes.push(
|
||||
<Box flexDirection="column" key={key} paddingLeft={2}>
|
||||
{inner ? <Text color={t.color.accent}>{renderMath(texToUnicode(inner))}</Text> : null}
|
||||
</Box>
|
||||
)
|
||||
i++
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// Multi-line block: scan ahead for a real closer before committing.
|
||||
// If none exists in the rest of the document, render this line as a
|
||||
// paragraph instead of consuming everything that follows.
|
||||
let closeIdx = -1
|
||||
|
||||
for (let j = i + 1; j < lines.length; j++) {
|
||||
if (closeRe.test(lines[j]!)) {
|
||||
closeIdx = j
|
||||
|
||||
break
|
||||
}
|
||||
|
||||
block.push(lines[i]!)
|
||||
}
|
||||
|
||||
if (closeIdx < 0) {
|
||||
start('paragraph')
|
||||
nodes.push(<MdInline key={key} t={t} text={line} />)
|
||||
i++
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if (headRest.trim()) {
|
||||
block.push(headRest)
|
||||
}
|
||||
|
||||
for (let j = i + 1; j < closeIdx; j++) {
|
||||
block.push(lines[j]!)
|
||||
}
|
||||
|
||||
const tail = lines[closeIdx]!.match(closeRe)![1]!.trimEnd()
|
||||
|
||||
if (tail.trim()) {
|
||||
block.push(tail)
|
||||
}
|
||||
|
||||
start('code')
|
||||
nodes.push(
|
||||
<Box flexDirection="column" key={key} paddingLeft={2}>
|
||||
<Text color={t.color.muted}>─ math</Text>
|
||||
|
||||
{block.map((l, j) => (
|
||||
<Text color={t.color.accent} key={j}>
|
||||
{l}
|
||||
{renderMath(texToUnicode(l))}
|
||||
</Text>
|
||||
))}
|
||||
</Box>
|
||||
)
|
||||
i = closeIdx + 1
|
||||
|
||||
continue
|
||||
}
|
||||
|
|
@ -451,7 +595,7 @@ function MdImpl({ compact, t, text }: MdProps) {
|
|||
start('heading')
|
||||
nodes.push(
|
||||
<Text bold color={t.color.accent} key={key}>
|
||||
{heading}
|
||||
<MdInline t={t} text={heading} />
|
||||
</Text>
|
||||
)
|
||||
i++
|
||||
|
|
@ -463,7 +607,7 @@ function MdImpl({ compact, t, text }: MdProps) {
|
|||
start('heading')
|
||||
nodes.push(
|
||||
<Text bold color={t.color.accent} key={key}>
|
||||
{line.trim()}
|
||||
<MdInline t={t} text={line.trim()} />
|
||||
</Text>
|
||||
)
|
||||
i += 2
|
||||
|
|
|
|||
|
|
@ -35,19 +35,60 @@ import type { Theme } from '../theme.js'
|
|||
|
||||
import { Md } from './markdown.js'
|
||||
|
||||
// Count ``` or ~~~ fence toggles in `s` up to `end`. Odd = currently inside
|
||||
// a fenced block; we can't split the prefix there or we'd orphan the fence.
|
||||
// Count ``` / ~~~ AND `$$` / `\[…\]` fence toggles in `s` up to `end`. Odd
|
||||
// = currently inside a fenced block; splitting the prefix there would
|
||||
// orphan the fence and let the unstable suffix re-render as broken
|
||||
// markdown. Math fences only toggle when the code fence is closed so
|
||||
// snippets like ` ```\n$$x$$\n``` ` (math example inside a code block)
|
||||
// don't double-count. A `$$x$$` line that opens AND closes on its own
|
||||
// produces zero net toggles; that's the `isSingleLine` check below
// (`line.length >= 4` and the line ends with `$$`).
|
||||
//
|
||||
// NB: this is INTENTIONALLY more conservative than `markdown.tsx`'s
|
||||
// parser, which falls back to paragraph rendering when an `$$` opener
|
||||
// has no matching closer. The renderer can do that safely because it
|
||||
// always sees the full text on every call. The streaming chunker
|
||||
// cannot — once a chunk is committed to the monotonic stable prefix it
|
||||
// is frozen, so prematurely deciding "this `$$` is just prose" would
|
||||
// permanently commit a paragraph rendering that becomes wrong the
|
||||
// instant the closer streams in. Treating any unmatched `$$` opener
|
||||
// as still-open keeps the boundary parked behind it until the closer
|
||||
// arrives (or the stream ends and the non-streaming `<Md>` takes over,
|
||||
// at which point the renderer's fallback kicks in correctly).
|
||||
const fenceOpenAt = (s: string, end: number) => {
|
||||
let open = false
|
||||
let codeOpen = false
|
||||
let mathOpen = false
|
||||
let mathOpener: '$$' | '\\[' | null = null
|
||||
let i = 0
|
||||
|
||||
while (i < end) {
|
||||
const nl = s.indexOf('\n', i)
|
||||
const lineEnd = nl < 0 || nl > end ? end : nl
|
||||
const line = s.slice(i, lineEnd)
|
||||
const line = s.slice(i, lineEnd).trim()
|
||||
|
||||
if (/^\s*(?:`{3,}|~{3,})/.test(line)) {
|
||||
open = !open
|
||||
if (/^(?:`{3,}|~{3,})/.test(line)) {
|
||||
codeOpen = !codeOpen
|
||||
} else if (!codeOpen) {
|
||||
if (!mathOpen && /^\$\$/.test(line)) {
|
||||
const isSingleLine = line.length >= 4 && /\$\$$/.test(line)
|
||||
|
||||
if (!isSingleLine) {
|
||||
mathOpen = true
|
||||
mathOpener = '$$'
|
||||
}
|
||||
} else if (!mathOpen && /^\\\[/.test(line)) {
|
||||
const isSingleLine = /\\\]$/.test(line)
|
||||
|
||||
if (!isSingleLine) {
|
||||
mathOpen = true
|
||||
mathOpener = '\\['
|
||||
}
|
||||
} else if (mathOpen && mathOpener === '$$' && /\$\$$/.test(line)) {
|
||||
mathOpen = false
|
||||
mathOpener = null
|
||||
} else if (mathOpen && mathOpener === '\\[' && /\\\]$/.test(line)) {
|
||||
mathOpen = false
|
||||
mathOpener = null
|
||||
}
|
||||
}
|
||||
|
||||
if (nl < 0 || nl >= end) {
|
||||
|
|
@ -57,7 +98,7 @@ const fenceOpenAt = (s: string, end: number) => {
|
|||
i = nl + 1
|
||||
}
|
||||
|
||||
return open
|
||||
return codeOpen || mathOpen
|
||||
}
|
||||
|
||||
// Find the last "\n\n" boundary before `end` that is OUTSIDE a fenced code
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue