mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(tui): don't swallow Kimi/Qwen ~! ~? kaomoji as subscript spans
The inline markdown regex had `~([^~\s][^~]*?)~` for Pandoc-style subscript
(H~2~O, CO~2~). With models that decorate prose with kaomoji like `thing ~!`
and `cool ~?` — Kimi especially — the opener `~!` paired with the next
stray `~` on the line, and everything between them was rendered as dim text
with a leading `_` character, mangling the markdown output.
Tighten the pattern to short alphanumeric-only content (`~[A-Za-z0-9]{1,8}~`)
since real subscript never contains punctuation, spaces, or long runs.
Same tightening applied to stripInlineMarkup so width measurement stays
consistent. Classic CLI was unaffected because it renders these literally.
This commit is contained in:
parent
9fa49206dc
commit
43eb1153e9
2 changed files with 39 additions and 2 deletions
|
|
@ -23,6 +23,31 @@ describe('INLINE_RE emphasis', () => {
|
|||
expect(matches('a*b*c')).toEqual(['*b*'])
|
||||
expect(matches('a**bold**c')).toEqual(['**bold**'])
|
||||
})
|
||||
|
||||
it('matches short alphanumeric subscript (H~2~O, CO~2~, X~n~)', () => {
|
||||
expect(matches('H~2~O')).toEqual(['~2~'])
|
||||
expect(matches('CO~2~ levels')).toEqual(['~2~'])
|
||||
expect(matches('the X~n~ term')).toEqual(['~n~'])
|
||||
})
|
||||
|
||||
it('ignores kaomoji-style ~! and ~? punctuation', () => {
|
||||
// Kimi / Qwen / GLM emit these as decorators and the whole span between
|
||||
// two tildes used to get collapsed into one dim blob.
|
||||
expect(matches('Aww ~! Building step by step, I love it ~!')).toEqual([])
|
||||
expect(matches('cool ~? yeah ~?')).toEqual([])
|
||||
expect(matches('mixed ~! and ~? flow')).toEqual([])
|
||||
})
|
||||
|
||||
it('ignores tilde spans that contain spaces or punctuation', () => {
|
||||
// Real subscript doesn't contain spaces; a tilde followed by words-then-
|
||||
// tilde is almost always conversational. Matching it swallows text.
|
||||
expect(matches('hello ~good idea~ there')).toEqual([])
|
||||
expect(matches('x ~oh no!~ y')).toEqual([])
|
||||
})
|
||||
|
||||
it('does not let strikethrough eat subscript', () => {
|
||||
expect(matches('~~strike~~ and H~2~O')).toEqual(['~~strike~~', '~2~'])
|
||||
})
|
||||
})
|
||||
|
||||
describe('stripInlineMarkup', () => {
|
||||
|
|
@ -31,6 +56,11 @@ describe('stripInlineMarkup', () => {
|
|||
expect(stripInlineMarkup('browser_screenshot_ecc.png')).toBe('browser_screenshot_ecc.png')
|
||||
expect(stripInlineMarkup('__bold__ and foo__bar__')).toBe('bold and foo__bar__')
|
||||
})
|
||||
|
||||
it('leaves ~!/~? kaomoji alone and still handles real subscript', () => {
|
||||
expect(stripInlineMarkup('Yay ~! nice work ~!')).toBe('Yay ~! nice work ~!')
|
||||
expect(stripInlineMarkup('H~2~O and CO~2~')).toBe('H_2O and CO_2')
|
||||
})
|
||||
})
|
||||
|
||||
describe('protocol sentinels', () => {
|
||||
|
|
|
|||
|
|
@ -16,8 +16,15 @@ const MD_URL_RE = '((?:[^\\s()]|\\([^\\s()]*\\))+?)'
|
|||
export const MEDIA_LINE_RE = /^\s*[`"']?MEDIA:\s*(\S+?)[`"']?\s*$/
|
||||
export const AUDIO_DIRECTIVE_RE = /^\s*\[\[audio_as_voice\]\]\s*$/
|
||||
|
||||
// Subscript (`~x~`) is restricted to short alphanumeric runs so prose like
|
||||
// `thing ~! more ~?` from Kimi / Qwen / GLM (kaomoji-style decorators) doesn't
|
||||
// get parsed as a span that swallows everything between two stray tildes. Real
|
||||
// Pandoc subscript is H~2~O / CO~2~ / X~n~ — always short alphanumeric content. Without
|
||||
// this constraint the old pattern `~([^~\s][^~]*?)~` paired up `~!` openers
|
||||
// with the next `~` anywhere on the line and rendered the interior as dim
|
||||
// text with a `_` prefix.
|
||||
export const INLINE_RE = new RegExp(
|
||||
`(!\\[(.*?)\\]\\(${MD_URL_RE}\\)|\\[(.+?)\\]\\(${MD_URL_RE}\\)|<((?:https?:\\/\\/|mailto:)[^>\\s]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,})>|~~(.+?)~~|\`([^\\\`]+)\`|\\*\\*(.+?)\\*\\*|(?<!\\w)__(.+?)__(?!\\w)|\\*(.+?)\\*|(?<!\\w)_(.+?)_(?!\\w)|==(.+?)==|\\[\\^([^\\]]+)\\]|\\^([^^\\s][^^]*?)\\^|~([^~\\s][^~]*?)~|(https?:\\/\\/[^\\s<]+))`,
|
||||
`(!\\[(.*?)\\]\\(${MD_URL_RE}\\)|\\[(.+?)\\]\\(${MD_URL_RE}\\)|<((?:https?:\\/\\/|mailto:)[^>\\s]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,})>|~~(.+?)~~|\`([^\\\`]+)\`|\\*\\*(.+?)\\*\\*|(?<!\\w)__(.+?)__(?!\\w)|\\*(.+?)\\*|(?<!\\w)_(.+?)_(?!\\w)|==(.+?)==|\\[\\^([^\\]]+)\\]|\\^([^^\\s][^^]*?)\\^|~([A-Za-z0-9]{1,8})~|(https?:\\/\\/[^\\s<]+))`,
|
||||
'g'
|
||||
)
|
||||
|
||||
|
|
@ -108,7 +115,7 @@ export const stripInlineMarkup = (value: string) =>
|
|||
.replace(/==(.+?)==/g, '$1')
|
||||
.replace(/\[\^([^\]]+)\]/g, '[$1]')
|
||||
.replace(/\^([^^\s][^^]*?)\^/g, '^$1')
|
||||
.replace(/~([^~\s][^~]*?)~/g, '_$1')
|
||||
.replace(/~([A-Za-z0-9]{1,8})~/g, '_$1')
|
||||
|
||||
const renderTable = (key: number, rows: string[][], t: Theme) => {
|
||||
const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length)))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue