hermes-agent/apps/desktop/src/lib/markdown-code.ts
Brooklyn Nicholson db884f4646 chore: uptick
2026-05-02 03:19:39 -05:00

132 lines
3.3 KiB
TypeScript

const VALID_LANGUAGE_RE = /^[a-z0-9][a-z0-9+#-]*$/i
const NON_CODE_FENCE_LANGUAGES = new Set(['', 'text', 'plain', 'plaintext', 'md', 'markdown'])
const COMMON_CODE_LANGUAGES = new Set([
'bash',
'c',
'cpp',
'css',
'diff',
'go',
'html',
'java',
'javascript',
'js',
'json',
'jsx',
'markdown',
'md',
'php',
'python',
'py',
'ruby',
'rust',
'rs',
'sh',
'sql',
'swift',
'tsx',
'ts',
'typescript',
'xml',
'yaml',
'yml'
])
interface CodeSignals {
bulletLines: number
codeSignals: number
hasMarkdown: boolean
proseLines: number
trimmed: string
}
export function sanitizeLanguageTag(tag: string): string {
const trimmed = tag.trim()
const first = trimmed.split(/\s/, 1)[0] || ''
return VALID_LANGUAGE_RE.test(first) && first.length <= 16 ? first.toLowerCase() : ''
}
function proseLineCount(body: string): number {
return body
.split('\n')
.filter(line => {
const trimmed = line.trim()
return Boolean(trimmed) && /^[A-Za-z0-9"'`*-]/.test(trimmed)
})
.length
}
const CODE_SIGNAL_RE = [
/(^|\s)(const|let|var|function|class|import|export|return|if|for|while|switch)\b/gim,
/=>|==|===|!=|!==|\{|\}|;|<\/?[a-z][^>]*>/gi,
/^\s*(#include|SELECT|INSERT|UPDATE|DELETE|CREATE|DROP)\b/gim
]
function codeSignalCount(body: string): number {
return CODE_SIGNAL_RE.reduce((total, pattern) => total + (body.match(pattern)?.length ?? 0), 0)
}
function codeSignals(body: string): CodeSignals {
const trimmed = body.trim()
const markdownSignals = (trimmed.match(/\*\*[^*]+\*\*/g) || []).length + (trimmed.match(/`[^`\n]+`/g) || []).length
return {
bulletLines: (trimmed.match(/^\s*[-*]\s+\S+/gm) || []).length,
codeSignals: codeSignalCount(trimmed),
hasMarkdown: markdownSignals > 0,
proseLines: proseLineCount(trimmed),
trimmed
}
}
export function isLikelyProseFence(info: string, body: string): boolean {
const trimmedInfo = info.trim()
const rawInfo = trimmedInfo.toLowerCase()
const language = sanitizeLanguageTag(info)
const infoToken = trimmedInfo.split(/\s+/, 1)[0] || ''
const hasInfoTail = Boolean(trimmedInfo) && trimmedInfo !== infoToken
if (/^[-*+]\s/.test(rawInfo) || /^https?:\/\//.test(rawInfo)) {
return true
}
const signals = codeSignals(body)
if (!signals.trimmed) {
return false
}
if (hasInfoTail && signals.codeSignals <= 2 && (signals.proseLines >= 2 || signals.bulletLines >= 1)) {
return true
}
if (!NON_CODE_FENCE_LANGUAGES.has(language)) {
return false
}
return (
(signals.bulletLines >= 2 && signals.hasMarkdown && signals.codeSignals <= 2) ||
(signals.proseLines >= 3 && signals.codeSignals === 0)
)
}
export function isLikelyProseCodeBlock(language: string | undefined, code: string | undefined): boolean {
const cleanLanguage = sanitizeLanguageTag(language || '')
const signals = codeSignals(code || '')
if (!signals.trimmed || signals.codeSignals >= 3) {
return false
}
if (signals.bulletLines >= 1 && (signals.hasMarkdown || signals.proseLines >= 2)) {
return true
}
if (NON_CODE_FENCE_LANGUAGES.has(cleanLanguage)) {
return signals.proseLines >= 3 && signals.codeSignals === 0
}
return !COMMON_CODE_LANGUAGES.has(cleanLanguage) && signals.proseLines >= 2 && signals.codeSignals <= 1
}