// Mirror of https://github.com/NousResearch/hermes-agent.git
// Synced 2026-06-09 08:21:50 +00:00 · 132 lines · 3.3 KiB · TypeScript
/**
 * Shape of an acceptable fence language tag: one ASCII letter or digit
 * followed by any number of letters, digits, `+`, `#` or `-`, matched
 * case-insensitively (for example `ts`, `C++`, `c#`, `objective-c`).
 */
const VALID_LANGUAGE_RE = /^[a-z0-9][a-z0-9+#-]*$/i
/**
 * Sanitized language tags that do not name a programming language: the empty
 * tag (which is also what `sanitizeLanguageTag` returns for a missing or
 * invalid tag) plus the plain-text and Markdown aliases.
 */
const NON_CODE_FENCE_LANGUAGES = new Set(['', 'text', 'plain', 'plaintext', 'md', 'markdown'])
/**
 * Well-known language tags. `isLikelyProseCodeBlock` never applies its
 * "unrecognized tag" fallback to a block whose sanitized tag is listed here.
 *
 * `markdown` and `md` also appear in `NON_CODE_FENCE_LANGUAGES`; in
 * `isLikelyProseCodeBlock` that set is consulted first, so for those two tags
 * the non-code branch decides.
 */
const COMMON_CODE_LANGUAGES = new Set([
  'bash',
  'c',
  'cpp',
  'css',
  'diff',
  'go',
  'html',
  'java',
  'javascript',
  'js',
  'json',
  'jsx',
  'markdown',
  'md',
  'php',
  'python',
  'py',
  'ruby',
  'rust',
  'rs',
  'sh',
  'sql',
  'swift',
  'tsx',
  'ts',
  'typescript',
  'xml',
  'yaml',
  'yml'
])
/** Heuristic measurements taken from the body of a fenced block. */
interface CodeSignals {
  /** Number of lines that look like `-` or `*` list items. */
  bulletLines: number
  /** Total number of code-like pattern matches (keywords, operators, tags, SQL verbs). */
  codeSignals: number
  /** True when the body contains at least one `**bold**` span or inline backtick span. */
  hasMarkdown: boolean
  /** Number of non-empty lines that start with a letter, digit, quote, backtick, `*` or `-`. */
  proseLines: number
  /** The body with leading and trailing whitespace removed; all counts are taken from it. */
  trimmed: string
}
/**
 * Reduces a raw fence info string to a normalized language tag.
 *
 * Only the first whitespace-delimited token of the trimmed input is
 * considered. It is accepted when it matches `VALID_LANGUAGE_RE` and is at
 * most 16 characters long.
 *
 * @param tag - Raw info string or language tag.
 * @returns The lower-cased first token, or `''` when the input is empty or the
 * token is not an acceptable tag.
 */
export function sanitizeLanguageTag(tag: string): string {
  const trimmed = tag.trim()
  // `split` with limit 1 yields just the leading token; '' covers blank input.
  const first = trimmed.split(/\s/, 1)[0] || ''

  return VALID_LANGUAGE_RE.test(first) && first.length <= 16 ? first.toLowerCase() : ''
}
/**
 * Counts the lines of `body` that could plausibly be prose.
 *
 * A line counts when, after trimming, it is non-empty and starts with an ASCII
 * letter or digit, a single or double quote, a backtick, `*` or `-`. Lines
 * opening with other punctuation (for example `{`, `#`, `(`) are not counted.
 *
 * @param body - Text to inspect; lines are split on `\n`.
 * @returns The number of matching lines.
 */
function proseLineCount(body: string): number {
  return body
    .split('\n')
    .filter(line => {
      const trimmed = line.trim()

      return Boolean(trimmed) && /^[A-Za-z0-9"'`*-]/.test(trimmed)
    })
    .length
}
/**
 * Patterns whose matches are counted as evidence that a block holds code.
 * Every pattern carries the `g` flag so that `String.prototype.match` returns
 * all occurrences rather than only the first.
 */
const CODE_SIGNAL_RE = [
  // Common keywords at the start of a line or after whitespace (case-insensitive).
  /(^|\s)(const|let|var|function|class|import|export|return|if|for|while|switch)\b/gim,
  // Arrow and equality operators, braces, semicolons, and HTML/XML-style tags.
  /=>|==|===|!=|!==|\{|\}|;|<\/?[a-z][^>]*>/gi,
  // `#include` or an SQL statement verb opening a line (case-insensitive).
  /^\s*(#include|SELECT|INSERT|UPDATE|DELETE|CREATE|DROP)\b/gim
]
/**
 * Sums, over every pattern in `CODE_SIGNAL_RE`, the number of matches found
 * in `body`.
 *
 * @param body - Text to scan.
 * @returns Total match count; `0` when no pattern matches.
 */
function codeSignalCount(body: string): number {
  // `match` returns null when nothing matches, hence the `?? 0` fallback.
  return CODE_SIGNAL_RE.reduce((total, pattern) => total + (body.match(pattern)?.length ?? 0), 0)
}
/**
 * Collects the heuristic measurements used to tell prose from code.
 *
 * All measurements are taken from the trimmed body, which is also returned so
 * callers can detect an empty block.
 *
 * @param body - Raw content of the fenced block.
 * @returns Bullet-line count, code-signal count, Markdown flag, prose-line
 * count and the trimmed body.
 */
function codeSignals(body: string): CodeSignals {
  const trimmed = body.trim()
  // Markdown evidence: `**bold**` spans plus single-line inline-code spans.
  const markdownSignals = (trimmed.match(/\*\*[^*]+\*\*/g) || []).length + (trimmed.match(/`[^`\n]+`/g) || []).length

  return {
    // Lines of the form "- text" or "* text", optionally indented.
    bulletLines: (trimmed.match(/^\s*[-*]\s+\S+/gm) || []).length,
    codeSignals: codeSignalCount(trimmed),
    hasMarkdown: markdownSignals > 0,
    proseLines: proseLineCount(trimmed),
    trimmed
  }
}
/**
 * Decides whether a fenced block, given its info string and body, is more
 * likely prose than code.
 *
 * Checks run in this order:
 * 1. An info string that begins like a list item (`- `, `* `, `+ `) or with
 *    `http://` / `https://` yields `true`, regardless of the body.
 * 2. A body that is empty after trimming yields `false`.
 * 3. An info string with text beyond its first token yields `true` when the
 *    body has at most 2 code signals and either at least 2 prose lines or at
 *    least 1 bullet line.
 * 4. A sanitized language tag outside `NON_CODE_FENCE_LANGUAGES` yields `false`.
 * 5. Otherwise the result is `true` when the body has at least 2 bullet lines
 *    with Markdown markup and at most 2 code signals, or at least 3 prose
 *    lines and no code signals.
 *
 * @param info - Text following the opening fence.
 * @param body - Content between the fences.
 * @returns `true` when the block looks like prose.
 */
export function isLikelyProseFence(info: string, body: string): boolean {
  const trimmedInfo = info.trim()
  const rawInfo = trimmedInfo.toLowerCase()
  // An invalid or missing tag sanitizes to '', which counts as a non-code tag.
  const language = sanitizeLanguageTag(info)
  const infoToken = trimmedInfo.split(/\s+/, 1)[0] || ''
  // True when the info string holds more than a single token.
  const hasInfoTail = Boolean(trimmedInfo) && trimmedInfo !== infoToken

  if (/^[-*+]\s/.test(rawInfo) || /^https?:\/\//.test(rawInfo)) {
    return true
  }

  const signals = codeSignals(body)

  if (!signals.trimmed) {
    return false
  }

  if (hasInfoTail && signals.codeSignals <= 2 && (signals.proseLines >= 2 || signals.bulletLines >= 1)) {
    return true
  }

  if (!NON_CODE_FENCE_LANGUAGES.has(language)) {
    return false
  }

  return (
    (signals.bulletLines >= 2 && signals.hasMarkdown && signals.codeSignals <= 2) ||
    (signals.proseLines >= 3 && signals.codeSignals === 0)
  )
}
/**
 * Decides whether a code block, given its language tag and content, is more
 * likely prose than code.
 *
 * Checks run in this order:
 * 1. Content that is missing or empty after trimming, or that has 3 or more
 *    code signals, yields `false`.
 * 2. At least 1 bullet line together with Markdown markup or at least 2 prose
 *    lines yields `true`.
 * 3. For a sanitized tag in `NON_CODE_FENCE_LANGUAGES` (including a missing or
 *    invalid tag), the result is `true` only with at least 3 prose lines and
 *    no code signals.
 * 4. For any other tag, the result is `true` only when the tag is not in
 *    `COMMON_CODE_LANGUAGES` and the content has at least 2 prose lines and at
 *    most 1 code signal.
 *
 * @param language - Language tag of the block; `undefined` is treated as `''`.
 * @param code - Content of the block; `undefined` is treated as `''`.
 * @returns `true` when the block looks like prose.
 */
export function isLikelyProseCodeBlock(language: string | undefined, code: string | undefined): boolean {
  const cleanLanguage = sanitizeLanguageTag(language || '')
  const signals = codeSignals(code || '')

  if (!signals.trimmed || signals.codeSignals >= 3) {
    return false
  }

  if (signals.bulletLines >= 1 && (signals.hasMarkdown || signals.proseLines >= 2)) {
    return true
  }

  if (NON_CODE_FENCE_LANGUAGES.has(cleanLanguage)) {
    return signals.proseLines >= 3 && signals.codeSignals === 0
  }

  return !COMMON_CODE_LANGUAGES.has(cleanLanguage) && signals.proseLines >= 2 && signals.codeSignals <= 1
}