mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-17 09:41:58 +00:00
fix(desktop): coalesce interleaved reasoning/content stream parts
Models that interleave their reasoning_content and content token streams (Kimi/DeepSeek/GLM-style routes) emit text -> reasoning -> text deltas within a single tool-bounded segment. Appending each delta as its own part shredded one sentence into "Let me" / Thinking / "verify the file", with a Thinking disclosure wedged mid-sentence. Coalesce streaming deltas into the most recent same-type part within the current segment (bounded by any non-streaming part, e.g. a tool call). The opposite streaming channel is transparent, so a reasoning burst between two content deltas no longer opens a fresh text part, while a real tool call still starts a new segment and preserves narration order. Data-layer only; the renderer already groups consecutive reasoning.
This commit is contained in:
parent
5bfed0fe07
commit
1cb75b7971
2 changed files with 94 additions and 24 deletions
|
|
@@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest'
|
|||
import type { ChatMessage, ChatMessagePart } from './chat-messages'
|
||||
import {
|
||||
appendAssistantTextPart,
|
||||
appendReasoningPart,
|
||||
chatMessageText,
|
||||
preserveLocalAssistantErrors,
|
||||
renderMediaTags,
|
||||
|
|
@@ -175,6 +176,52 @@ describe('renderMediaTags', () => {
|
|||
})
|
||||
})
|
||||
|
||||
describe('interleaved reasoning/text coalescing', () => {
|
||||
it('keeps narration contiguous when reasoning interrupts mid-sentence', () => {
|
||||
// Models that interleave reasoning_content + content deltas emit
|
||||
// text → reasoning → text within one tool-bounded segment. The two text
|
||||
// fragments are really one sentence and must not be split by the
|
||||
// "Thinking" block between them.
|
||||
let parts: ChatMessagePart[] = appendAssistantTextPart([], 'Let me ')
|
||||
parts = appendReasoningPart(parts, 'checking the file...')
|
||||
parts = appendAssistantTextPart(parts, 'verify the full file is correct:')
|
||||
|
||||
expect(parts.map(p => p.type)).toEqual(['text', 'reasoning'])
|
||||
expect((parts[0] as { text: string }).text).toBe('Let me verify the full file is correct:')
|
||||
expect((parts[1] as { text: string }).text).toBe('checking the file...')
|
||||
})
|
||||
|
||||
it('merges reasoning bursts that straddle a narration fragment', () => {
|
||||
let parts: ChatMessagePart[] = appendReasoningPart([], 'first thought ')
|
||||
parts = appendAssistantTextPart(parts, 'Working on it.')
|
||||
parts = appendReasoningPart(parts, 'second thought')
|
||||
|
||||
expect(parts.map(p => p.type)).toEqual(['reasoning', 'text'])
|
||||
expect((parts[0] as { text: string }).text).toBe('first thought second thought')
|
||||
expect((parts[1] as { text: string }).text).toBe('Working on it.')
|
||||
})
|
||||
|
||||
it('starts a fresh text part after a tool call (segment boundary)', () => {
|
||||
let parts: ChatMessagePart[] = appendAssistantTextPart([], 'Let me check.')
|
||||
parts = upsertToolPart(parts, { name: 'read_file', tool_id: 'tc-1' }, 'running')
|
||||
parts = appendAssistantTextPart(parts, 'Now editing.')
|
||||
|
||||
expect(parts.map(p => p.type)).toEqual(['text', 'tool-call', 'text'])
|
||||
expect((parts[0] as { text: string }).text).toBe('Let me check.')
|
||||
expect((parts[2] as { text: string }).text).toBe('Now editing.')
|
||||
})
|
||||
|
||||
it('does not merge reasoning across a tool call', () => {
|
||||
let parts: ChatMessagePart[] = appendReasoningPart([], 'before tool')
|
||||
parts = upsertToolPart(parts, { name: 'read_file', tool_id: 'tc-1' }, 'running')
|
||||
parts = appendReasoningPart(parts, 'after tool')
|
||||
|
||||
expect(parts.map(p => p.type)).toEqual(['reasoning', 'tool-call', 'reasoning'])
|
||||
expect((parts[0] as { text: string }).text).toBe('before tool')
|
||||
expect((parts[2] as { text: string }).text).toBe('after tool')
|
||||
})
|
||||
})
|
||||
|
||||
describe('preserveLocalAssistantErrors', () => {
|
||||
it('preserves a local user+error pair when hydration omits the failed turn', () => {
|
||||
const nextMessages: ChatMessage[] = [
|
||||
|
|
|
|||
|
|
@@ -178,52 +178,75 @@ function displayContentForMessage(role: SessionMessage['role'], content: unknown
|
|||
return [refs.join('\n'), visibleText].filter(Boolean).join('\n\n') || visibleText
|
||||
}
|
||||
|
||||
export function appendTextPart(parts: ChatMessagePart[], delta: string): ChatMessagePart[] {
|
||||
const next = [...parts]
|
||||
const last = next.at(-1)
|
||||
// When a model interleaves its `reasoning_content` and `content` token
|
||||
// streams, deltas land as text → reasoning → text inside a single
|
||||
// tool-bounded segment. Appending each delta as its own part shreds one
|
||||
// sentence into "Let me" / Thinking / "verify the file" — the
|
||||
// interleaved-thinking fragmentation users hit on Kimi/DeepSeek/GLM-style
|
||||
// routes. To keep narration and thinking each contiguous, a streaming delta
|
||||
// merges into the most recent same-type part *within the current segment*.
|
||||
//
|
||||
// A segment is bounded by any non-streaming part (a tool call, image, …): the
|
||||
// opposite streaming channel (text <-> reasoning) is transparent, so a
|
||||
// reasoning burst between two content deltas does NOT open a fresh text part,
|
||||
// but a real tool call does. This collapses interleave noise without
|
||||
// reordering narration across tool calls.
|
||||
function segmentMergeIndex(parts: ChatMessagePart[], type: 'text' | 'reasoning'): number {
|
||||
for (let i = parts.length - 1; i >= 0; i--) {
|
||||
const partType = parts[i]?.type
|
||||
|
||||
if (last?.type === 'text') {
|
||||
next[next.length - 1] = { ...last, text: `${last.text}${delta}` }
|
||||
if (partType === type) {
|
||||
return i
|
||||
}
|
||||
|
||||
return next
|
||||
// text <-> reasoning is the interleave we're collapsing; skip past it.
|
||||
// Anything else (tool-call, file, image, …) closes the segment.
|
||||
if (partType !== 'text' && partType !== 'reasoning') {
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
next.push(textPart(delta))
|
||||
return -1
|
||||
}
|
||||
|
||||
function mergeTextInto(parts: ChatMessagePart[], index: number, delta: string): ChatMessagePart[] {
|
||||
const next = [...parts]
|
||||
const part = next[index]
|
||||
next[index] = { ...part, text: `${(part as { text: string }).text}${delta}` } as ChatMessagePart
|
||||
|
||||
return next
|
||||
}
|
||||
|
||||
export function appendAssistantTextPart(parts: ChatMessagePart[], delta: string): ChatMessagePart[] {
|
||||
const next = appendTextPart(parts, delta)
|
||||
const last = next.at(-1)
|
||||
export function appendTextPart(parts: ChatMessagePart[], delta: string): ChatMessagePart[] {
|
||||
const idx = segmentMergeIndex(parts, 'text')
|
||||
|
||||
if (last?.type === 'text') {
|
||||
const current = last.text
|
||||
return idx >= 0 ? mergeTextInto(parts, idx, delta) : [...parts, textPart(delta)]
|
||||
}
|
||||
|
||||
export function appendAssistantTextPart(parts: ChatMessagePart[], delta: string): ChatMessagePart[] {
|
||||
const idx = segmentMergeIndex(parts, 'text')
|
||||
const targetIndex = idx >= 0 ? idx : parts.length
|
||||
const next = idx >= 0 ? mergeTextInto(parts, idx, delta) : [...parts, textPart(delta)]
|
||||
const target = next[targetIndex]
|
||||
|
||||
if (target?.type === 'text') {
|
||||
const current = target.text
|
||||
|
||||
const deltaMayContainMedia =
|
||||
delta.includes('MEDIA:') || delta.includes('DIA:') || delta.includes('EDIA:') || delta.includes('IA:')
|
||||
|
||||
const needsMediaPass = deltaMayContainMedia || current.includes('MEDIA:')
|
||||
const nextText = needsMediaPass ? renderMediaTags(current) : current
|
||||
next[next.length - 1] = nextText === current ? last : { ...last, text: nextText }
|
||||
next[targetIndex] = nextText === current ? target : { ...target, text: nextText }
|
||||
}
|
||||
|
||||
return next
|
||||
}
|
||||
|
||||
export function appendReasoningPart(parts: ChatMessagePart[], delta: string): ChatMessagePart[] {
|
||||
const next = [...parts]
|
||||
const last = next.at(-1)
|
||||
const idx = segmentMergeIndex(parts, 'reasoning')
|
||||
|
||||
if (last?.type === 'reasoning') {
|
||||
next[next.length - 1] = { ...last, text: `${last.text}${delta}` }
|
||||
|
||||
return next
|
||||
}
|
||||
|
||||
next.push(reasoningPart(delta))
|
||||
|
||||
return next
|
||||
return idx >= 0 ? mergeTextInto(parts, idx, delta) : [...parts, reasoningPart(delta)]
|
||||
}
|
||||
|
||||
export function hasToolPart(message: ChatMessage): boolean {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue