fix(desktop): show summarizing indicator during auto-compaction

Auto-compression rewrites history mid-turn, which made long threads look
like they reset. Re-tag the gateway lifecycle status as compacting and
surface it in the desktop thread loading indicators.
This commit is contained in:
Brooklyn Nicholson 2026-06-14 02:28:07 -05:00
parent c8ad2ca997
commit 715b691723
7 changed files with 218 additions and 23 deletions

View file

@ -40,6 +40,16 @@ from agent.model_metadata import estimate_request_tokens_rough
logger = logging.getLogger(__name__)
# Stable marker the gateway matches on to re-tag the auto-compaction lifecycle
# status as ``kind="compacting"`` (tui_gateway/server.py::_status_update), so
# drivers like the desktop app can show an explicit "Summarizing…" indicator
# instead of the transcript appearing to silently reset. Keep the marker phrase
# intact if you reword COMPACTION_STATUS.
# Substring the gateway searches for; it is interpolated into COMPACTION_STATUS
# below, so the two cannot drift apart unless the f-string stops using it.
COMPACTION_STATUS_MARKER = "Compacting context"
# Full user-facing status line that compress_context() passes to
# agent._emit_status().
COMPACTION_STATUS = (
f"🗜️ {COMPACTION_STATUS_MARKER} — summarizing earlier conversation so I can continue..."
)
def _compression_lock_holder(agent: Any) -> str:
"""Build a unique holder id for the lock: pid:tid:agent-instance:uuid.
@ -324,9 +334,7 @@ def compress_context(
f"{approx_tokens:,}" if approx_tokens else "unknown", agent.model,
focus_topic,
)
agent._emit_status(
"🗜️ Compacting context — summarizing earlier conversation so I can continue..."
)
agent._emit_status(COMPACTION_STATUS)
# ── Compression lock ────────────────────────────────────────────────
# Atomic, state.db-backed lock per session_id. Without this, two
@ -799,6 +807,8 @@ def try_shrink_image_parts_in_messages(
__all__ = [
"COMPACTION_STATUS",
"COMPACTION_STATUS_MARKER",
"check_compression_model_feasibility",
"replay_compression_warning",
"compress_context",

View file

@ -27,6 +27,7 @@ import { triggerHaptic } from '@/lib/haptics'
import { isProviderSetupErrorMessage } from '@/lib/provider-setup-errors'
import { parseTodos } from '@/lib/todos'
import { setClarifyRequest } from '@/store/clarify'
import { setSessionCompacting } from '@/store/compaction'
import { refreshBackgroundProcesses } from '@/store/composer-status'
import { $gateway } from '@/store/gateway'
import { dispatchNativeNotification } from '@/store/native-notifications'
@ -825,6 +826,7 @@ export function useMessageStream({
flushQueuedDeltas(sessionId)
clearSessionSubagents(sessionId)
setSessionCompacting(sessionId, null)
nativeSubagentSessionsRef.current.delete(sessionId)
if (isActiveEvent) {
@ -870,6 +872,7 @@ export function useMessageStream({
// session so a background turn finishing can't wipe the active chat's
// prompt, and vice versa.
clearAllPrompts(sessionId)
setSessionCompacting(sessionId, null)
flushQueuedDeltas(sessionId)
@ -904,10 +907,7 @@ export function useMessageStream({
// terminal/process tool calls are the only things that spawn or reap
// background processes — sync the composer status stack right after.
if (
!sessionInterrupted(sessionId) &&
(payload?.name === 'terminal' || payload?.name === 'process')
) {
if (!sessionInterrupted(sessionId) && (payload?.name === 'terminal' || payload?.name === 'process')) {
void refreshBackgroundProcesses(sessionId)
}
}
@ -1061,9 +1061,14 @@ export function useMessageStream({
})
}
} else if (event.type === 'status.update') {
// The gateway's notification poller announces background process
// completions / watch matches here — re-sync the status stack.
if (sessionId && payload?.kind === 'process') {
if (sessionId && payload?.kind === 'compacting') {
// Auto-compaction is rewriting history to a summary mid-turn — surface
// it so the transcript doesn't look like it silently reset. Cleared
// when the turn ends (message.complete / error) or a new one starts.
setSessionCompacting(sessionId, coerceGatewayText(payload?.text))
} else if (sessionId && payload?.kind === 'process') {
// The gateway's notification poller announces background process
// completions / watch matches here — re-sync the status stack.
void refreshBackgroundProcesses(sessionId)
}
} else if (event.type === 'error') {
@ -1075,6 +1080,7 @@ export function useMessageStream({
// the failed turn (same intent as the message.complete clear).
if (sessionId) {
clearAllPrompts(sessionId)
setSessionCompacting(sessionId, null)
}
dispatchNativeNotification({

View file

@ -96,6 +96,7 @@ import { extractPreviewTargets } from '@/lib/preview-targets'
import { useEnterAnimation } from '@/lib/use-enter-animation'
import { cn } from '@/lib/utils'
import { playSpeechText, stopVoicePlayback } from '@/lib/voice-playback'
import { $compactionStatus } from '@/store/compaction'
import type { ComposerAttachment } from '@/store/composer'
import { notifyError } from '@/store/notifications'
import { $connection } from '@/store/session'
@ -273,10 +274,7 @@ const AssistantMessage: FC<{ onBranchInNewChat?: (messageId: string) => void }>
return pickPrimaryPreviewTarget(extractPreviewTargets(completedText))
}, [completedText])
const getMessageText = useCallback(
() => messageContentText(messageRuntime.getState().content),
[messageRuntime]
)
const getMessageText = useCallback(() => messageContentText(messageRuntime.getState().content), [messageRuntime])
const enterRef = useEnterAnimation(isRunning, `assistant-message:${messageId}`)
@ -342,10 +340,12 @@ const StatusRow: FC<{ children: ReactNode; label: string } & React.ComponentProp
const ResponseLoadingIndicator: FC = () => {
const { t } = useI18n()
const elapsed = useElapsedSeconds()
const compaction = useStore($compactionStatus)
return (
<StatusRow data-slot="aui_response-loading" label={t.assistant.thread.loadingResponse}>
<StatusRow data-slot="aui_response-loading" label={compaction ?? t.assistant.thread.loadingResponse}>
<span aria-hidden="true" className="dither inline-block size-3 rounded-[2px] text-midground/80 animate-pulse" />
{compaction && <span className="min-w-0 truncate">{compaction}</span>}
<ActivityTimerText seconds={elapsed} />
</StatusRow>
)
@ -380,6 +380,7 @@ const StreamStallIndicator: FC = () => {
})
const [stalled, setStalled] = useState(false)
const compaction = useStore($compactionStatus)
useEffect(() => {
setStalled(false)
@ -388,15 +389,18 @@ const StreamStallIndicator: FC = () => {
return () => window.clearTimeout(id)
}, [activity])
const elapsed = useElapsedSeconds(stalled)
// Compaction surfaces immediately; an ordinary stall waits out STREAM_STALL_S.
const active = stalled || Boolean(compaction)
const elapsed = useElapsedSeconds(active)
if (!stalled) {
if (!active) {
return null
}
return (
<StatusRow className="mt-1.5" data-slot="aui_stream-stall" label="Hermes is thinking">
<StatusRow className="mt-1.5" data-slot="aui_stream-stall" label={compaction ?? 'Hermes is thinking'}>
<span aria-hidden="true" className="dither inline-block size-3 rounded-[2px] text-midground/80 animate-pulse" />
{compaction && <span className="min-w-0 truncate">{compaction}</span>}
<ActivityTimerText seconds={elapsed} />
</StatusRow>
)

View file

@ -0,0 +1,55 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest'
import { $compactingSessions, $compactionStatus, setSessionCompacting } from './compaction'
import { $activeSessionId } from './session'
describe('compaction store', () => {
  // Both atoms are module-level singletons, so every case starts and ends
  // from an empty map with no active session.
  const resetStores = (): void => {
    $compactingSessions.set({})
    $activeSessionId.set(null)
  }

  beforeEach(resetStores)
  afterEach(resetStores)

  it('tracks compaction status per session independently', () => {
    setSessionCompacting('session-a', 'Summarizing a…')
    setSessionCompacting('session-b', 'Summarizing b…')

    const sessions = $compactingSessions.get()

    expect(sessions['session-a']).toBe('Summarizing a…')
    expect(sessions['session-b']).toBe('Summarizing b…')
  })

  it('exposes only the active session via the focus-scoped view', () => {
    setSessionCompacting('session-a', 'Summarizing a…')

    // No session is focused yet, so the scoped view stays empty.
    expect($compactionStatus.get()).toBeNull()

    $activeSessionId.set('session-a')
    expect($compactionStatus.get()).toBe('Summarizing a…')

    // Focusing a session that is not compacting hides the status again.
    $activeSessionId.set('session-b')
    expect($compactionStatus.get()).toBeNull()
  })

  it('clears a session without disturbing the others', () => {
    setSessionCompacting('session-a', 'Summarizing a…')
    setSessionCompacting('session-b', 'Summarizing b…')

    setSessionCompacting('session-a', null)

    const sessions = $compactingSessions.get()

    expect(sessions['session-a']).toBeUndefined()
    expect(sessions['session-b']).toBe('Summarizing b…')
  })

  it('is a no-op when clearing an unknown session', () => {
    setSessionCompacting('session-a', 'Summarizing a…')

    const snapshot = $compactingSessions.get()
    setSessionCompacting('session-missing', null)

    // Same object identity means the atom was never re-set.
    expect($compactingSessions.get()).toBe(snapshot)
  })
})

View file

@ -0,0 +1,42 @@
import { atom, computed } from 'nanostores'
import { $activeSessionId } from './session'
// Compaction status lines for sessions whose agent is in the middle of a
// context compaction, keyed by runtime session id. Auto-compaction fires
// mid-turn and replaces history with a summary; without a visible signal the
// transcript looks like it reset itself. State is kept per session (like
// clarify) so a background chat compacting cannot clobber the foreground view.
// Callers clear the entry when a turn starts, completes, or errors.

// Map a missing session id onto the empty-string key so lookups never use
// null/undefined as a record key.
function keyFor(sessionId: string | null | undefined): string {
  return sessionId ?? ''
}

export const $compactingSessions = atom<Record<string, string>>({})

// Focus-scoped view: the status line of the currently viewed session, or null
// when that session is not compacting. The thread loading indicators read this
// to swap their label to the summarizing message.
export const $compactionStatus = computed([$compactingSessions, $activeSessionId], (sessions, activeId) => {
  const status = sessions[keyFor(activeId)]

  return status ?? null
})
// Set (truthy `status`) or clear (null / empty `status`) the compaction status
// line for one session. The atom is only re-set when the stored value would
// actually change, so subscribers are not notified for redundant updates.
export function setSessionCompacting(sessionId: string | null | undefined, status: string | null): void {
  const key = keyFor(sessionId)
  const current = $compactingSessions.get()

  if (!status) {
    // Clearing: only touch the atom when there is an entry to drop.
    if (key in current) {
      const remaining = { ...current }

      delete remaining[key]
      $compactingSessions.set(remaining)
    }

    return
  }

  // Setting: skip the write when the same text is already stored.
  if (current[key] !== status) {
    $compactingSessions.set({ ...current, [key]: status })
  }
}

View file

@ -0,0 +1,73 @@
"""Auto-compaction status re-tagging for the desktop "Summarizing…" indicator.
Auto-compaction reaches the gateway as a generic ``lifecycle`` status. The
gateway re-tags it as ``kind="compacting"`` so drivers (the desktop app) can
show an explicit summarizing indicator instead of the transcript appearing to
silently reset mid-turn.
"""
from __future__ import annotations
import importlib
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture()
def server():
    """Yield ``tui_gateway.server`` imported with selected modules stubbed.

    The listed ``sys.modules`` entries are replaced with ``MagicMock`` objects
    for the duration of the test; ``patch.dict`` restores ``sys.modules`` when
    the fixture is torn down.
    """
    stubbed_modules = {
        "hermes_constants": MagicMock(
            get_hermes_home=MagicMock(return_value="/tmp/hermes_test_compaction")
        ),
        "hermes_cli.env_loader": MagicMock(),
        "hermes_cli.banner": MagicMock(),
        "hermes_state": MagicMock(),
    }
    with patch.dict("sys.modules", stubbed_modules):
        yield importlib.import_module("tui_gateway.server")
def _capture(server, monkeypatch):
events: list[dict] = []
monkeypatch.setattr(
server, "_emit", lambda event, sid, payload=None: events.append(payload or {})
)
return events
def test_compaction_lifecycle_is_retagged(server, monkeypatch):
    """A lifecycle status carrying the compaction text is emitted as ``compacting``."""
    from agent.conversation_compression import COMPACTION_STATUS

    captured = _capture(server, monkeypatch)

    server._status_update("sid", "lifecycle", COMPACTION_STATUS)

    expected_payload = {"kind": "compacting", "text": COMPACTION_STATUS}
    assert captured == [expected_payload]
def test_other_lifecycle_status_stays_lifecycle(server, monkeypatch):
    """A lifecycle status without the compaction marker keeps its original kind."""
    captured = _capture(server, monkeypatch)

    server._status_update("sid", "lifecycle", "❌ Rate limited after 5 retries")

    first_payload = captured[0]
    assert first_payload["kind"] == "lifecycle"
def test_manual_compressing_kind_is_preserved(server, monkeypatch):
    """A status already tagged ``compressing`` is passed through unchanged."""
    captured = _capture(server, monkeypatch)

    server._status_update("sid", "compressing", "⠋ compressing 40 messages…")

    first_payload = captured[0]
    assert first_payload["kind"] == "compressing"
def test_compaction_status_contains_marker():
    """The emitted status text must contain the marker the gateway matches on.

    Contract: if COMPACTION_STATUS is reworded, COMPACTION_STATUS_MARKER has
    to survive inside it.
    """
    from agent.conversation_compression import COMPACTION_STATUS as status_text
    from agent.conversation_compression import COMPACTION_STATUS_MARKER as marker

    assert marker in status_text

View file

@ -757,11 +757,16 @@ def _status_update(sid: str, kind: str, text: str | None = None):
body = (text if text is not None else kind).strip()
if not body:
return
_emit(
"status.update",
sid,
{"kind": kind if text is not None else "status", "text": body},
)
out_kind = kind if text is not None else "status"
# Auto-compaction reaches us as a generic "lifecycle" status. Re-tag it so
# drivers (desktop app) can show an explicit "Summarizing…" indicator —
# otherwise a mid-turn compaction looks like the transcript reset itself.
if out_kind == "lifecycle":
from agent.conversation_compression import COMPACTION_STATUS_MARKER
if COMPACTION_STATUS_MARKER in body:
out_kind = "compacting"
_emit("status.update", sid, {"kind": out_kind, "text": body})
def _estimate_image_tokens(width: int, height: int) -> int: