feat(desktop): show error + manual Retry when resume retries exhaust

When a stranded session window's bounded auto-retry gave up (the gateway
resume RPC and the REST fallback failing through all MAX_RESUME_RETRIES
attempts), the loader stayed latched forever. Add a
$resumeExhaustedSessionId atom, armed at the give-up point, so the chat
view swaps the perpetual spinner for an explicit error state and a manual
Retry button. A manual Retry, reconnect, or reselect clears the latch and
resets the auto-retry counter for a fresh cycle; a route change away from
the stranded session also clears it.

The new atom is distinct from $resumeFailedSessionId (armed during the
backoff window), so the error UI appears only once auto-recovery has
actually given up, not mid-retry. Also add i18n strings for
en/ja/zh/zh-hant and 3 tests covering latch-arms-on-exhaustion,
stays-clear-while-retries-remain, and clears-on-route-change.
This commit is contained in:
Austin Pickett 2026-06-17 11:45:30 -04:00
parent 1e2c91eaff
commit 253bfc0e3d
11 changed files with 136 additions and 3 deletions

View file

@ -15,7 +15,9 @@ import { Backdrop } from '@/components/Backdrop'
import { PromptOverlays } from '@/components/prompt-overlays' import { PromptOverlays } from '@/components/prompt-overlays'
import { Button } from '@/components/ui/button' import { Button } from '@/components/ui/button'
import { Codicon } from '@/components/ui/codicon' import { Codicon } from '@/components/ui/codicon'
import { ErrorState } from '@/components/ui/error-state'
import { getGlobalModelOptions, type HermesGateway } from '@/hermes' import { getGlobalModelOptions, type HermesGateway } from '@/hermes'
import { useI18n } from '@/i18n'
import type { ChatMessage } from '@/lib/chat-messages' import type { ChatMessage } from '@/lib/chat-messages'
import { quickModelOptions, sessionTitle, toRuntimeMessage } from '@/lib/chat-runtime' import { quickModelOptions, sessionTitle, toRuntimeMessage } from '@/lib/chat-runtime'
import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime' import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime'
@ -38,6 +40,7 @@ import {
$lastVisibleMessageIsUser, $lastVisibleMessageIsUser,
$messages, $messages,
$messagesEmpty, $messagesEmpty,
$resumeExhaustedSessionId,
$selectedStoredSessionId, $selectedStoredSessionId,
$sessions, $sessions,
sessionPinId sessionPinId
@ -86,6 +89,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
onEdit: (message: AppendMessage) => Promise<void> onEdit: (message: AppendMessage) => Promise<void>
onReload: (parentId: string | null) => Promise<void> onReload: (parentId: string | null) => Promise<void>
onRestoreToMessage?: (messageId: string) => Promise<void> onRestoreToMessage?: (messageId: string) => Promise<void>
onRetryResume: (sessionId: string) => void
onTranscribeAudio?: (audio: Blob) => Promise<string> onTranscribeAudio?: (audio: Blob) => Promise<string>
} }
@ -272,9 +276,11 @@ export function ChatView({
onEdit, onEdit,
onReload, onReload,
onRestoreToMessage, onRestoreToMessage,
onRetryResume,
onTranscribeAudio onTranscribeAudio
}: ChatViewProps) { }: ChatViewProps) {
const location = useLocation() const location = useLocation()
const { t } = useI18n()
const activeSessionId = useStore($activeSessionId) const activeSessionId = useStore($activeSessionId)
const awaitingResponse = useStore($awaitingResponse) const awaitingResponse = useStore($awaitingResponse)
const busy = useStore($busy) const busy = useStore($busy)
@ -296,6 +302,7 @@ export function ChatView({
const messagesEmpty = useStore($messagesEmpty) const messagesEmpty = useStore($messagesEmpty)
const lastVisibleIsUser = useStore($lastVisibleMessageIsUser) const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
const selectedSessionId = useStore($selectedStoredSessionId) const selectedSessionId = useStore($selectedStoredSessionId)
const resumeExhaustedSessionId = useStore($resumeExhaustedSessionId)
const routedSessionId = routeSessionId(location.pathname) const routedSessionId = routeSessionId(location.pathname)
const isRoutedSessionView = Boolean(routedSessionId) const isRoutedSessionView = Boolean(routedSessionId)
@ -315,9 +322,21 @@ export function ChatView({
// session exists — even if it has zero messages (a brand-new routed // session exists — even if it has zero messages (a brand-new routed
// session). The flicker where `busy` flips true briefly during hydrate // session). The flicker where `busy` flips true briefly during hydrate
// is handled by `threadLoadingState`'s last-visible-user gate. // is handled by `threadLoadingState`'s last-visible-user gate.
const loadingSession = isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId)) //
// resumeExhausted: the bounded auto-retry in use-route-resume gave up on this
// routed session (gateway RPC + REST fallback failed through every attempt).
// Suppress the loader and show an explicit error + manual Retry instead of
// spinning forever. Gated on the route matching so a stale latch from another
// session can't blank the current one.
const resumeExhausted = isRoutedSessionView && resumeExhaustedSessionId === routedSessionId
const loadingSession =
!resumeExhausted && isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser) const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
const showChatBar = !loadingSession // Hide the composer in the exhausted error state too: there's no live runtime
// to send to until a retry rebinds one.
const showChatBar = !loadingSession && !resumeExhausted
const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new') const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
const modelOptionsQuery = useQuery<ModelOptionsResponse>({ const modelOptionsQuery = useQuery<ModelOptionsResponse>({
@ -465,6 +484,21 @@ export function ChatView({
</Suspense> </Suspense>
)} )}
</ChatRuntimeBoundary> </ChatRuntimeBoundary>
{resumeExhausted && routedSessionId && (
<div className="absolute inset-0 z-10 grid place-items-center bg-(--ui-chat-surface-background) px-8 py-10">
<ErrorState
className="max-w-sm"
description={t.desktop.resumeStrandedBody}
title={t.desktop.resumeStrandedTitle}
>
<div className="grid justify-items-center">
<Button onClick={() => onRetryResume(routedSessionId)} size="sm" variant="outline">
{t.desktop.resumeRetry}
</Button>
</div>
</ErrorState>
</div>
)}
{showChatBar && <ScrollToBottomButton />} {showChatBar && <ScrollToBottomButton />}
<ChatDropOverlay kind={dragKind} /> <ChatDropOverlay kind={dragKind} />
<ChatSwapOverlay profile={gatewaySwapTarget} /> <ChatSwapOverlay profile={gatewaySwapTarget} />

View file

@ -1005,6 +1005,7 @@ export function DesktopController() {
onReload={reloadFromMessage} onReload={reloadFromMessage}
onRemoveAttachment={id => void composer.removeAttachment(id)} onRemoveAttachment={id => void composer.removeAttachment(id)}
onRestoreToMessage={restoreToMessage} onRestoreToMessage={restoreToMessage}
onRetryResume={sessionId => void resumeSession(sessionId, true)}
onSteer={steerPrompt} onSteer={steerPrompt}
onSubmit={submitText} onSubmit={submitText}
onThreadMessagesChange={handleThreadMessagesChange} onThreadMessagesChange={handleThreadMessagesChange}

View file

@ -2,6 +2,8 @@ import { cleanup, render } from '@testing-library/react'
import type { MutableRefObject } from 'react' import type { MutableRefObject } from 'react'
import { afterEach, describe, expect, it, vi } from 'vitest' import { afterEach, describe, expect, it, vi } from 'vitest'
import { $resumeExhaustedSessionId, setResumeExhaustedSessionId } from '@/store/session'
import { useRouteResume } from './use-route-resume' import { useRouteResume } from './use-route-resume'
interface HarnessProps { interface HarnessProps {
@ -263,6 +265,7 @@ describe('useRouteResume bounded auto-retry after a failed resume', () => {
cleanup() cleanup()
vi.useRealTimers() vi.useRealTimers()
vi.restoreAllMocks() vi.restoreAllMocks()
setResumeExhaustedSessionId(null)
}) })
// Common stranded-window props: gateway open, route on the session, no runtime // Common stranded-window props: gateway open, route on the session, no runtime
@ -354,5 +357,55 @@ describe('useRouteResume bounded auto-retry after a failed resume', () => {
// Capped at MAX_RESUME_RETRIES (4): a persistently dead backend can't // Capped at MAX_RESUME_RETRIES (4): a persistently dead backend can't
// hot-loop the resume forever. // hot-loop the resume forever.
expect(resumeSession.mock.calls.length).toBe(4) expect(resumeSession.mock.calls.length).toBe(4)
// Once auto-retry gives up, the exhausted latch is armed for the routed
// session so the chat view can swap the perpetual loader for an explicit
// error + manual Retry instead of spinning forever.
expect($resumeExhaustedSessionId.get()).toBe('session-1')
})
// Guards against the error UI appearing mid-retry: the exhausted latch must
// stay null until the bounded auto-retry has actually hit its cap.
it('does not arm the exhausted latch while retries remain', () => {
vi.useFakeTimers()
const resumeSession = vi.fn(async () => undefined)
const props = strandedProps(resumeSession)
// Mount with the failed-resume latch already set to 'session-1'.
const { rerender } = render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
// Drop any calls made during mount so only retry-driven calls would remain.
// NOTE(review): resumeSession is cleared here but never asserted below, so
// this test would still pass if no retry ever ran; consider also asserting
// that it was called at least once.
resumeSession.mockClear()
// Two failure cycles — still under the 4-retry cap, so the latch must stay
// clear and the loader keeps spinning (auto-recovery hasn't given up yet).
for (let i = 0; i < 2; i += 1) {
// Presumably long enough for the pending backoff timer to fire — TODO
// confirm 8s covers the longest delay used by use-route-resume.
vi.advanceTimersByTime(8_000)
// Toggle the failed latch off and back on to simulate the retry attempt
// clearing it and then failing terminally again.
rerender(<RouteResumeHarness {...props} resumeFailedSessionId={null} />)
rerender(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
}
expect($resumeExhaustedSessionId.get()).toBeNull()
})
it('clears a stale exhausted latch when the route moves off the stranded session', () => {
vi.useFakeTimers()
const resumeSession = vi.fn(async () => undefined)
const props = strandedProps(resumeSession)
// Pre-arm the latch as if this session had exhausted its retries.
setResumeExhaustedSessionId('session-1')
// Route is now on a different, healthy session that is not flagged as
// failed — the retry effect's "route moved off" branch clears the latch.
render(
<RouteResumeHarness
{...props}
activeSessionId="runtime-2"
activeSessionIdRef={{ current: 'runtime-2' }}
locationPathname="/session-2"
resumeFailedSessionId={null}
routedSessionId="session-2"
selectedStoredSessionId="session-2"
selectedStoredSessionIdRef={{ current: 'session-2' }}
/>
)
expect($resumeExhaustedSessionId.get()).toBeNull()
}) })
}) })

View file

@ -1,6 +1,7 @@
import { type MutableRefObject, useEffect, useRef } from 'react' import { type MutableRefObject, useEffect, useRef } from 'react'
import { isNewChatRoute } from '@/app/routes' import { isNewChatRoute } from '@/app/routes'
import { setResumeExhaustedSessionId } from '@/store/session'
interface RouteResumeOptions { interface RouteResumeOptions {
activeSessionId: string | null activeSessionId: string | null
@ -185,10 +186,15 @@ export function useRouteResume({
if (!stranded) { if (!stranded) {
// Route moved off the stranded session (or it recovered) — reset the // Route moved off the stranded session (or it recovered) — reset the
// counter so a future failure on another session starts fresh. // counter so a future failure on another session starts fresh, and clear
// any exhausted-latch armed for a session we're no longer viewing (never
// the current route: that's the error state we want to keep showing).
// resumeSession also clears it on a fresh attempt; this covers a plain
// route-change away from the stranded window.
if (retrySessionIdRef.current !== routedSessionId) { if (retrySessionIdRef.current !== routedSessionId) {
retrySessionIdRef.current = null retrySessionIdRef.current = null
retryAttemptRef.current = 0 retryAttemptRef.current = 0
setResumeExhaustedSessionId(current => (current && current !== routedSessionId ? null : current))
} }
return return
@ -203,6 +209,11 @@ export function useRouteResume({
if (retryAttemptRef.current >= MAX_RESUME_RETRIES) { if (retryAttemptRef.current >= MAX_RESUME_RETRIES) {
// Give up auto-retrying a persistently dead backend; the user can still // Give up auto-retrying a persistently dead backend; the user can still
// reconnect / reselect (which resets the counter via the branch above). // reconnect / reselect (which resets the counter via the branch above).
// Surface an explicit error + manual Retry in the chat view instead of
// spinning the loader forever — resumeSession (manual Retry / reconnect /
// reselect) clears this latch and resets the counter for a fresh cycle.
setResumeExhaustedSessionId(routedSessionId)
return return
} }

View file

@ -38,6 +38,7 @@ import {
setFreshDraftReady, setFreshDraftReady,
setIntroSeed, setIntroSeed,
setMessages, setMessages,
setResumeExhaustedSessionId,
setResumeFailedSessionId, setResumeFailedSessionId,
setSelectedStoredSessionId, setSelectedStoredSessionId,
setSessions, setSessions,
@ -585,6 +586,10 @@ export function useSessionActions({
// must not keep treating it as stranded. It's re-armed below only if THIS // must not keep treating it as stranded. It's re-armed below only if THIS
// attempt fails terminally (RPC reject + REST fallback failure). // attempt fails terminally (RPC reject + REST fallback failure).
setResumeFailedSessionId(current => (current === storedSessionId ? null : current)) setResumeFailedSessionId(current => (current === storedSessionId ? null : current))
// Also clear the exhausted-latch: a fresh attempt (manual Retry, reconnect,
// reselect) gives the bounded auto-retry counter a clean cycle, so the
// chat view drops the error state and shows the loader again.
setResumeExhaustedSessionId(current => (current === storedSessionId ? null : current))
const warmRuntimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId) const warmRuntimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId)
@ -704,6 +709,7 @@ export function useSessionActions({
...(watchWindow ? { lazy: true } : {}), ...(watchWindow ? { lazy: true } : {}),
...(sessionProfile ? { profile: sessionProfile } : {}) ...(sessionProfile ? { profile: sessionProfile } : {})
}) })
// The rejection is consumed by the `await` below; this guard only // The rejection is consumed by the `await` below; this guard only
// keeps it from surfacing as unhandled while the prefetch settles. // keeps it from surfacing as unhandled while the prefetch settles.
resumePromise.catch(() => undefined) resumePromise.catch(() => undefined)

View file

@ -1842,6 +1842,9 @@ export const en: Translations = {
regenerateFailed: 'Regenerate failed', regenerateFailed: 'Regenerate failed',
editFailed: 'Edit failed', editFailed: 'Edit failed',
resumeFailed: 'Resume failed', resumeFailed: 'Resume failed',
resumeStrandedTitle: "Couldn't load this session",
resumeStrandedBody: 'The connection to this session failed and automatic retries gave up. Check that the gateway is running, then try again.',
resumeRetry: 'Retry',
nothingToBranch: 'Nothing to branch', nothingToBranch: 'Nothing to branch',
branchNeedsChat: 'Start or resume a chat before branching.', branchNeedsChat: 'Start or resume a chat before branching.',
sessionBusy: 'Session busy', sessionBusy: 'Session busy',

View file

@ -1973,6 +1973,9 @@ export const ja = defineLocale({
regenerateFailed: '再生成に失敗しました', regenerateFailed: '再生成に失敗しました',
editFailed: '編集に失敗しました', editFailed: '編集に失敗しました',
resumeFailed: '再開に失敗しました', resumeFailed: '再開に失敗しました',
resumeStrandedTitle: 'このセッションを読み込めませんでした',
resumeStrandedBody: 'このセッションへの接続に失敗し、自動再試行も停止しました。ゲートウェイが実行中か確認してから、もう一度お試しください。',
resumeRetry: '再試行',
nothingToBranch: 'ブランチするものがありません', nothingToBranch: 'ブランチするものがありません',
branchNeedsChat: 'ブランチする前にチャットを開始または再開してください。', branchNeedsChat: 'ブランチする前にチャットを開始または再開してください。',
sessionBusy: 'セッションが使用中', sessionBusy: 'セッションが使用中',

View file

@ -1480,6 +1480,9 @@ export interface Translations {
regenerateFailed: string regenerateFailed: string
editFailed: string editFailed: string
resumeFailed: string resumeFailed: string
resumeStrandedTitle: string
resumeStrandedBody: string
resumeRetry: string
nothingToBranch: string nothingToBranch: string
branchNeedsChat: string branchNeedsChat: string
sessionBusy: string sessionBusy: string

View file

@ -1913,6 +1913,9 @@ export const zhHant = defineLocale({
regenerateFailed: '重新生成失敗', regenerateFailed: '重新生成失敗',
editFailed: '編輯失敗', editFailed: '編輯失敗',
resumeFailed: '繼續失敗', resumeFailed: '繼續失敗',
resumeStrandedTitle: '無法載入此工作階段',
resumeStrandedBody: '與此工作階段的連線失敗,自動重試已停止。請確認閘道正在執行,然後重試。',
resumeRetry: '重試',
nothingToBranch: '沒有可分支的內容', nothingToBranch: '沒有可分支的內容',
branchNeedsChat: '分支前請先開始或繼續一個聊天。', branchNeedsChat: '分支前請先開始或繼續一個聊天。',
sessionBusy: '工作階段忙碌中', sessionBusy: '工作階段忙碌中',

View file

@ -2020,6 +2020,9 @@ export const zh: Translations = {
regenerateFailed: '重新生成失败', regenerateFailed: '重新生成失败',
editFailed: '编辑失败', editFailed: '编辑失败',
resumeFailed: '恢复失败', resumeFailed: '恢复失败',
resumeStrandedTitle: '无法加载此会话',
resumeStrandedBody: '与此会话的连接失败,自动重试已停止。请确认网关正在运行,然后重试。',
resumeRetry: '重试',
nothingToBranch: '没有可分支的内容', nothingToBranch: '没有可分支的内容',
branchNeedsChat: '分支前请先开始或恢复一个对话。', branchNeedsChat: '分支前请先开始或恢复一个对话。',
sessionBusy: '会话忙碌中', sessionBusy: '会话忙碌中',

View file

@ -30,6 +30,7 @@ function workspaceCwdKey(connection: HermesConnection | null = $connection.get()
const base = encodeURIComponent(connection.baseUrl || 'remote') const base = encodeURIComponent(connection.baseUrl || 'remote')
const profile = encodeURIComponent(connection.profile || 'default') const profile = encodeURIComponent(connection.profile || 'default')
return `${WORKSPACE_CWD_KEY}.remote.${base}.${profile}` return `${WORKSPACE_CWD_KEY}.remote.${base}.${profile}`
} }
@ -75,6 +76,7 @@ export async function ensureDefaultWorkspaceCwd(): Promise<void> {
if ($connection.get()?.mode === 'remote') { if ($connection.get()?.mode === 'remote') {
seedLiveCwd(remembered) seedLiveCwd(remembered)
return return
} }
@ -151,6 +153,7 @@ export function mergeSessionPage(
} }
const incomingIds = new Set(incoming.map(session => session.id)) const incomingIds = new Set(incoming.map(session => session.id))
// Deduplicate by compression lineage: when auto-compression rotates the tip // Deduplicate by compression lineage: when auto-compression rotates the tip
// id (old #4 → new #5), the incoming page carries the new tip but the // id (old #4 → new #5), the incoming page carries the new tip but the
// previous list still holds the old one. Without lineage-level dedup both // previous list still holds the old one. Without lineage-level dedup both
@ -226,6 +229,15 @@ export const $awaitingResponse = atom(false)
// resume on the next render/focus/reconnect instead of stranding the window. // resume on the next render/focus/reconnect instead of stranding the window.
// Null whenever the active route has a healthy (or in-flight) resume. // Null whenever the active route has a healthy (or in-flight) resume.
export const $resumeFailedSessionId = atom<string | null>(null) export const $resumeFailedSessionId = atom<string | null>(null)
// Stored-session id whose resume has EXHAUSTED its bounded auto-retries: the
// resume kept failing terminally through all MAX_RESUME_RETRIES attempts.
// Distinct from $resumeFailedSessionId, which is armed *during* the backoff
// window too: this one is armed only once auto-recovery has given up, so the
// chat view can swap the perpetual loader for an explicit error + manual Retry
// affordance. A fresh resumeSession() (manual Retry, reconnect, reselect)
// clears it for that session, and use-route-resume clears it when the route
// moves to a different session. Null whenever the active route has a healthy,
// in-flight, or still-auto-retrying resume.
// NOTE(review): the retry-counter reset visible in use-route-resume only runs
// when the retry ref points at a session other than the routed one — confirm
// that a same-session manual Retry really restarts the auto-retry cycle rather
// than re-arming this latch after a single failed attempt.
export const $resumeExhaustedSessionId = atom<string | null>(null)
export const $currentModel = atom(storedString(COMPOSER_MODEL_KEY) ?? '') export const $currentModel = atom(storedString(COMPOSER_MODEL_KEY) ?? '')
export const $currentProvider = atom(storedString(COMPOSER_PROVIDER_KEY) ?? '') export const $currentProvider = atom(storedString(COMPOSER_PROVIDER_KEY) ?? '')
export const $currentReasoningEffort = atom(storedString(COMPOSER_EFFORT_KEY) ?? '') export const $currentReasoningEffort = atom(storedString(COMPOSER_EFFORT_KEY) ?? '')
@ -271,6 +283,7 @@ export const setSelectedStoredSessionId = (next: Updater<string | null>) => upda
export const setMessages = (next: Updater<ChatMessage[]>) => updateAtom($messages, next) export const setMessages = (next: Updater<ChatMessage[]>) => updateAtom($messages, next)
export const setFreshDraftReady = (next: Updater<boolean>) => updateAtom($freshDraftReady, next) export const setFreshDraftReady = (next: Updater<boolean>) => updateAtom($freshDraftReady, next)
export const setResumeFailedSessionId = (next: Updater<string | null>) => updateAtom($resumeFailedSessionId, next) export const setResumeFailedSessionId = (next: Updater<string | null>) => updateAtom($resumeFailedSessionId, next)
// Setter for $resumeExhaustedSessionId. Accepts either the next value (a
// session id, or null to clear) or an updater function `current => next`.
export const setResumeExhaustedSessionId = (next: Updater<string | null>) => updateAtom($resumeExhaustedSessionId, next)
export const setBusy = (next: Updater<boolean>) => updateAtom($busy, next) export const setBusy = (next: Updater<boolean>) => updateAtom($busy, next)
export const setAwaitingResponse = (next: Updater<boolean>) => updateAtom($awaitingResponse, next) export const setAwaitingResponse = (next: Updater<boolean>) => updateAtom($awaitingResponse, next)