mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-19 10:02:16 +00:00
feat(desktop): show error + manual Retry when resume retries exhaust
When a stranded session window's bounded auto-retry gave up (the gateway resume RPC and the REST fallback both failing through all MAX_RESUME_RETRIES attempts), the loader stayed latched forever. Add a $resumeExhaustedSessionId atom, armed at the give-up point, so the chat view swaps the perpetual spinner for an explicit error state with a manual Retry button. Retry / reconnect / reselect clears the latch and resets the auto-retry counter for a fresh cycle; a route change away from the stranded session also clears it. The atom is distinct from $resumeFailedSessionId (which is also armed during the backoff window), so the error UI appears only once auto-recovery has actually given up, not mid-retry. Adds i18n strings for en/ja/zh/zh-hant and 3 tests covering: the latch arms on exhaustion, stays clear while retries remain, and clears on route change.
This commit is contained in:
parent
1e2c91eaff
commit
253bfc0e3d
11 changed files with 136 additions and 3 deletions
|
|
@ -15,7 +15,9 @@ import { Backdrop } from '@/components/Backdrop'
|
|||
import { PromptOverlays } from '@/components/prompt-overlays'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Codicon } from '@/components/ui/codicon'
|
||||
import { ErrorState } from '@/components/ui/error-state'
|
||||
import { getGlobalModelOptions, type HermesGateway } from '@/hermes'
|
||||
import { useI18n } from '@/i18n'
|
||||
import type { ChatMessage } from '@/lib/chat-messages'
|
||||
import { quickModelOptions, sessionTitle, toRuntimeMessage } from '@/lib/chat-runtime'
|
||||
import { useIncrementalExternalStoreRuntime } from '@/lib/incremental-external-store-runtime'
|
||||
|
|
@ -38,6 +40,7 @@ import {
|
|||
$lastVisibleMessageIsUser,
|
||||
$messages,
|
||||
$messagesEmpty,
|
||||
$resumeExhaustedSessionId,
|
||||
$selectedStoredSessionId,
|
||||
$sessions,
|
||||
sessionPinId
|
||||
|
|
@ -86,6 +89,7 @@ interface ChatViewProps extends Omit<React.ComponentProps<'div'>, 'onSubmit'> {
|
|||
onEdit: (message: AppendMessage) => Promise<void>
|
||||
onReload: (parentId: string | null) => Promise<void>
|
||||
onRestoreToMessage?: (messageId: string) => Promise<void>
|
||||
onRetryResume: (sessionId: string) => void
|
||||
onTranscribeAudio?: (audio: Blob) => Promise<string>
|
||||
}
|
||||
|
||||
|
|
@ -272,9 +276,11 @@ export function ChatView({
|
|||
onEdit,
|
||||
onReload,
|
||||
onRestoreToMessage,
|
||||
onRetryResume,
|
||||
onTranscribeAudio
|
||||
}: ChatViewProps) {
|
||||
const location = useLocation()
|
||||
const { t } = useI18n()
|
||||
const activeSessionId = useStore($activeSessionId)
|
||||
const awaitingResponse = useStore($awaitingResponse)
|
||||
const busy = useStore($busy)
|
||||
|
|
@ -296,6 +302,7 @@ export function ChatView({
|
|||
const messagesEmpty = useStore($messagesEmpty)
|
||||
const lastVisibleIsUser = useStore($lastVisibleMessageIsUser)
|
||||
const selectedSessionId = useStore($selectedStoredSessionId)
|
||||
const resumeExhaustedSessionId = useStore($resumeExhaustedSessionId)
|
||||
const routedSessionId = routeSessionId(location.pathname)
|
||||
const isRoutedSessionView = Boolean(routedSessionId)
|
||||
|
||||
|
|
@ -315,9 +322,21 @@ export function ChatView({
|
|||
// session exists — even if it has zero messages (a brand-new routed
|
||||
// session). The flicker where `busy` flips true briefly during hydrate
|
||||
// is handled by `threadLoadingState`'s last-visible-user gate.
|
||||
const loadingSession = isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
|
||||
//
|
||||
// resumeExhausted: the bounded auto-retry in use-route-resume gave up on this
|
||||
// routed session (gateway RPC + REST fallback failed through every attempt).
|
||||
// Suppress the loader and show an explicit error + manual Retry instead of
|
||||
// spinning forever. Gated on the route matching so a stale latch from another
|
||||
// session can't blank the current one.
|
||||
const resumeExhausted = isRoutedSessionView && resumeExhaustedSessionId === routedSessionId
|
||||
|
||||
const loadingSession =
|
||||
!resumeExhausted && isRoutedSessionView && (routeSessionMismatch || (messagesEmpty && !activeSessionId))
|
||||
|
||||
const threadLoading = threadLoadingState(loadingSession, busy, awaitingResponse, lastVisibleIsUser)
|
||||
const showChatBar = !loadingSession
|
||||
// Hide the composer in the exhausted error state too: there's no live runtime
|
||||
// to send to until a retry rebinds one.
|
||||
const showChatBar = !loadingSession && !resumeExhausted
|
||||
const threadKey = selectedSessionId || activeSessionId || (isRoutedSessionView ? location.pathname : 'new')
|
||||
|
||||
const modelOptionsQuery = useQuery<ModelOptionsResponse>({
|
||||
|
|
@ -465,6 +484,21 @@ export function ChatView({
|
|||
</Suspense>
|
||||
)}
|
||||
</ChatRuntimeBoundary>
|
||||
{resumeExhausted && routedSessionId && (
|
||||
<div className="absolute inset-0 z-10 grid place-items-center bg-(--ui-chat-surface-background) px-8 py-10">
|
||||
<ErrorState
|
||||
className="max-w-sm"
|
||||
description={t.desktop.resumeStrandedBody}
|
||||
title={t.desktop.resumeStrandedTitle}
|
||||
>
|
||||
<div className="grid justify-items-center">
|
||||
<Button onClick={() => onRetryResume(routedSessionId)} size="sm" variant="outline">
|
||||
{t.desktop.resumeRetry}
|
||||
</Button>
|
||||
</div>
|
||||
</ErrorState>
|
||||
</div>
|
||||
)}
|
||||
{showChatBar && <ScrollToBottomButton />}
|
||||
<ChatDropOverlay kind={dragKind} />
|
||||
<ChatSwapOverlay profile={gatewaySwapTarget} />
|
||||
|
|
|
|||
|
|
@ -1005,6 +1005,7 @@ export function DesktopController() {
|
|||
onReload={reloadFromMessage}
|
||||
onRemoveAttachment={id => void composer.removeAttachment(id)}
|
||||
onRestoreToMessage={restoreToMessage}
|
||||
onRetryResume={sessionId => void resumeSession(sessionId, true)}
|
||||
onSteer={steerPrompt}
|
||||
onSubmit={submitText}
|
||||
onThreadMessagesChange={handleThreadMessagesChange}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ import { cleanup, render } from '@testing-library/react'
|
|||
import type { MutableRefObject } from 'react'
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||
|
||||
import { $resumeExhaustedSessionId, setResumeExhaustedSessionId } from '@/store/session'
|
||||
|
||||
import { useRouteResume } from './use-route-resume'
|
||||
|
||||
interface HarnessProps {
|
||||
|
|
@ -263,6 +265,7 @@ describe('useRouteResume bounded auto-retry after a failed resume', () => {
|
|||
cleanup()
|
||||
vi.useRealTimers()
|
||||
vi.restoreAllMocks()
|
||||
setResumeExhaustedSessionId(null)
|
||||
})
|
||||
|
||||
// Common stranded-window props: gateway open, route on the session, no runtime
|
||||
|
|
@ -354,5 +357,55 @@ describe('useRouteResume bounded auto-retry after a failed resume', () => {
|
|||
// Capped at MAX_RESUME_RETRIES (4): a persistently dead backend can't
|
||||
// hot-loop the resume forever.
|
||||
expect(resumeSession.mock.calls.length).toBe(4)
|
||||
|
||||
// Once auto-retry gives up, the exhausted latch is armed for the routed
|
||||
// session so the chat view can swap the perpetual loader for an explicit
|
||||
// error + manual Retry instead of spinning forever.
|
||||
expect($resumeExhaustedSessionId.get()).toBe('session-1')
|
||||
})
|
||||
|
||||
it('does not arm the exhausted latch while retries remain', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
const props = strandedProps(resumeSession)
|
||||
|
||||
const { rerender } = render(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
|
||||
resumeSession.mockClear()
|
||||
|
||||
// Two failure cycles — still under the 4-retry cap, so the latch must stay
|
||||
// clear and the loader keeps spinning (auto-recovery hasn't given up yet).
|
||||
for (let i = 0; i < 2; i += 1) {
|
||||
vi.advanceTimersByTime(8_000)
|
||||
rerender(<RouteResumeHarness {...props} resumeFailedSessionId={null} />)
|
||||
rerender(<RouteResumeHarness {...props} resumeFailedSessionId="session-1" />)
|
||||
}
|
||||
|
||||
expect($resumeExhaustedSessionId.get()).toBeNull()
|
||||
})
|
||||
|
||||
it('clears a stale exhausted latch when the route moves off the stranded session', () => {
|
||||
vi.useFakeTimers()
|
||||
const resumeSession = vi.fn(async () => undefined)
|
||||
const props = strandedProps(resumeSession)
|
||||
|
||||
// Pre-arm the latch as if this session had exhausted its retries.
|
||||
setResumeExhaustedSessionId('session-1')
|
||||
|
||||
// Route is now on a different, healthy session that is not flagged as
|
||||
// failed — the retry effect's "route moved off" branch clears the latch.
|
||||
render(
|
||||
<RouteResumeHarness
|
||||
{...props}
|
||||
activeSessionId="runtime-2"
|
||||
activeSessionIdRef={{ current: 'runtime-2' }}
|
||||
locationPathname="/session-2"
|
||||
resumeFailedSessionId={null}
|
||||
routedSessionId="session-2"
|
||||
selectedStoredSessionId="session-2"
|
||||
selectedStoredSessionIdRef={{ current: 'session-2' }}
|
||||
/>
|
||||
)
|
||||
|
||||
expect($resumeExhaustedSessionId.get()).toBeNull()
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { type MutableRefObject, useEffect, useRef } from 'react'
|
||||
|
||||
import { isNewChatRoute } from '@/app/routes'
|
||||
import { setResumeExhaustedSessionId } from '@/store/session'
|
||||
|
||||
interface RouteResumeOptions {
|
||||
activeSessionId: string | null
|
||||
|
|
@ -185,10 +186,15 @@ export function useRouteResume({
|
|||
|
||||
if (!stranded) {
|
||||
// Route moved off the stranded session (or it recovered) — reset the
|
||||
// counter so a future failure on another session starts fresh.
|
||||
// counter so a future failure on another session starts fresh, and clear
|
||||
// any exhausted-latch armed for a session we're no longer viewing (never
|
||||
// the current route: that's the error state we want to keep showing).
|
||||
// resumeSession also clears it on a fresh attempt; this covers a plain
|
||||
// route-change away from the stranded window.
|
||||
if (retrySessionIdRef.current !== routedSessionId) {
|
||||
retrySessionIdRef.current = null
|
||||
retryAttemptRef.current = 0
|
||||
setResumeExhaustedSessionId(current => (current && current !== routedSessionId ? null : current))
|
||||
}
|
||||
|
||||
return
|
||||
|
|
@ -203,6 +209,11 @@ export function useRouteResume({
|
|||
if (retryAttemptRef.current >= MAX_RESUME_RETRIES) {
|
||||
// Give up auto-retrying a persistently dead backend; the user can still
|
||||
// reconnect / reselect (which resets the counter via the branch above).
|
||||
// Surface an explicit error + manual Retry in the chat view instead of
|
||||
// spinning the loader forever — resumeSession (manual Retry / reconnect /
|
||||
// reselect) clears this latch and resets the counter for a fresh cycle.
|
||||
setResumeExhaustedSessionId(routedSessionId)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import {
|
|||
setFreshDraftReady,
|
||||
setIntroSeed,
|
||||
setMessages,
|
||||
setResumeExhaustedSessionId,
|
||||
setResumeFailedSessionId,
|
||||
setSelectedStoredSessionId,
|
||||
setSessions,
|
||||
|
|
@ -585,6 +586,10 @@ export function useSessionActions({
|
|||
// must not keep treating it as stranded. It's re-armed below only if THIS
|
||||
// attempt fails terminally (RPC reject + REST fallback failure).
|
||||
setResumeFailedSessionId(current => (current === storedSessionId ? null : current))
|
||||
// Also clear the exhausted-latch: a fresh attempt (manual Retry, reconnect,
|
||||
// reselect) gives the bounded auto-retry counter a clean cycle, so the
|
||||
// chat view drops the error state and shows the loader again.
|
||||
setResumeExhaustedSessionId(current => (current === storedSessionId ? null : current))
|
||||
|
||||
const warmRuntimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId)
|
||||
|
||||
|
|
@ -704,6 +709,7 @@ export function useSessionActions({
|
|||
...(watchWindow ? { lazy: true } : {}),
|
||||
...(sessionProfile ? { profile: sessionProfile } : {})
|
||||
})
|
||||
|
||||
// The rejection is consumed by the `await` below; this guard only
|
||||
// keeps it from surfacing as unhandled while the prefetch settles.
|
||||
resumePromise.catch(() => undefined)
|
||||
|
|
|
|||
|
|
@ -1842,6 +1842,9 @@ export const en: Translations = {
|
|||
regenerateFailed: 'Regenerate failed',
|
||||
editFailed: 'Edit failed',
|
||||
resumeFailed: 'Resume failed',
|
||||
resumeStrandedTitle: "Couldn't load this session",
|
||||
resumeStrandedBody: 'The connection to this session failed and automatic retries gave up. Check that the gateway is running, then try again.',
|
||||
resumeRetry: 'Retry',
|
||||
nothingToBranch: 'Nothing to branch',
|
||||
branchNeedsChat: 'Start or resume a chat before branching.',
|
||||
sessionBusy: 'Session busy',
|
||||
|
|
|
|||
|
|
@ -1973,6 +1973,9 @@ export const ja = defineLocale({
|
|||
regenerateFailed: '再生成に失敗しました',
|
||||
editFailed: '編集に失敗しました',
|
||||
resumeFailed: '再開に失敗しました',
|
||||
resumeStrandedTitle: 'このセッションを読み込めませんでした',
|
||||
resumeStrandedBody: 'このセッションへの接続に失敗し、自動再試行も停止しました。ゲートウェイが実行中か確認してから、もう一度お試しください。',
|
||||
resumeRetry: '再試行',
|
||||
nothingToBranch: 'ブランチするものがありません',
|
||||
branchNeedsChat: 'ブランチする前にチャットを開始または再開してください。',
|
||||
sessionBusy: 'セッションが使用中',
|
||||
|
|
|
|||
|
|
@ -1480,6 +1480,9 @@ export interface Translations {
|
|||
regenerateFailed: string
|
||||
editFailed: string
|
||||
resumeFailed: string
|
||||
resumeStrandedTitle: string
|
||||
resumeStrandedBody: string
|
||||
resumeRetry: string
|
||||
nothingToBranch: string
|
||||
branchNeedsChat: string
|
||||
sessionBusy: string
|
||||
|
|
|
|||
|
|
@ -1913,6 +1913,9 @@ export const zhHant = defineLocale({
|
|||
regenerateFailed: '重新生成失敗',
|
||||
editFailed: '編輯失敗',
|
||||
resumeFailed: '繼續失敗',
|
||||
resumeStrandedTitle: '無法載入此工作階段',
|
||||
resumeStrandedBody: '與此工作階段的連線失敗,自動重試已停止。請確認閘道正在執行,然後重試。',
|
||||
resumeRetry: '重試',
|
||||
nothingToBranch: '沒有可分支的內容',
|
||||
branchNeedsChat: '分支前請先開始或繼續一個聊天。',
|
||||
sessionBusy: '工作階段忙碌中',
|
||||
|
|
|
|||
|
|
@ -2020,6 +2020,9 @@ export const zh: Translations = {
|
|||
regenerateFailed: '重新生成失败',
|
||||
editFailed: '编辑失败',
|
||||
resumeFailed: '恢复失败',
|
||||
resumeStrandedTitle: '无法加载此会话',
|
||||
resumeStrandedBody: '与此会话的连接失败,自动重试已停止。请确认网关正在运行,然后重试。',
|
||||
resumeRetry: '重试',
|
||||
nothingToBranch: '没有可分支的内容',
|
||||
branchNeedsChat: '分支前请先开始或恢复一个对话。',
|
||||
sessionBusy: '会话忙碌中',
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ function workspaceCwdKey(connection: HermesConnection | null = $connection.get()
|
|||
|
||||
const base = encodeURIComponent(connection.baseUrl || 'remote')
|
||||
const profile = encodeURIComponent(connection.profile || 'default')
|
||||
|
||||
return `${WORKSPACE_CWD_KEY}.remote.${base}.${profile}`
|
||||
}
|
||||
|
||||
|
|
@ -75,6 +76,7 @@ export async function ensureDefaultWorkspaceCwd(): Promise<void> {
|
|||
|
||||
if ($connection.get()?.mode === 'remote') {
|
||||
seedLiveCwd(remembered)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -151,6 +153,7 @@ export function mergeSessionPage(
|
|||
}
|
||||
|
||||
const incomingIds = new Set(incoming.map(session => session.id))
|
||||
|
||||
// Deduplicate by compression lineage: when auto-compression rotates the tip
|
||||
// id (old #4 → new #5), the incoming page carries the new tip but the
|
||||
// previous list still holds the old one. Without lineage-level dedup both
|
||||
|
|
@ -226,6 +229,15 @@ export const $awaitingResponse = atom(false)
|
|||
// resume on the next render/focus/reconnect instead of stranding the window.
|
||||
// Null whenever the active route has a healthy (or in-flight) resume.
|
||||
export const $resumeFailedSessionId = atom<string | null>(null)
|
||||
// Stored-session id whose resume has EXHAUSTED its bounded auto-retries (the
|
||||
// terminal failure tracked by the latch above recurred on all MAX_RESUME_RETRIES
|
||||
// attempts). Distinct from $resumeFailedSessionId, which is armed *during* the
|
||||
// backoff window too: this fires only once auto-recovery has given up, so the
|
||||
// chat view can swap the perpetual loader for an explicit error + manual Retry
|
||||
// affordance. A fresh resumeSession() (manual Retry, reconnect, reselect)
|
||||
// clears it and resets the retry counter. Null whenever the active route has a
|
||||
// healthy, in-flight, or still-auto-retrying resume.
|
||||
export const $resumeExhaustedSessionId = atom<string | null>(null)
|
||||
export const $currentModel = atom(storedString(COMPOSER_MODEL_KEY) ?? '')
|
||||
export const $currentProvider = atom(storedString(COMPOSER_PROVIDER_KEY) ?? '')
|
||||
export const $currentReasoningEffort = atom(storedString(COMPOSER_EFFORT_KEY) ?? '')
|
||||
|
|
@ -271,6 +283,7 @@ export const setSelectedStoredSessionId = (next: Updater<string | null>) => upda
|
|||
export const setMessages = (next: Updater<ChatMessage[]>) => updateAtom($messages, next)
|
||||
export const setFreshDraftReady = (next: Updater<boolean>) => updateAtom($freshDraftReady, next)
|
||||
export const setResumeFailedSessionId = (next: Updater<string | null>) => updateAtom($resumeFailedSessionId, next)
|
||||
export const setResumeExhaustedSessionId = (next: Updater<string | null>) => updateAtom($resumeExhaustedSessionId, next)
|
||||
export const setBusy = (next: Updater<boolean>) => updateAtom($busy, next)
|
||||
export const setAwaitingResponse = (next: Updater<boolean>) => updateAtom($awaitingResponse, next)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue