mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-03 12:23:08 +00:00
fix(desktop): reject cross-wired runtime-id cache on session resume
resumeSession's warm-cache fast-path trusted the storedSessionId -> runtimeId -> ClientSessionState mapping without checking the cached state still BELONGS to the session being resumed. A pooled profile backend that gets idle-reaped and respawned (pruneSecondaryGateways) re-mints runtime ids, so a recycled id can resolve to a live-but-DIFFERENT session's cache entry. The only existing guard was a session.usage 404 -- that catches a fully-dead runtime id, but a recycled id still 200s, so the fast-path happily painted the wrong transcript under the current route (open chat A, chat B loads). Fold the belongs-to check into a single takeWarmCache() helper used at BOTH cache reads -- the early transcript-keep decision and the fast-path itself -- so a cross-wired entry can't even briefly flash a stale transcript before the full resume repaints. On a mismatch the helper purges both stale map entries and reports a miss, falling through to a full resume that rebinds a correct runtime id. The full-resume path already guards its final paint with isCurrentResume(), so only the cached fast-path was missing the belongs-to check. Pre-existing bug from the initial desktop app (#20059); not introduced by the session-switch perf work (#49807), which left these lines untouched. Tests: two cases in use-session-actions.test.tsx driven through a harness that owns the two cache maps -- a cross-wired mapping is rejected + purged (the bug), and a correctly-wired cache still serves from memory with no needless refetch (no perf regression). Supersedes #50464 by @professorpalmer, reimplemented to also guard the early transcript-keep read (whole-class fix, not just the fast-path). Co-authored-by: professorpalmer <professorpalmer@users.noreply.github.com>
This commit is contained in:
parent
c6575df927
commit
f7bf740640
2 changed files with 180 additions and 5 deletions
|
|
@ -4,6 +4,7 @@ import { useEffect } from 'react'
|
||||||
import { afterEach, describe, expect, it, vi } from 'vitest'
|
import { afterEach, describe, expect, it, vi } from 'vitest'
|
||||||
|
|
||||||
import { getSessionMessages } from '@/hermes'
|
import { getSessionMessages } from '@/hermes'
|
||||||
|
import { createClientSessionState } from '@/lib/chat-runtime'
|
||||||
import { $activeGatewayProfile, $newChatProfile } from '@/store/profile'
|
import { $activeGatewayProfile, $newChatProfile } from '@/store/profile'
|
||||||
import { $currentCwd, $messages, $resumeFailedSessionId, setMessages, setResumeFailedSessionId } from '@/store/session'
|
import { $currentCwd, $messages, $resumeFailedSessionId, setMessages, setResumeFailedSessionId } from '@/store/session'
|
||||||
|
|
||||||
|
|
@ -282,3 +283,147 @@ describe('resumeSession failure recovery', () => {
|
||||||
expect(resumeParams).not.toHaveProperty('eager_build')
|
expect(resumeParams).not.toHaveProperty('eager_build')
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
interface CacheHarnessProps {
|
||||||
|
onReady: (resume: (storedSessionId: string, replaceRoute?: boolean) => Promise<unknown>) => void
|
||||||
|
requestGateway: <T>(method: string, params?: Record<string, unknown>) => Promise<T>
|
||||||
|
runtimeIdByStoredSessionIdRef: MutableRefObject<Map<string, string>>
|
||||||
|
selectedStoredSessionIdRef: MutableRefObject<string | null>
|
||||||
|
sessionStateByRuntimeIdRef: MutableRefObject<Map<string, ClientSessionState>>
|
||||||
|
}
|
||||||
|
|
||||||
|
// Harness that lets the test own the two cache maps so it can pre-seed a
|
||||||
|
// cross-wired runtime-id mapping and observe whether the warm fast-path trusts
|
||||||
|
// it. Mirrors the production wiring from use-session-state-cache.
|
||||||
|
function CacheHarness({
|
||||||
|
onReady,
|
||||||
|
requestGateway,
|
||||||
|
runtimeIdByStoredSessionIdRef,
|
||||||
|
selectedStoredSessionIdRef,
|
||||||
|
sessionStateByRuntimeIdRef
|
||||||
|
}: CacheHarnessProps) {
|
||||||
|
const ref = <T,>(value: T): MutableRefObject<T> => ({ current: value })
|
||||||
|
|
||||||
|
const actions = useSessionActions({
|
||||||
|
activeSessionId: null,
|
||||||
|
activeSessionIdRef: ref<string | null>(null),
|
||||||
|
busyRef: ref(false),
|
||||||
|
creatingSessionRef: ref(false),
|
||||||
|
ensureSessionState: () => ({}) as ClientSessionState,
|
||||||
|
getRouteToken: () => 'token',
|
||||||
|
navigate: vi.fn() as never,
|
||||||
|
requestGateway,
|
||||||
|
runtimeIdByStoredSessionIdRef,
|
||||||
|
selectedStoredSessionId: null,
|
||||||
|
selectedStoredSessionIdRef,
|
||||||
|
sessionStateByRuntimeIdRef,
|
||||||
|
syncSessionStateToView: vi.fn(),
|
||||||
|
updateSessionState: (_sessionId, updater) => updater({} as ClientSessionState)
|
||||||
|
})
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
onReady(actions.resumeSession)
|
||||||
|
}, [actions.resumeSession, onReady])
|
||||||
|
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
const clientState = (storedSessionId: string | null): ClientSessionState => createClientSessionState(storedSessionId)
|
||||||
|
|
||||||
|
describe('resumeSession warm-cache mapping integrity', () => {
|
||||||
|
afterEach(() => {
|
||||||
|
cleanup()
|
||||||
|
setResumeFailedSessionId(null)
|
||||||
|
setMessages([])
|
||||||
|
vi.restoreAllMocks()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('rejects a cross-wired runtime mapping and falls through to a full resume', async () => {
|
||||||
|
// A recycled runtime id ('rt-recycled') is mapped to 'stored-A', but its
|
||||||
|
// cached state actually belongs to a DIFFERENT session ('stored-B') — the
|
||||||
|
// exact "open chat A, chat B loads" corruption a reaped/respawned pooled
|
||||||
|
// backend can leave behind.
|
||||||
|
const runtimeIdByStoredSessionIdRef: MutableRefObject<Map<string, string>> = {
|
||||||
|
current: new Map([['stored-A', 'rt-recycled']])
|
||||||
|
}
|
||||||
|
|
||||||
|
const sessionStateByRuntimeIdRef: MutableRefObject<Map<string, ClientSessionState>> = {
|
||||||
|
current: new Map([['rt-recycled', clientState('stored-B')]])
|
||||||
|
}
|
||||||
|
|
||||||
|
const selectedStoredSessionIdRef: MutableRefObject<string | null> = { current: null }
|
||||||
|
|
||||||
|
const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
|
||||||
|
if (method === 'session.resume') {
|
||||||
|
return { session_id: 'rt-A-fresh', resumed: params?.session_id, messages: [], info: {} } as never
|
||||||
|
}
|
||||||
|
|
||||||
|
return {} as never
|
||||||
|
})
|
||||||
|
|
||||||
|
vi.mocked(getSessionMessages).mockResolvedValue({ messages: [] } as never)
|
||||||
|
|
||||||
|
let resume: ((storedSessionId: string, replaceRoute?: boolean) => Promise<unknown>) | null = null
|
||||||
|
render(
|
||||||
|
<CacheHarness
|
||||||
|
onReady={r => (resume = r)}
|
||||||
|
requestGateway={requestGateway}
|
||||||
|
runtimeIdByStoredSessionIdRef={runtimeIdByStoredSessionIdRef}
|
||||||
|
selectedStoredSessionIdRef={selectedStoredSessionIdRef}
|
||||||
|
sessionStateByRuntimeIdRef={sessionStateByRuntimeIdRef}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
await waitFor(() => expect(resume).not.toBeNull())
|
||||||
|
await resume!('stored-A', true)
|
||||||
|
|
||||||
|
// The fast-path did NOT short-circuit on the cross-wired cache — the full
|
||||||
|
// resume RPC ran, for the session that was actually requested.
|
||||||
|
const resumeCalls = requestGateway.mock.calls.filter(([method]) => method === 'session.resume')
|
||||||
|
expect(resumeCalls.length).toBe(1)
|
||||||
|
expect(resumeCalls[0][1]).toMatchObject({ session_id: 'stored-A' })
|
||||||
|
|
||||||
|
// The corrupt mapping was purged so it can't mis-resolve again.
|
||||||
|
expect(runtimeIdByStoredSessionIdRef.current.has('stored-A')).toBe(false)
|
||||||
|
expect(sessionStateByRuntimeIdRef.current.has('rt-recycled')).toBe(false)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('honours a warm cache entry whose stored id matches (no needless refetch)', async () => {
|
||||||
|
// Correctly-wired mapping: 'rt-A' <-> 'stored-A'. The fast-path should trust
|
||||||
|
// it and never reach session.resume (only the lightweight usage probe).
|
||||||
|
const runtimeIdByStoredSessionIdRef: MutableRefObject<Map<string, string>> = {
|
||||||
|
current: new Map([['stored-A', 'rt-A']])
|
||||||
|
}
|
||||||
|
|
||||||
|
const sessionStateByRuntimeIdRef: MutableRefObject<Map<string, ClientSessionState>> = {
|
||||||
|
current: new Map([['rt-A', clientState('stored-A')]])
|
||||||
|
}
|
||||||
|
|
||||||
|
const selectedStoredSessionIdRef: MutableRefObject<string | null> = { current: null }
|
||||||
|
|
||||||
|
const requestGateway = vi.fn(async (method: string) => {
|
||||||
|
if (method === 'session.usage') {
|
||||||
|
return { input: 0, output: 0, total: 0 } as never
|
||||||
|
}
|
||||||
|
|
||||||
|
return {} as never
|
||||||
|
})
|
||||||
|
|
||||||
|
let resume: ((storedSessionId: string, replaceRoute?: boolean) => Promise<unknown>) | null = null
|
||||||
|
render(
|
||||||
|
<CacheHarness
|
||||||
|
onReady={r => (resume = r)}
|
||||||
|
requestGateway={requestGateway}
|
||||||
|
runtimeIdByStoredSessionIdRef={runtimeIdByStoredSessionIdRef}
|
||||||
|
selectedStoredSessionIdRef={selectedStoredSessionIdRef}
|
||||||
|
sessionStateByRuntimeIdRef={sessionStateByRuntimeIdRef}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
await waitFor(() => expect(resume).not.toBeNull())
|
||||||
|
await resume!('stored-A', true)
|
||||||
|
|
||||||
|
// Fast-path served the session from cache: no full resume RPC, mapping intact.
|
||||||
|
const methods = requestGateway.mock.calls.map(([method]) => method)
|
||||||
|
expect(methods).not.toContain('session.resume')
|
||||||
|
expect(runtimeIdByStoredSessionIdRef.current.get('stored-A')).toBe('rt-A')
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
|
||||||
|
|
@ -591,9 +591,34 @@ export function useSessionActions({
|
||||||
// chat view drops the error state and shows the loader again.
|
// chat view drops the error state and shows the loader again.
|
||||||
setResumeExhaustedSessionId(current => (current === storedSessionId ? null : current))
|
setResumeExhaustedSessionId(current => (current === storedSessionId ? null : current))
|
||||||
|
|
||||||
const warmRuntimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId)
|
// A warm cache entry is only trustworthy when it still BELONGS to the
|
||||||
|
// session being resumed. A pooled profile backend that gets idle-reaped
|
||||||
|
// and respawned (pruneSecondaryGateways) re-mints runtime ids, so a
|
||||||
|
// recycled id can resolve to a live-but-DIFFERENT session's cache entry.
|
||||||
|
// The session.usage 404 guard below only catches a fully-DEAD id — a
|
||||||
|
// recycled-live id 200s, so an unchecked hit paints the wrong transcript
|
||||||
|
// under the current route (the "open chat A, chat B loads" bug). On a
|
||||||
|
// mismatch the mapping is cross-wired: purge both sides and report a miss
|
||||||
|
// so the caller falls through to a full resume that rebinds a correct id.
|
||||||
|
const takeWarmCache = (): { runtimeId: string; state: ClientSessionState } | null => {
|
||||||
|
const runtimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId)
|
||||||
|
const state = runtimeId ? sessionStateByRuntimeIdRef.current.get(runtimeId) : undefined
|
||||||
|
|
||||||
if (!warmRuntimeId || !sessionStateByRuntimeIdRef.current.get(warmRuntimeId)) {
|
if (!runtimeId || !state) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state.storedSessionId !== storedSessionId) {
|
||||||
|
runtimeIdByStoredSessionIdRef.current.delete(storedSessionId)
|
||||||
|
sessionStateByRuntimeIdRef.current.delete(runtimeId)
|
||||||
|
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
return { runtimeId, state }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!takeWarmCache()) {
|
||||||
setActiveSessionId(null)
|
setActiveSessionId(null)
|
||||||
activeSessionIdRef.current = null
|
activeSessionIdRef.current = null
|
||||||
setMessages([])
|
setMessages([])
|
||||||
|
|
@ -612,10 +637,14 @@ export function useSessionActions({
|
||||||
|
|
||||||
await ensureGatewayProfile(sessionProfile)
|
await ensureGatewayProfile(sessionProfile)
|
||||||
|
|
||||||
const cachedRuntimeId = runtimeIdByStoredSessionIdRef.current.get(storedSessionId)
|
// Re-check after the profile-resolve / gateway-swap awaits above: the
|
||||||
const cachedState = cachedRuntimeId && sessionStateByRuntimeIdRef.current.get(cachedRuntimeId)
|
// cache may have changed, and takeWarmCache re-validates belongs-to and
|
||||||
|
// purges a cross-wired mapping before we trust the fast-path.
|
||||||
|
const warmHit = takeWarmCache()
|
||||||
|
|
||||||
if (cachedRuntimeId && cachedState) {
|
if (warmHit) {
|
||||||
|
const cachedRuntimeId = warmHit.runtimeId
|
||||||
|
const cachedState = warmHit.state
|
||||||
const stored = $sessions.get().find(session => session.id === storedSessionId)
|
const stored = $sessions.get().find(session => session.id === storedSessionId)
|
||||||
|
|
||||||
const cachedViewState =
|
const cachedViewState =
|
||||||
|
|
@ -714,6 +743,7 @@ export function useSessionActions({
|
||||||
...(watchWindow ? { lazy: true } : {}),
|
...(watchWindow ? { lazy: true } : {}),
|
||||||
...(sessionProfile ? { profile: sessionProfile } : {})
|
...(sessionProfile ? { profile: sessionProfile } : {})
|
||||||
})
|
})
|
||||||
|
|
||||||
// The rejection is consumed by the `await` below; this guard only
|
// The rejection is consumed by the `await` below; this guard only
|
||||||
// keeps it from surfacing as unhandled while the prefetch settles.
|
// keeps it from surfacing as unhandled while the prefetch settles.
|
||||||
resumePromise.catch(() => undefined)
|
resumePromise.catch(() => undefined)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue