mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-28 11:32:22 +00:00
perf(desktop): make deferred resume the default, not an opt-in flag
Per review: gating the faster path behind a `defer_build` flag that the
only caller always sends is pointless. Flip it — `session.resume` now
defers the agent build by default for every caller (desktop + Ink TUI);
a caller that needs the agent built synchronously passes `eager_build:
true` (used by the build-race test). The desktop no longer sends a flag.
While verifying the flip, fixed two real parity gaps the deferred path
had vs the old eager (`_init_session`) path:
- `_enable_gateway_prompts()` was never called on a deferred resume, so
approvals/clarify wouldn't route through the gateway prompt callbacks.
- `_start_agent_build` never wired `background_review_callback` /
`memory_notifications`, so a deferred-built session's self-improvement
"💾 …" summary leaked to stdout instead of rendering in-transcript.
Wiring it there also fixes it for `session.create` sessions, which
build through the same path.
ACP is unaffected (it uses its own session_manager, not this RPC); the
Ink TUI already consumes the same lazy `info` shape from session.create
and upgrades on the later `session.info` event.
This commit is contained in:
parent
c4c590e4a1
commit
3bf00e459a
4 changed files with 54 additions and 32 deletions
|
|
@ -257,10 +257,11 @@ describe('resumeSession failure recovery', () => {
|
|||
expect($resumeFailedSessionId.get()).toBeNull()
|
||||
})
|
||||
|
||||
it('asks the backend to DEFER the agent build on a normal cold resume', async () => {
|
||||
// The switch-latency fix: a non-watch cold resume tells the gateway to
|
||||
// return the transcript immediately and build the agent in the background,
|
||||
// rather than blocking the RPC (and the whole switch) on _make_agent.
|
||||
it('resumes via the gateway default (deferred build) — not lazy, no eager opt-out', async () => {
|
||||
// The switch-latency fix lives backend-side: a normal cold resume gets the
|
||||
// gateway's default DEFERRED build (transcript returns immediately, agent
|
||||
// pre-warms in the background). The client must NOT force the synchronous
|
||||
// path (eager_build) and is only `lazy` for subagent watch windows.
|
||||
let resumeParams: Record<string, unknown> | undefined
|
||||
|
||||
const requestGateway = vi.fn(async (method: string, params?: Record<string, unknown>) => {
|
||||
|
|
@ -277,8 +278,7 @@ describe('resumeSession failure recovery', () => {
|
|||
|
||||
await runResume(requestGateway)
|
||||
|
||||
expect(resumeParams).toMatchObject({ defer_build: true })
|
||||
// Watch-window lazy attach is the OTHER mode; a normal resume isn't lazy.
|
||||
expect(resumeParams).not.toHaveProperty('lazy')
|
||||
expect(resumeParams).not.toHaveProperty('eager_build')
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -706,12 +706,12 @@ export function useSessionActions({
|
|||
const resumePromise = requestGateway<SessionResumeResponse>('session.resume', {
|
||||
session_id: storedSessionId,
|
||||
cols: 96,
|
||||
// Watch windows attach lazily (live mirror); every other cold resume
|
||||
// asks the backend to DEFER the agent build so the RPC returns the
|
||||
// transcript immediately instead of blocking the switch on
|
||||
// _make_agent (MCP discovery / prompt build). The agent pre-warms in
|
||||
// the background and the prefetch above paints the transcript.
|
||||
...(watchWindow ? { lazy: true } : { defer_build: true }),
|
||||
// Watch windows attach lazily (live mirror). Every other cold resume
|
||||
// gets the gateway's default deferred build: the RPC returns the
|
||||
// transcript immediately instead of blocking the switch on _make_agent
|
||||
// (MCP discovery / prompt build), and the agent pre-warms in the
|
||||
// background while the prefetch above paints the transcript.
|
||||
...(watchWindow ? { lazy: true } : {}),
|
||||
...(sessionProfile ? { profile: sessionProfile } : {})
|
||||
})
|
||||
// The rejection is consumed by the `await` below; this guard only
|
||||
|
|
|
|||
|
|
@ -323,7 +323,9 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch):
|
|||
{
|
||||
"id": "r1",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": "20260409_010101_abc123", "cols": 100},
|
||||
# eager_build: exercise the synchronous build path (this test
|
||||
# monkeypatches _make_agent/_init_session/_session_info).
|
||||
"params": {"session_id": "20260409_010101_abc123", "cols": 100, "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -336,11 +338,12 @@ def test_session_resume_returns_hydrated_messages(server, monkeypatch):
|
|||
]
|
||||
|
||||
|
||||
def test_session_resume_defer_build_returns_transcript_without_blocking(server, monkeypatch):
|
||||
"""``defer_build: true`` (desktop cold resume) must return the full display
|
||||
def test_session_resume_defaults_to_deferred_build(server, monkeypatch):
|
||||
"""A normal cold resume (no ``eager_build``) must return the full display
|
||||
transcript immediately and register an upgradable live session WITHOUT
|
||||
building the agent on the response path — that eager build is the
|
||||
multi-second switch latency."""
|
||||
multi-second switch latency. Deferred is the default; ``eager_build: true``
|
||||
opts back into the synchronous path."""
|
||||
|
||||
target = "20260409_010101_abc123"
|
||||
|
||||
|
|
@ -382,7 +385,7 @@ def test_session_resume_defer_build_returns_transcript_without_blocking(server,
|
|||
{
|
||||
"id": "r1",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": target, "cols": 100, "defer_build": True},
|
||||
"params": {"session_id": target, "cols": 100},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -514,7 +517,7 @@ def test_session_resume_handles_multimodal_list_content(server, monkeypatch):
|
|||
{
|
||||
"id": "r1",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": "20260502_000000_listcontent", "cols": 100},
|
||||
"params": {"session_id": "20260502_000000_listcontent", "cols": 100, "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -828,7 +831,9 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch):
|
|||
{
|
||||
"id": "first",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": target, "cols": 100},
|
||||
# eager_build: this test drives the synchronous build race +
|
||||
# double-checked locking that only the eager path exercises.
|
||||
"params": {"session_id": target, "cols": 100, "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -843,7 +848,7 @@ def test_session_resume_reuses_existing_live_session(server, monkeypatch):
|
|||
{
|
||||
"id": "second",
|
||||
"method": "session.resume",
|
||||
"params": {"session_id": target, "cols": 120},
|
||||
"params": {"session_id": target, "cols": 120, "eager_build": True},
|
||||
}
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1234,6 +1234,19 @@ def _start_agent_build(sid: str, session: dict) -> None:
|
|||
pass
|
||||
|
||||
_wire_callbacks(sid)
|
||||
# Surface the self-improvement review's "💾 …" summary as an event
|
||||
# the TUI/desktop render in-transcript, honoring
|
||||
# display.memory_notifications. _init_session wires this for the
|
||||
# eager/branch paths; deferred-built sessions (session.create and the
|
||||
# default cold resume) build through here, so without this their
|
||||
# review summaries would leak to stdout instead of the chat.
|
||||
try:
|
||||
agent.background_review_callback = lambda message, _sid=sid: _emit(
|
||||
"review.summary", _sid, {"text": str(message)}
|
||||
)
|
||||
agent.memory_notifications = _load_memory_notifications()
|
||||
except Exception:
|
||||
pass
|
||||
# Hydrate credits notices at session OPEN (not just on the first
|
||||
# message), so depletion / usage-band warnings show at "ready". Runs
|
||||
# off the build thread, after the notice_callback is wired. Fail-open.
|
||||
|
|
@ -5044,23 +5057,27 @@ def _(rid, params: dict) -> dict:
|
|||
},
|
||||
)
|
||||
|
||||
# Deferred build (desktop cold resume): register the live session and read
|
||||
# its stored transcript WITHOUT building the agent on the response path.
|
||||
# _make_agent can block for seconds (MCP discovery, prompt/skill build,
|
||||
# AIAgent construction), and the desktop awaits this RPC before it paints
|
||||
# the chat — so the eager build below is the bulk of the multi-second
|
||||
# "switching sessions is frozen" latency. We return the full display
|
||||
# transcript immediately and pre-warm the agent on a short timer (the same
|
||||
# deferred-build contract session.create uses); _sess() also builds on
|
||||
# demand if the first prompt beats the timer. Distinct from the lazy/watch
|
||||
# Cold resume default: register the live session and read its stored
|
||||
# transcript, but build the agent OFF the response path. _make_agent can
|
||||
# block for seconds (MCP discovery, prompt/skill build, AIAgent
|
||||
# construction), and every resume caller (desktop + Ink TUI) awaits this RPC
|
||||
# before it paints — so building eagerly is the bulk of the multi-second
|
||||
# "switching sessions is frozen" latency. Return the full display transcript
|
||||
# immediately and pre-warm the agent on a short timer (the same deferred-
|
||||
# build contract session.create uses); _sess() also builds on demand if the
|
||||
# first prompt beats the timer. A caller that needs the agent built
|
||||
# synchronously (e.g. tests of the build race) passes ``eager_build: true``
|
||||
# to fall through to the eager path below. Distinct from the lazy/watch
|
||||
# branch above: a normal resume restores the full ancestor history and the
|
||||
# session's persisted runtime identity, and is a real (upgradable) session,
|
||||
# not a never-built spectator window.
|
||||
if is_truthy_value(params.get("defer_build", False)):
|
||||
# session's persisted runtime identity, and is a real (upgradable) session.
|
||||
if not is_truthy_value(params.get("eager_build", False)):
|
||||
sid = uuid.uuid4().hex[:8]
|
||||
lease, limit_message = _claim_active_session_slot(target, live_session_id=sid)
|
||||
if limit_message is not None:
|
||||
return _err(rid, 4090, limit_message)
|
||||
# Interactive resume routes approvals/clarify through gateway prompts;
|
||||
# the deferred build wires the remaining per-session callbacks.
|
||||
_enable_gateway_prompts()
|
||||
try:
|
||||
db.reopen_session(target)
|
||||
history = db.get_messages_as_conversation(target)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue