mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
perf(tui): defer agent construction until first prompt
Match classic CLI perceived startup behavior: show the TUI shell and composer before constructing the full AIAgent.

session.create now returns a lightweight placeholder session with lazy=true and no longer starts _make_agent eagerly. The first method that needs the agent triggers _start_agent_build() via _sess(); prompt.submit is routed through the RPC worker pool so that the initial wait for agent construction does not block the stdio dispatcher.

The intro panel renders skeleton rows for tools/skills while the real session.info payload is absent, then hydrates to the real tools/skills panel once AIAgent initialization completes.

Also skip the startup /voice status probe and avoid the input.detect_drop RPC for ordinary plain-text prompts to keep early startup/first-submit paths cheap.

Measurements on macOS Terminal.app:
- Previous full ready p50 after earlier PR commits: ~1537ms
- Lazy skeleton panel p50: ~794ms
- Original baseline full ready p50: ~1843ms

So the visible startup surface is now ~743ms faster than the prior PR state and ~1.05s faster than the original baseline. The first prompt still pays the same agent construction cost if it is submitted while the panel is still in the background/skeleton state, matching classic CLI's deferred behavior.

Tests:
- python -m py_compile tui_gateway/server.py
- cd ui-tui && npm run type-check && npm run build
- scripts/run_tests.sh tests/tui_gateway/test_protocol.py::test_sess_found tests/tools/test_code_execution_modes.py tests/tools/test_code_execution.py
- cd ui-tui && npm test -- --run src/__tests__/useSessionLifecycle.test.ts src/__tests__/useConfigSync.test.ts
This commit is contained in:
parent
9e398e1809
commit
b66cbb7b4c
6 changed files with 148 additions and 136 deletions
|
|
@ -141,6 +141,7 @@ _SLASH_WORKER_TIMEOUT_S = max(
|
|||
_LONG_HANDLERS = frozenset(
|
||||
{
|
||||
"cli.exec",
|
||||
"prompt.submit",
|
||||
"session.branch",
|
||||
"session.resume",
|
||||
"shell.exec",
|
||||
|
|
@ -464,6 +465,117 @@ def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
|
|||
return _err(rid, 5032, err) if err else None
|
||||
|
||||
|
||||
# Serializes the check-and-set of "agent_build_started" so that concurrent
# callers (e.g. handlers running on different worker threads) cannot both
# launch a build for the same session.
_AGENT_BUILD_START_LOCK = threading.Lock()


def _start_agent_build(sid: str, session: dict) -> None:
    """Start building the real AIAgent for a TUI session, once.

    Classic `hermes` shows the prompt before constructing AIAgent; the TUI used
    to eagerly build it during session.create, making startup feel blocked on
    tool discovery/model metadata even though the composer was visible. Keep
    the shell responsive by deferring this work until the first prompt (or any
    command that actually needs the agent), while retaining the same ready/error
    event contract for the frontend.

    Args:
        sid: Session id; used to look the session up in ``_sessions`` and as
            the target of the emitted ``session.info`` / ``error`` events.
        session: The session dict. Reads ``agent_ready`` (an event-like object
            exposing ``is_set()`` / ``set()``) and ``session_key``; writes
            ``agent_build_started``.

    Returns:
        None. The call is a no-op when the session has no ``agent_ready``
        entry, when that event is already set, or when a build was already
        started. Otherwise the build runs on a daemon thread, which always
        sets ``agent_ready`` when it finishes, whether it succeeded or failed.

    Raises:
        KeyError: if a build is due but ``session`` has no ``session_key``.
    """
    ready = session.get("agent_ready")
    if ready is None:
        return

    # Check-and-set must be atomic: two threads racing through an unlocked
    # guard could each see the flag unset and each spawn a build thread.
    with _AGENT_BUILD_START_LOCK:
        if ready.is_set() or session.get("agent_build_started"):
            return
        # Read the key before flipping the flag so a malformed session is not
        # left marked as "started" with nothing that will ever set `ready`.
        key = session["session_key"]
        session["agent_build_started"] = True

    def _build() -> None:
        current = _sessions.get(sid)
        if current is None:
            # The session is no longer registered; just release any waiters.
            ready.set()
            return

        # Track what this thread allocates so the finally block can release it
        # if the session stops being the registered one while we are building.
        worker = None
        notify_registered = False
        try:
            tokens = _set_session_context(key)
            try:
                agent = _make_agent(sid, key)
            finally:
                _clear_session_context(tokens)

            db = _get_db()
            if db is not None:
                db.create_session(key, source="tui", model=_resolve_model())
                pending_title = (current.get("pending_title") or "").strip()
                if pending_title:
                    try:
                        title_applied = db.set_session_title(key, pending_title)
                        if title_applied:
                            current["pending_title"] = None
                        else:
                            # Not applied: clear the queue only if the stored
                            # title already equals the one we wanted.
                            existing_row = db.get_session(key)
                            existing_title = ((existing_row or {}).get("title") or "").strip()
                            if existing_title == pending_title:
                                current["pending_title"] = None
                            else:
                                logger.info(
                                    "Pending title still queued for session %s (wanted=%r, current=%r)",
                                    sid,
                                    pending_title,
                                    existing_title,
                                )
                    except ValueError as e:
                        # The title was rejected; drop it so it does not stay queued.
                        current["pending_title"] = None
                        logger.info("Dropping pending title for session %s: %s", sid, e)
                    except Exception:
                        logger.warning("Failed to apply pending title for session %s", sid, exc_info=True)
            current["agent"] = agent

            # Best-effort: the session stays usable without a slash worker.
            try:
                worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
                current["slash_worker"] = worker
            except Exception:
                logger.debug("Slash worker unavailable for session %s", sid, exc_info=True)

            # Best-effort: approval notifications are optional wiring.
            try:
                from tools.approval import (
                    register_gateway_notify,
                    load_permanent_allowlist,
                )
                register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
                notify_registered = True
                load_permanent_allowlist()
            except Exception:
                logger.debug("Approval notify setup failed for session %s", sid, exc_info=True)

            _wire_callbacks(sid)
            _notify_session_boundary("on_session_reset", key)

            info = _session_info(agent)
            warn = _probe_credentials(agent)
            if warn:
                info["credential_warning"] = warn
            cfg_warn = _probe_config_health(_load_cfg())
            if cfg_warn:
                info["config_warning"] = cfg_warn
                logger.warning(cfg_warn)
            _emit("session.info", sid, info)
        except Exception as e:
            # This function never resets "agent_build_started", so a failed
            # build is recorded here rather than retried by a later call.
            current["agent_error"] = str(e)
            _emit("error", sid, {"message": f"agent init failed: {e}"})
        finally:
            # Orphan check: if the registry no longer maps sid to the dict we
            # populated, release the resources this thread created.
            if _sessions.get(sid) is not current:
                if worker is not None:
                    try:
                        worker.close()
                    except Exception:
                        logger.debug("Failed to close orphaned slash worker for session %s", sid, exc_info=True)
                if notify_registered:
                    try:
                        from tools.approval import unregister_gateway_notify
                        unregister_gateway_notify(key)
                    except Exception:
                        logger.debug("Failed to unregister approval notify for session %s", sid, exc_info=True)
            ready.set()

    threading.Thread(target=_build, daemon=True).start()
|
||||
|
||||
|
||||
def _sess_nowait(params, rid):
    """Resolve the session named by ``params["session_id"]`` without waiting on its agent.

    Returns ``(session, None)`` when a truthy session entry is registered under
    that id, otherwise ``(None, error)`` where ``error`` is the 4001
    "session not found" response built by ``_err`` for request ``rid``.
    """
    session_id = params.get("session_id") or ""
    found = _sessions.get(session_id)
    if not found:
        return (None, _err(rid, 4001, "session not found"))
    return (found, None)
|
||||
|
|
@ -471,7 +583,10 @@ def _sess_nowait(params, rid):
|
|||
|
||||
def _sess(params, rid):
    """Resolve the session for an RPC call and wait for its agent.

    Looks the session up via ``_sess_nowait``. On a lookup failure returns
    ``(None, error)``. Otherwise triggers ``_start_agent_build`` for the
    session (which returns immediately when no new build is due) and returns
    ``(session, wait_result)``, where ``wait_result`` is whatever
    ``_wait_agent`` returns for this session and request id.
    """
    s, err = _sess_nowait(params, rid)
    if err:
        return (None, err)
    # The build must be triggered before waiting. An unconditional return
    # ahead of this point would leave the trigger unreachable.
    _start_agent_build(params.get("session_id") or "", s)
    return (s, _wait_agent(s, rid))
|
||||
|
||||
|
||||
def _normalize_completion_path(path_part: str) -> str:
|
||||
|
|
@ -1611,130 +1726,6 @@ def _(rid, params: dict) -> dict:
|
|||
"transport": current_transport() or _stdio_transport,
|
||||
}
|
||||
|
||||
def _build() -> None:
|
||||
session = _sessions.get(sid)
|
||||
if session is None:
|
||||
# session.close ran before the build thread got scheduled.
|
||||
ready.set()
|
||||
return
|
||||
|
||||
# Track what we allocate so we can clean up if session.close
|
||||
# races us to the finish line. session.close pops _sessions[sid]
|
||||
# unconditionally and tries to close the slash_worker it finds;
|
||||
# if _build is still mid-construction when close runs, close
|
||||
# finds slash_worker=None / notify unregistered and returns
|
||||
# cleanly — leaving us, the build thread, to later install the
|
||||
# worker + notify on an orphaned session dict. The finally
|
||||
# block below detects the orphan and cleans up instead of
|
||||
# leaking a subprocess and a global notify registration.
|
||||
worker = None
|
||||
notify_registered = False
|
||||
try:
|
||||
tokens = _set_session_context(key)
|
||||
try:
|
||||
agent = _make_agent(sid, key)
|
||||
finally:
|
||||
_clear_session_context(tokens)
|
||||
|
||||
db = _get_db()
|
||||
if db is not None:
|
||||
db.create_session(key, source="tui", model=_resolve_model())
|
||||
pending_title = (session.get("pending_title") or "").strip()
|
||||
if pending_title:
|
||||
try:
|
||||
title_applied = db.set_session_title(key, pending_title)
|
||||
if title_applied:
|
||||
session["pending_title"] = None
|
||||
else:
|
||||
existing_row = db.get_session(key)
|
||||
existing_title = (
|
||||
(existing_row or {}).get("title") or ""
|
||||
).strip()
|
||||
if existing_title == pending_title:
|
||||
session["pending_title"] = None
|
||||
else:
|
||||
logger.info(
|
||||
"Pending title still queued for session %s (wanted=%r, current=%r)",
|
||||
sid,
|
||||
pending_title,
|
||||
existing_title,
|
||||
)
|
||||
except ValueError as e:
|
||||
# Queued title can become invalid/duplicate between queue time
|
||||
# and DB row creation. Drop the queue and log the reason so
|
||||
# future /title reads don't surface a stuck pending value.
|
||||
session["pending_title"] = None
|
||||
logger.info(
|
||||
"Dropping pending title for session %s: %s",
|
||||
sid,
|
||||
e,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to apply pending title for session %s",
|
||||
sid,
|
||||
exc_info=True,
|
||||
)
|
||||
session["agent"] = agent
|
||||
|
||||
try:
|
||||
worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
|
||||
session["slash_worker"] = worker
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from tools.approval import (
|
||||
register_gateway_notify,
|
||||
load_permanent_allowlist,
|
||||
)
|
||||
|
||||
register_gateway_notify(
|
||||
key, lambda data: _emit("approval.request", sid, data)
|
||||
)
|
||||
notify_registered = True
|
||||
load_permanent_allowlist()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_wire_callbacks(sid)
|
||||
_notify_session_boundary("on_session_reset", key)
|
||||
|
||||
info = _session_info(agent)
|
||||
warn = _probe_credentials(agent)
|
||||
if warn:
|
||||
info["credential_warning"] = warn
|
||||
cfg_warn = _probe_config_health(_load_cfg())
|
||||
if cfg_warn:
|
||||
info["config_warning"] = cfg_warn
|
||||
logger.warning(cfg_warn)
|
||||
_emit("session.info", sid, info)
|
||||
except Exception as e:
|
||||
session["agent_error"] = str(e)
|
||||
_emit("error", sid, {"message": f"agent init failed: {e}"})
|
||||
finally:
|
||||
# Orphan check: if session.close raced us and popped
|
||||
# _sessions[sid] while we were building, the dict we just
|
||||
# populated is unreachable. Clean up the subprocess and
|
||||
# the global notify registration ourselves — session.close
|
||||
# couldn't see them at the time it ran.
|
||||
if _sessions.get(sid) is not session:
|
||||
if worker is not None:
|
||||
try:
|
||||
worker.close()
|
||||
except Exception:
|
||||
pass
|
||||
if notify_registered:
|
||||
try:
|
||||
from tools.approval import unregister_gateway_notify
|
||||
|
||||
unregister_gateway_notify(key)
|
||||
except Exception:
|
||||
pass
|
||||
ready.set()
|
||||
|
||||
threading.Thread(target=_build, daemon=True).start()
|
||||
|
||||
return _ok(
|
||||
rid,
|
||||
{
|
||||
|
|
@ -1744,6 +1735,7 @@ def _(rid, params: dict) -> dict:
|
|||
"tools": {},
|
||||
"skills": {},
|
||||
"cwd": os.getenv("TERMINAL_CWD", os.getcwd()),
|
||||
"lazy": True,
|
||||
},
|
||||
},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -5,8 +5,7 @@ import type { GatewayClient } from '../gatewayClient.js'
|
|||
import type {
|
||||
ConfigFullResponse,
|
||||
ConfigMtimeResponse,
|
||||
ReloadMcpResponse,
|
||||
VoiceToggleResponse
|
||||
ReloadMcpResponse
|
||||
} from '../gatewayTypes.js'
|
||||
import { asRpcResult } from '../lib/rpc.js'
|
||||
|
||||
|
|
@ -105,7 +104,11 @@ export function useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid }: U
|
|||
return
|
||||
}
|
||||
|
||||
quietRpc<VoiceToggleResponse>(gw, 'voice.toggle', { action: 'status' }).then(r => setVoiceEnabled(!!r?.enabled))
|
||||
// Keep startup cheap: voice.toggle status probes optional audio/STT deps and
|
||||
// can run long enough to delay prompt.submit on the single stdio RPC pipe.
|
||||
// Environment flags are enough to initialize the UI bit; the heavier status
|
||||
// check still runs when the user opens /voice.
|
||||
setVoiceEnabled(process.env.HERMES_VOICE === '1')
|
||||
quietRpc<ConfigMtimeResponse>(gw, 'config.get', { key: 'mtime' }).then(r => {
|
||||
mtimeRef.current = Number(r?.mtime ?? 0)
|
||||
})
|
||||
|
|
|
|||
|
|
@ -126,6 +126,13 @@ export function useSubmission(opts: UseSubmissionOptions) {
|
|||
return sys('session not ready yet')
|
||||
}
|
||||
|
||||
// Plain prompts are the common path and should not pay an extra RPC
|
||||
// before prompt.submit. File-drop detection can still run for inputs
|
||||
// that contain an absolute/tilde path or file:// URI.
|
||||
if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\/)[^\s]+/.test(text)) {
|
||||
return startSubmit(text, expand(text), showUserMessage)
|
||||
}
|
||||
|
||||
gw.request<InputDetectDropResponse>('input.detect_drop', { session_id: sid, text })
|
||||
.then(r => {
|
||||
if (!r?.matched) {
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ const TranscriptPane = memo(function TranscriptPane({
|
|||
<Box flexDirection="column" paddingTop={1}>
|
||||
<Banner t={ui.theme} />
|
||||
|
||||
{row.msg.info?.version && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
|
||||
{row.msg.info && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
|
||||
</Box>
|
||||
) : row.msg.kind === 'panel' && row.msg.panelData ? (
|
||||
<Panel sections={row.msg.panelData.sections} t={ui.theme} title={row.msg.panelData.title} />
|
||||
|
|
|
|||
|
|
@ -64,9 +64,11 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
|
|||
}
|
||||
|
||||
const section = (title: string, data: Record<string, string[]>, max = 8, overflowLabel = 'more…') => {
|
||||
const skeletonRows = title === 'Tools' ? ['browser', 'terminal', 'file'] : ['apple', 'creative', 'software-development']
|
||||
const entries = Object.entries(data).sort()
|
||||
const shown = entries.slice(0, max)
|
||||
const overflow = entries.length - max
|
||||
const skeleton = info.lazy && entries.length === 0
|
||||
|
||||
return (
|
||||
<Box flexDirection="column" marginTop={1}>
|
||||
|
|
@ -74,7 +76,14 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
|
|||
Available {title}
|
||||
</Text>
|
||||
|
||||
{shown.map(([k, vs]) => (
|
||||
{skeleton
|
||||
? skeletonRows.map(k => (
|
||||
<Text dimColor key={k} wrap="truncate">
|
||||
<Text color={t.color.muted}>{k}: </Text>
|
||||
<Text color={t.color.text}>━━━━━━━━━━━━━━</Text>
|
||||
</Text>
|
||||
))
|
||||
: shown.map(([k, vs]) => (
|
||||
<Text key={k} wrap="truncate">
|
||||
<Text color={t.color.muted}>{strip(k)}: </Text>
|
||||
<Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
|
||||
|
|
|
|||
|
|
@ -143,11 +143,12 @@ export interface McpServerStatus {
|
|||
export interface SessionInfo {
|
||||
cwd?: string
|
||||
fast?: boolean
|
||||
lazy?: boolean
|
||||
mcp_servers?: McpServerStatus[]
|
||||
model: string
|
||||
reasoning_effort?: string
|
||||
service_tier?: string
|
||||
release_date?: string
|
||||
service_tier?: string
|
||||
skills: Record<string, string[]>
|
||||
tools: Record<string, string[]>
|
||||
update_behind?: number | null
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue