feat(tui): subagent spawn observability overlay

Adds a live + post-hoc audit surface for recursive delegate_task fan-out.
None of cc/oc/oclaw tackle nested subagent trees inside an Ink overlay;
this ships a view-switched dashboard that handles arbitrary depth + width.

Python
- delegate_tool: every subagent event now carries subagent_id, parent_id,
  depth, model, tool_count; subagent.complete also ships input/output/
  reasoning tokens, cost, api_calls, files_read/files_written, and a
  tail of tool-call outputs
- delegate_tool: new subagent.spawn_requested event + _active_subagents
  registry so the overlay can kill a branch by id and pause new spawns
- tui_gateway: new RPCs delegation.status, delegation.pause,
  subagent.interrupt, spawn_tree.save/list/load (disk under
  \$HERMES_HOME/spawn-trees/<session>/<ts>.json)

TUI
- /agents overlay: full-width list mode (gantt strip + row picker) and
  Enter-to-drill full-width scrollable detail mode; inverse+amber
  selection, heat-coloured branch markers, wall-clock gantt with tick
  ruler, per-branch rollups
- Detail pane: collapsible accordions (Budget, Files, Tool calls, Output,
  Progress, Summary); open-state persists across agents + mode switches
  via a shared atom
- /replay [N|last|list|load <path>] for in-memory + disk history;
  /replay-diff <a> <b> for side-by-side tree comparison
- Status-bar SpawnHud warns as depth/concurrency approaches caps;
  overlay auto-follows the just-finished turn onto history[1]
- Theme: bump DARK dim #B8860B → #CC9B1F for readable secondary text
  globally; keep LIGHT untouched

Tests: +29 new subagentTree unit tests; 215/215 passing.
This commit is contained in:
Brooklyn Nicholson 2026-04-22 10:38:17 -05:00
parent ba7e8b0df9
commit 7785654ad5
19 changed files with 4329 additions and 426 deletions

View file

@ -1,11 +1,17 @@
import { STREAM_BATCH_MS } from '../config/timing.js'
import { buildSetupRequiredSections, SETUP_REQUIRED_TITLE } from '../content/setup.js'
import type { CommandsCatalogResponse, GatewayEvent, GatewaySkin } from '../gatewayTypes.js'
import type {
CommandsCatalogResponse,
DelegationStatusResponse,
GatewayEvent,
GatewaySkin
} from '../gatewayTypes.js'
import { rpcErrorMessage } from '../lib/rpc.js'
import { formatToolCall, stripAnsi } from '../lib/text.js'
import { fromSkin } from '../theme.js'
import type { Msg, SubagentProgress } from '../types.js'
import { applyDelegationStatus, getDelegationState } from './delegationStore.js'
import type { GatewayEventHandlerContext } from './interfaces.js'
import { patchOverlayState } from './overlayStore.js'
import { turnController } from './turnController.js'
@ -53,6 +59,54 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
let pendingThinkingStatus = ''
let thinkingStatusTimer: null | ReturnType<typeof setTimeout> = null
// Inject the disk-save callback into turnController so recordMessageComplete
// can fire-and-forget a persist without having to plumb a gateway ref around.
turnController.persistSpawnTree = async (subagents, sessionId) => {
try {
const startedAt = subagents.reduce<number>((min, s) => {
if (!s.startedAt) {
return min
}
return min === 0 ? s.startedAt : Math.min(min, s.startedAt)
}, 0)
const top = subagents.filter(s => !s.parentId).slice(0, 2)
const label = top.length
? top.map(s => s.goal).filter(Boolean).slice(0, 2).join(' · ')
: `${subagents.length} subagents`
await rpc('spawn_tree.save', {
finished_at: Date.now() / 1000,
label: label.slice(0, 120),
session_id: sessionId ?? 'default',
started_at: startedAt ? startedAt / 1000 : null,
subagents
})
} catch {
// Persistence is best-effort; in-memory history is the authoritative
// same-session source. A write failure doesn't block the turn.
}
}
// Refresh delegation caps at most every 5s so the status bar HUD can
// render a /warning close to the configured cap without spamming the RPC.
let lastDelegationFetchAt = 0
const refreshDelegationStatus = (force = false) => {
const now = Date.now()
if (!force && now - lastDelegationFetchAt < 5000) {
return
}
lastDelegationFetchAt = now
rpc<DelegationStatusResponse>('delegation.status', {})
.then(r => applyDelegationStatus(r))
.catch(() => {})
}
const setStatus = (status: string) => {
pendingThinkingStatus = ''
@ -329,8 +383,27 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
return
case 'subagent.spawn_requested':
// Child built but not yet running (waiting on ThreadPoolExecutor slot).
// Preserve completed state if a later event races in before this one.
turnController.upsertSubagent(ev.payload, c =>
c.status === 'completed' ? {} : { status: 'queued' }
)
// Prime the status-bar HUD: fetch caps (once every 5s) so we can
// warn as depth/concurrency approaches the configured ceiling.
if (getDelegationState().maxSpawnDepth === null) {
refreshDelegationStatus(true)
} else {
refreshDelegationStatus()
}
return
case 'subagent.start':
turnController.upsertSubagent(ev.payload, () => ({ status: 'running' }))
turnController.upsertSubagent(ev.payload, c =>
c.status === 'completed' ? {} : { status: 'running' }
)
return
case 'subagent.thinking': {