fix(tui): surface verbose tool details (#30225)
Some checks failed
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Docker Build and Publish / move-latest (push) Blocked by required conditions
Lint (ruff + ty) / ruff + ty diff (push) Waiting to run
Lint (ruff + ty) / ruff enforcement (blocking) (push) Waiting to run
Lint (ruff + ty) / Windows footguns (blocking) (push) Waiting to run
Nix Lockfile Fix / auto-fix-main (push) Waiting to run
Nix Lockfile Fix / fix (push) Waiting to run
Nix / nix (macos-latest) (push) Waiting to run
Nix / nix (ubuntu-latest) (push) Waiting to run
Tests / test (push) Waiting to run
Tests / e2e (push) Waiting to run
Deploy Site / deploy-vercel (push) Has been cancelled
Deploy Site / deploy-docs (push) Has been cancelled
OSV-Scanner / Scan lockfiles (push) Has been cancelled
uv.lock check / uv lock --check (push) Has been cancelled

* fix(tui): surface verbose tool details

Emit redacted structured verbose args/results to the TUI so /verbose verbose can show full tool detail without reopening stdout, and fail closed if redaction is unavailable.

Salvages #29011.

Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com>

* fix(tui): address verbose detail review

Label verbose tool failures as errors, cover forced verbose reasoning, and avoid new diff type warnings from the redaction regression tests.

* fix(tui): bound verbose tool payloads

Cap verbose tool detail text before emitting JSON-RPC events and preserve verbose results on inline diff completions.

* fix(tui): align termux argv test with gc flag

Update the stale TUI launch expectation so the Termux freshness path matches the current direct Node argv.

---------

Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com>
This commit is contained in:
brooklyn! 2026-05-22 00:16:52 -05:00 committed by GitHub
parent 4e2c66a098
commit 1264fab156
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 306 additions and 38 deletions

View file

@ -168,7 +168,7 @@ def test_make_tui_argv_skips_build_only_on_termux_when_fresh(
argv, cwd = main_mod._make_tui_argv(tmp_path, tui_dev=False)
assert argv == ["/bin/node", str(tmp_path / "dist" / "entry.js")]
assert argv == ["/bin/node", "--expose-gc", str(tmp_path / "dist" / "entry.js")]
assert cwd == tmp_path

View file

@ -59,6 +59,59 @@ def test_write_json_returns_false_on_broken_pipe(monkeypatch):
assert server.write_json({"ok": True}) is False
def test_tui_verbose_tool_details_fail_closed_when_redaction_fails(monkeypatch):
redact_module = types.ModuleType("agent.redact")
def fail_redaction(*_args, **_kwargs):
raise RuntimeError("redaction unavailable")
setattr(redact_module, "redact_sensitive_text", fail_redaction)
monkeypatch.setitem(sys.modules, "agent.redact", redact_module)
assert server._redact_tui_verbose_text("api_key=secret") == ""
assert server._tool_args_text({"api_key": "secret"}) == ""
assert server._tool_result_text("token=secret") == ""
def test_tui_verbose_tool_details_are_capped_before_emit(monkeypatch):
monkeypatch.setattr(server, "_TUI_VERBOSE_TEXT_MAX_CHARS", 12)
monkeypatch.setattr(server, "_TUI_VERBOSE_TEXT_MAX_LINES", 2)
capped = server._cap_tui_verbose_text("one\ntwo\nthree\nfour")
assert capped.startswith("[showing verbose tail; omitted ")
assert capped.endswith("three\nfour")
assert "one" not in capped
def test_tui_verbose_tool_events_omit_details_when_redaction_fails(monkeypatch):
redact_module = types.ModuleType("agent.redact")
def fail_redaction(*_args, **_kwargs):
raise RuntimeError("redaction unavailable")
setattr(redact_module, "redact_sensitive_text", fail_redaction)
monkeypatch.setitem(sys.modules, "agent.redact", redact_module)
events: list[tuple[str, str, dict]] = []
monkeypatch.setattr(
server, "_emit", lambda event_type, sid, payload: events.append((event_type, sid, payload))
)
monkeypatch.setitem(
server._sessions,
"redaction-test",
{"tool_progress_mode": "verbose", "tool_started_at": {}},
)
server._on_tool_start("redaction-test", "tool-1", "terminal", {"command": "pwd"})
server._on_tool_complete("redaction-test", "tool-1", "terminal", {"command": "pwd"}, "done")
assert events[0][0] == "tool.start"
assert events[1][0] == "tool.complete"
assert "args_text" not in events[0][2]
assert "result_text" not in events[1][2]
def test_dispatch_rejects_non_object_request():
resp = server.dispatch([])

View file

@ -1061,6 +1061,10 @@ def _session_tool_progress_mode(sid: str) -> str:
return str(_sessions.get(sid, {}).get("tool_progress_mode", "all") or "all")
def _session_verbose(sid: str) -> bool:
return _session_tool_progress_mode(sid) == "verbose"
def _tool_progress_enabled(sid: str) -> bool:
return _session_tool_progress_mode(sid) != "off"
@ -1492,6 +1496,74 @@ def _tool_ctx(name: str, args: dict) -> str:
return ""
_TUI_VERBOSE_TEXT_MAX_CHARS = 16_000
_TUI_VERBOSE_TEXT_MAX_LINES = 240
def _cap_tui_verbose_text(text: str) -> str:
if (
len(text) <= _TUI_VERBOSE_TEXT_MAX_CHARS
and text.count("\n") < _TUI_VERBOSE_TEXT_MAX_LINES
):
return text
idx = len(text)
start = 0
for _ in range(_TUI_VERBOSE_TEXT_MAX_LINES):
idx = text.rfind("\n", 0, idx)
if idx < 0:
start = 0
break
start = idx + 1
line_start = start
start = max(line_start, len(text) - _TUI_VERBOSE_TEXT_MAX_CHARS)
if start > line_start:
next_break = text.find("\n", start)
if 0 <= next_break < len(text) - 1:
start = next_break + 1
tail = text[start:].lstrip()
omitted_chars = max(0, len(text) - len(tail))
omitted_lines = text[:start].count("\n")
if omitted_lines:
label = (
"[showing verbose tail; omitted "
f"{omitted_lines} lines / {omitted_chars} chars]\n"
)
else:
label = f"[showing verbose tail; omitted {omitted_chars} chars]\n"
return f"{label}{tail}"
def _redact_tui_verbose_text(text: str) -> str:
try:
from agent.redact import redact_sensitive_text
redacted = redact_sensitive_text(str(text), force=True)
except Exception:
return ""
return _cap_tui_verbose_text(redacted)
def _tool_args_text(args: dict) -> str:
try:
raw = json.dumps(args or {}, indent=2, ensure_ascii=False, default=str)
except Exception:
raw = str(args or {})
return _redact_tui_verbose_text(raw)
def _tool_result_text(result: object) -> str:
try:
from agent.tool_dispatch_helpers import _multimodal_text_summary
raw = _multimodal_text_summary(result)
except Exception:
raw = str(result)
return _redact_tui_verbose_text(raw)
def _fmt_tool_duration(seconds: float | None) -> str:
if seconds is None:
return ""
@ -1553,13 +1625,18 @@ def _on_tool_start(sid: str, tool_call_id: str, name: str, args: dict):
pass
session.setdefault("tool_started_at", {})[tool_call_id] = time.time()
if _tool_progress_enabled(sid):
payload = {
"tool_id": tool_call_id,
"name": name,
"context": _tool_ctx(name, args),
}
if _session_verbose(sid):
args_text = _tool_args_text(args)
if args_text:
payload["args_text"] = args_text
# tool.complete is the source of truth for todos (full list from the
# tool result). args.todos here may be a partial merge update.
_emit(
"tool.start",
sid,
{"tool_id": tool_call_id, "name": name, "context": _tool_ctx(name, args)},
)
_emit("tool.start", sid, payload)
def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result: str):
@ -1576,6 +1653,10 @@ def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result
summary = _tool_summary(name, result, duration_s)
if summary:
payload["summary"] = summary
if _session_verbose(sid):
result_text = _tool_result_text(result)
if result_text:
payload["result_text"] = result_text
if name == "todo":
try:
data = json.loads(result)
@ -1615,7 +1696,10 @@ def _on_tool_progress(
_emit("tool.progress", sid, {"name": name, "preview": preview or ""})
return
if event_type == "reasoning.available" and preview:
_emit("reasoning.available", sid, {"text": str(preview)})
payload: dict[str, object] = {"text": str(preview)}
if _session_verbose(sid):
payload["verbose"] = True
_emit("reasoning.available", sid, payload)
return
if event_type.startswith("subagent."):
payload = {
@ -1691,7 +1775,11 @@ def _agent_cbs(sid: str) -> dict:
"tool_gen_callback": lambda name: _tool_progress_enabled(sid)
and _emit("tool.generating", sid, {"name": name}),
"thinking_callback": lambda text: _emit("thinking.delta", sid, {"text": text}),
"reasoning_callback": lambda text: _emit("reasoning.delta", sid, {"text": text}),
"reasoning_callback": lambda text: _emit(
"reasoning.delta",
sid,
{"text": text, **({"verbose": True} if _session_verbose(sid) else {})},
),
"status_callback": lambda kind, text=None: _status_update(
sid, str(kind), None if text is None else str(text)
),

View file

@ -342,6 +342,25 @@ describe('createGatewayEventHandler', () => {
expect(appended[appended.length - 1]).toMatchObject({ role: 'assistant', text: 'final answer' })
})
it('shows verbose reasoning even when normal reasoning display is off', () => {
vi.useFakeTimers()
patchUiState({ showReasoning: false })
const appended: Msg[] = []
const streamed = 'verbose-only reasoning'
try {
const onEvent = createGatewayEventHandler(buildCtx(appended))
onEvent({ payload: { text: streamed, verbose: true }, type: 'reasoning.delta' } as any)
vi.runOnlyPendingTimers()
expect(turnController.reasoningText).toBe(streamed)
expect(getTurnState().reasoning).toBe(streamed)
} finally {
vi.useRealTimers()
}
})
it('ignores fallback reasoning.available when streamed reasoning already exists', () => {
const appended: Msg[] = []
const streamed = 'short streamed reasoning'
@ -485,6 +504,25 @@ describe('createGatewayEventHandler', () => {
expect(appended[3]?.text).not.toContain('```diff')
})
it('keeps verbose result text on inline_diff tool completions', () => {
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))
const diff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
onEvent({
payload: { args_text: '{ "path": "foo.ts" }', context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
type: 'tool.start'
} as any)
onEvent({
payload: { inline_diff: diff, result_text: 'patched result', tool_id: 'tool-1' },
type: 'tool.complete'
} as any)
expect(turnController.segmentMessages[0]).toMatchObject({ kind: 'diff' })
expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Args:\n{ "path": "foo.ts" }')
expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Result:\npatched result')
})
it('keeps full final responses from duplicating flushed pre-diff narration', () => {
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))

View file

@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest'
import {
boundedLiveRenderText,
buildToolTrailLine,
buildVerboseToolTrailLine,
edgePreview,
estimateRows,
estimateTokensRough,
@ -12,8 +13,8 @@ import {
lastCotTrailIndex,
parseToolTrailResultLine,
pasteTokenLabel,
sanitizeAnsiForRender,
sameToolTrailGroup,
sanitizeAnsiForRender,
splitToolDuration,
stripAnsi,
thinkingPreview
@ -37,6 +38,39 @@ describe('buildToolTrailLine', () => {
})
})
describe('buildVerboseToolTrailLine', () => {
it('preserves multiline args and result details', () => {
const line = buildVerboseToolTrailLine(
'terminal',
'npm test',
false,
1.25,
'{\n "cmd": "npm test"\n}',
'first line\nsecond :: line'
)
expect(line).toContain('Args:\n{')
expect(line).toContain('Result:\nfirst line\nsecond :: line')
expect(parseToolTrailResultLine(line)).toEqual({
call: 'Terminal("npm test") (1.3s)',
detail: 'Args:\n{\n "cmd": "npm test"\n}\nResult:\nfirst line\nsecond :: line',
mark: '✓'
})
})
it('labels verbose failures as errors', () => {
const line = buildVerboseToolTrailLine('terminal', 'npm test', true, 0.5, undefined, 'command failed')
expect(line).toContain('Error:\ncommand failed')
expect(line).not.toContain('Result:\ncommand failed')
expect(parseToolTrailResultLine(line)).toEqual({
call: 'Terminal("npm test") (0.5s)',
detail: 'Error:\ncommand failed',
mark: '✗'
})
})
})
describe('lastCotTrailIndex', () => {
it('finds last non-result line', () => {
expect(lastCotTrailIndex(['a ✓', 'thinking…'])).toBe(1)

View file

@ -491,13 +491,13 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
case 'reasoning.delta':
if (ev.payload?.text) {
turnController.recordReasoningDelta(ev.payload.text)
turnController.recordReasoningDelta(ev.payload.text, Boolean(ev.payload.verbose))
}
return
case 'reasoning.available':
turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''))
turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''), Boolean(ev.payload?.verbose))
return
@ -517,12 +517,18 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
case 'tool.start':
turnController.recordTodos(ev.payload.todos)
turnController.recordToolStart(ev.payload.tool_id, ev.payload.name ?? 'tool', ev.payload.context ?? '')
turnController.recordToolStart(
ev.payload.tool_id,
ev.payload.name ?? 'tool',
ev.payload.context ?? '',
ev.payload.args_text ? stripAnsi(String(ev.payload.args_text)) : undefined
)
return
case 'tool.complete': {
const inlineDiffText =
ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : ''
const resultText = ev.payload.result_text ? stripAnsi(String(ev.payload.result_text)) : undefined
if (inlineDiffText) {
turnController.recordInlineDiffToolComplete(
@ -530,7 +536,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
ev.payload.tool_id,
ev.payload.name,
ev.payload.error,
ev.payload.duration_s
ev.payload.duration_s,
resultText
)
} else {
turnController.recordToolComplete(
@ -539,7 +546,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
ev.payload.error,
ev.payload.summary,
ev.payload.duration_s,
ev.payload.todos
ev.payload.todos,
resultText
)
}

View file

@ -11,6 +11,7 @@ import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
import {
boundedLiveRenderText,
buildToolTrailLine,
buildVerboseToolTrailLine,
estimateTokensRough,
isTransientTrailLine,
sameToolTrailGroup,
@ -542,8 +543,8 @@ class TurnController {
}
}
recordReasoningAvailable(text: string) {
if (this.interrupted || !getUiState().showReasoning) {
recordReasoningAvailable(text: string, force = false) {
if (this.interrupted || (!force && !getUiState().showReasoning)) {
return
}
@ -560,8 +561,8 @@ class TurnController {
this.pulseReasoningStreaming()
}
recordReasoningDelta(text: string) {
if (this.interrupted || !getUiState().showReasoning) {
recordReasoningDelta(text: string, force = false) {
if (this.interrupted || (!force && !getUiState().showReasoning)) {
return
}
@ -587,14 +588,15 @@ class TurnController {
error?: string,
summary?: string,
duration?: number,
todos?: unknown
todos?: unknown,
resultText?: string
) {
if (this.interrupted) {
return
}
this.recordTodos(todos)
const line = this.completeTool(toolId, fallbackName, error, summary, duration)
const line = this.completeTool(toolId, fallbackName, error, summary, duration, resultText)
this.pendingSegmentTools = [...this.pendingSegmentTools, line]
this.flushPendingToolsIntoLastSegment()
@ -606,30 +608,42 @@ class TurnController {
toolId: string,
fallbackName?: string,
error?: string,
duration?: number
duration?: number,
resultText?: string
) {
if (this.interrupted) {
return
}
this.flushStreamingSegment()
this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration)])
this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration, resultText)])
this.publishToolState()
}
private completeTool(toolId: string, fallbackName?: string, error?: string, summary?: string, duration?: number) {
private completeTool(
toolId: string,
fallbackName?: string,
error?: string,
summary?: string,
duration?: number,
resultText?: string
) {
const done = this.activeTools.find(tool => tool.id === toolId)
const name = done?.name ?? fallbackName ?? 'tool'
const label = toolTrailLabel(name)
const fallbackDuration = done?.startedAt ? (Date.now() - done.startedAt) / 1000 : undefined
const line = buildToolTrailLine(
name,
done?.context || '',
Boolean(error),
error || summary || '',
duration ?? fallbackDuration
)
const line =
done?.verboseArgs || resultText
? buildVerboseToolTrailLine(
name,
done?.context || '',
Boolean(error),
duration ?? fallbackDuration,
done?.verboseArgs,
error || resultText || summary || ''
)
: buildToolTrailLine(name, done?.context || '', Boolean(error), error || summary || '', duration ?? fallbackDuration)
this.activeTools = this.activeTools.filter(tool => tool.id !== toolId)
@ -675,7 +689,7 @@ class TurnController {
}, STREAM_BATCH_MS)
}
recordToolStart(toolId: string, name: string, context: string) {
recordToolStart(toolId: string, name: string, context: string, verboseArgs?: string) {
if (this.interrupted) {
return
}
@ -688,7 +702,7 @@ class TurnController {
const sample = `${name} ${context}`.trim()
this.toolTokenAcc += sample ? estimateTokensRough(sample) : 0
this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now() }]
this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now(), verboseArgs }]
patchTurnState({ toolTokens: this.toolTokenAcc, tools: this.activeTools })
}

View file

@ -856,7 +856,16 @@ export const ToolTrail = memo(function ToolTrail({
color: t.color.text,
key: tool.id,
label,
details: [],
details: tool.verboseArgs
? [
{
color: t.color.muted,
content: `Args:\n${boundedLiveRenderText(tool.verboseArgs)}`,
dimColor: true,
key: `${tool.id}-args`
}
]
: [],
content: (
<>
<Spinner color={t.color.accent} variant="tool" /> {label}

View file

@ -477,11 +477,11 @@ export type GatewayEvent =
type: 'gateway.start_timeout'
}
| { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' }
| { payload?: { text?: string }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' }
| { payload?: { text?: string; verbose?: boolean }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' }
| { payload: { name?: string; preview?: string }; session_id?: string; type: 'tool.progress' }
| { payload: { name?: string }; session_id?: string; type: 'tool.generating' }
| {
payload: { context?: string; name?: string; tool_id: string; todos?: unknown[] }
payload: { args_text?: string; context?: string; name?: string; tool_id: string; todos?: unknown[] }
session_id?: string
type: 'tool.start'
}
@ -491,6 +491,7 @@ export type GatewayEvent =
error?: string
inline_diff?: string
name?: string
result_text?: string
summary?: string
tool_id: string
todos?: unknown[]

View file

@ -212,6 +212,28 @@ export const buildToolTrailLine = (
return `${formatToolCall(name, context)}${took}${detail ? ` :: ${detail}` : ''} ${error ? '✗' : '✓'}`
}
const verboseToolBlock = (label: string, text?: string) => {
const body = (text ?? '').trim()
return body ? `${label}:\n${boundedLiveRenderText(body)}` : ''
}
export const buildVerboseToolTrailLine = (
name: string,
context: string,
error?: boolean,
duration?: number,
argsText?: string,
resultText?: string
) => {
const detail = [verboseToolBlock('Args', argsText), verboseToolBlock(error ? 'Error' : 'Result', resultText)]
.filter(Boolean)
.join('\n')
const took = duration !== undefined ? ` (${duration.toFixed(1)}s)` : ''
return `${formatToolCall(name, context)}${took}${detail ? ` :: ${detail}` : ''} ${error ? '✗' : '✓'}`
}
export const isToolTrailResultLine = (line: string) => line.endsWith(' ✓') || line.endsWith(' ✗')
export const parseToolTrailResultLine = (line: string) => {
@ -221,10 +243,10 @@ export const parseToolTrailResultLine = (line: string) => {
const mark = line.endsWith(' ✗') ? '✗' : '✓'
const body = line.slice(0, -2)
const [call, detail] = body.split(' :: ', 2)
const sep = body.indexOf(' :: ')
if (detail != null) {
return { call, detail, mark }
if (sep >= 0) {
return { call: body.slice(0, sep), detail: body.slice(sep + 4), mark }
}
const legacy = body.indexOf(': ')

View file

@ -2,6 +2,7 @@ export interface ActiveTool {
context?: string
id: string
name: string
verboseArgs?: string
startedAt?: number
}