From 1264fab15660b7341aee6123e332e58b91fe2bca Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Fri, 22 May 2026 00:16:52 -0500 Subject: [PATCH] fix(tui): surface verbose tool details (#30225) * fix(tui): surface verbose tool details Emit redacted structured verbose args/results to the TUI so /verbose verbose can show full tool detail without reopening stdout, and fail closed if redaction is unavailable. Salvages #29011. Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com> * fix(tui): address verbose detail review Label verbose tool failures as errors, cover forced verbose reasoning, and avoid new diff type warnings from the redaction regression tests. * fix(tui): bound verbose tool payloads Cap verbose tool detail text before emitting JSON-RPC events and preserve verbose results on inline diff completions. * fix(tui): align termux argv test with gc flag Update the stale TUI launch expectation so the Termux freshness path matches the current direct Node argv. --------- Co-authored-by: helix4u <4317663+helix4u@users.noreply.github.com> --- tests/hermes_cli/test_tui_npm_install.py | 2 +- tests/test_tui_gateway_server.py | 53 +++++++++ tui_gateway/server.py | 102 ++++++++++++++++-- .../createGatewayEventHandler.test.ts | 38 +++++++ ui-tui/src/__tests__/text.test.ts | 36 ++++++- ui-tui/src/app/createGatewayEventHandler.ts | 18 +++- ui-tui/src/app/turnController.ts | 50 +++++---- ui-tui/src/components/thinking.tsx | 11 +- ui-tui/src/gatewayTypes.ts | 5 +- ui-tui/src/lib/text.ts | 28 ++++- ui-tui/src/types.ts | 1 + 11 files changed, 306 insertions(+), 38 deletions(-) diff --git a/tests/hermes_cli/test_tui_npm_install.py b/tests/hermes_cli/test_tui_npm_install.py index b11d3b4debb..6fca13c4927 100644 --- a/tests/hermes_cli/test_tui_npm_install.py +++ b/tests/hermes_cli/test_tui_npm_install.py @@ -168,7 +168,7 @@ def test_make_tui_argv_skips_build_only_on_termux_when_fresh( argv, cwd = main_mod._make_tui_argv(tmp_path, tui_dev=False) - assert argv == ["/bin/node", str(tmp_path / "dist" / "entry.js")] + assert argv == ["/bin/node", "--expose-gc", str(tmp_path / "dist" / "entry.js")] assert cwd == tmp_path diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 2205cb8df64..2824bd85916 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -59,6 +59,59 @@ def test_write_json_returns_false_on_broken_pipe(monkeypatch): assert server.write_json({"ok": True}) is False +def test_tui_verbose_tool_details_fail_closed_when_redaction_fails(monkeypatch): + redact_module = types.ModuleType("agent.redact") + + def fail_redaction(*_args, **_kwargs): + raise RuntimeError("redaction unavailable") + + setattr(redact_module, "redact_sensitive_text", fail_redaction) + monkeypatch.setitem(sys.modules, "agent.redact", redact_module) + + assert server._redact_tui_verbose_text("api_key=secret") == "" + assert server._tool_args_text({"api_key": "secret"}) == "" + assert server._tool_result_text("token=secret") == "" + + +def test_tui_verbose_tool_details_are_capped_before_emit(monkeypatch): + monkeypatch.setattr(server, "_TUI_VERBOSE_TEXT_MAX_CHARS", 12) + monkeypatch.setattr(server, "_TUI_VERBOSE_TEXT_MAX_LINES", 2) + + capped = server._cap_tui_verbose_text("one\ntwo\nthree\nfour") + + assert capped.startswith("[showing verbose tail; omitted ") + assert capped.endswith("three\nfour") + assert "one" not in capped + + +def test_tui_verbose_tool_events_omit_details_when_redaction_fails(monkeypatch): + redact_module = types.ModuleType("agent.redact") + + def fail_redaction(*_args, **_kwargs): + raise RuntimeError("redaction unavailable") + + setattr(redact_module, "redact_sensitive_text", fail_redaction) + monkeypatch.setitem(sys.modules, "agent.redact", redact_module) + + events: list[tuple[str, str, dict]] = [] + monkeypatch.setattr( + server, "_emit", lambda event_type, sid, payload: events.append((event_type, sid, payload)) + ) + monkeypatch.setitem( + server._sessions, + "redaction-test", + {"tool_progress_mode": "verbose", "tool_started_at": {}}, + ) + + server._on_tool_start("redaction-test", "tool-1", "terminal", {"command": "pwd"}) + server._on_tool_complete("redaction-test", "tool-1", "terminal", {"command": "pwd"}, "done") + + assert events[0][0] == "tool.start" + assert events[1][0] == "tool.complete" + assert "args_text" not in events[0][2] + assert "result_text" not in events[1][2] + + def test_dispatch_rejects_non_object_request(): resp = server.dispatch([]) diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 921853a34c5..67b1f96d264 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1061,6 +1061,10 @@ def _session_tool_progress_mode(sid: str) -> str: return str(_sessions.get(sid, {}).get("tool_progress_mode", "all") or "all") +def _session_verbose(sid: str) -> bool: + return _session_tool_progress_mode(sid) == "verbose" + + def _tool_progress_enabled(sid: str) -> bool: return _session_tool_progress_mode(sid) != "off" @@ -1492,6 +1496,74 @@ def _tool_ctx(name: str, args: dict) -> str: return "" +_TUI_VERBOSE_TEXT_MAX_CHARS = 16_000 +_TUI_VERBOSE_TEXT_MAX_LINES = 240 + + +def _cap_tui_verbose_text(text: str) -> str: + if ( + len(text) <= _TUI_VERBOSE_TEXT_MAX_CHARS + and text.count("\n") < _TUI_VERBOSE_TEXT_MAX_LINES + ): + return text + + idx = len(text) + start = 0 + for _ in range(_TUI_VERBOSE_TEXT_MAX_LINES): + idx = text.rfind("\n", 0, idx) + if idx < 0: + start = 0 + break + start = idx + 1 + + line_start = start + start = max(line_start, len(text) - _TUI_VERBOSE_TEXT_MAX_CHARS) + if start > line_start: + next_break = text.find("\n", start) + if 0 <= next_break < len(text) - 1: + start = next_break + 1 + + tail = text[start:].lstrip() + omitted_chars = max(0, len(text) - len(tail)) + omitted_lines = text[:start].count("\n") + if omitted_lines: + label = ( + "[showing verbose tail; omitted " + f"{omitted_lines} lines / {omitted_chars} chars]\n" + ) + else: + label = f"[showing verbose tail; omitted {omitted_chars} chars]\n" + return f"{label}{tail}" + + +def _redact_tui_verbose_text(text: str) -> str: + try: + from agent.redact import redact_sensitive_text + + redacted = redact_sensitive_text(str(text), force=True) + except Exception: + return "" + return _cap_tui_verbose_text(redacted) + + +def _tool_args_text(args: dict) -> str: + try: + raw = json.dumps(args or {}, indent=2, ensure_ascii=False, default=str) + except Exception: + raw = str(args or {}) + return _redact_tui_verbose_text(raw) + + +def _tool_result_text(result: object) -> str: + try: + from agent.tool_dispatch_helpers import _multimodal_text_summary + + raw = _multimodal_text_summary(result) + except Exception: + raw = str(result) + return _redact_tui_verbose_text(raw) + + def _fmt_tool_duration(seconds: float | None) -> str: if seconds is None: return "" @@ -1553,13 +1625,18 @@ def _on_tool_start(sid: str, tool_call_id: str, name: str, args: dict): pass session.setdefault("tool_started_at", {})[tool_call_id] = time.time() if _tool_progress_enabled(sid): + payload = { + "tool_id": tool_call_id, + "name": name, + "context": _tool_ctx(name, args), + } + if _session_verbose(sid): + args_text = _tool_args_text(args) + if args_text: + payload["args_text"] = args_text # tool.complete is the source of truth for todos (full list from the # tool result). args.todos here may be a partial merge update. - _emit( - "tool.start", - sid, - {"tool_id": tool_call_id, "name": name, "context": _tool_ctx(name, args)}, - ) + _emit("tool.start", sid, payload) def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result: str): @@ -1576,6 +1653,10 @@ def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result summary = _tool_summary(name, result, duration_s) if summary: payload["summary"] = summary + if _session_verbose(sid): + result_text = _tool_result_text(result) + if result_text: + payload["result_text"] = result_text if name == "todo": try: data = json.loads(result) @@ -1615,7 +1696,10 @@ def _on_tool_progress( _emit("tool.progress", sid, {"name": name, "preview": preview or ""}) return if event_type == "reasoning.available" and preview: - _emit("reasoning.available", sid, {"text": str(preview)}) + payload: dict[str, object] = {"text": str(preview)} + if _session_verbose(sid): + payload["verbose"] = True + _emit("reasoning.available", sid, payload) return if event_type.startswith("subagent."): payload = { @@ -1691,7 +1775,11 @@ def _agent_cbs(sid: str) -> dict: "tool_gen_callback": lambda name: _tool_progress_enabled(sid) and _emit("tool.generating", sid, {"name": name}), "thinking_callback": lambda text: _emit("thinking.delta", sid, {"text": text}), - "reasoning_callback": lambda text: _emit("reasoning.delta", sid, {"text": text}), + "reasoning_callback": lambda text: _emit( + "reasoning.delta", + sid, + {"text": text, **({"verbose": True} if _session_verbose(sid) else {})}, + ), "status_callback": lambda kind, text=None: _status_update( sid, str(kind), None if text is None else str(text) ), diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index 417b8c41b93..15ed7f1edd7 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -342,6 +342,25 @@ describe('createGatewayEventHandler', () => { expect(appended[appended.length - 1]).toMatchObject({ role: 'assistant', text: 'final answer' }) }) + it('shows verbose reasoning even when normal reasoning display is off', () => { + vi.useFakeTimers() + patchUiState({ showReasoning: false }) + const appended: Msg[] = [] + const streamed = 'verbose-only reasoning' + + try { + const onEvent = createGatewayEventHandler(buildCtx(appended)) + + onEvent({ payload: { text: streamed, verbose: true }, type: 'reasoning.delta' } as any) + vi.runOnlyPendingTimers() + + expect(turnController.reasoningText).toBe(streamed) + expect(getTurnState().reasoning).toBe(streamed) + } finally { + vi.useRealTimers() + } + }) + it('ignores fallback reasoning.available when streamed reasoning already exists', () => { const appended: Msg[] = [] const streamed = 'short streamed reasoning' @@ -485,6 +504,25 @@ describe('createGatewayEventHandler', () => { expect(appended[3]?.text).not.toContain('```diff') }) + it('keeps verbose result text on inline_diff tool completions', () => { + const appended: Msg[] = [] + const onEvent = createGatewayEventHandler(buildCtx(appended)) + const diff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new' + + onEvent({ + payload: { args_text: '{ "path": "foo.ts" }', context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, + type: 'tool.start' + } as any) + onEvent({ + payload: { inline_diff: diff, result_text: 'patched result', tool_id: 'tool-1' }, + type: 'tool.complete' + } as any) + + expect(turnController.segmentMessages[0]).toMatchObject({ kind: 'diff' }) + expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Args:\n{ "path": "foo.ts" }') + expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Result:\npatched result') + }) + it('keeps full final responses from duplicating flushed pre-diff narration', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) diff --git a/ui-tui/src/__tests__/text.test.ts b/ui-tui/src/__tests__/text.test.ts index 306324d353d..6fd250b5bee 100644 --- a/ui-tui/src/__tests__/text.test.ts +++ b/ui-tui/src/__tests__/text.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest' import { boundedLiveRenderText, buildToolTrailLine, + buildVerboseToolTrailLine, edgePreview, estimateRows, estimateTokensRough, @@ -12,8 +13,8 @@ import { lastCotTrailIndex, parseToolTrailResultLine, pasteTokenLabel, - sanitizeAnsiForRender, sameToolTrailGroup, + sanitizeAnsiForRender, splitToolDuration, stripAnsi, thinkingPreview @@ -37,6 +38,39 @@ describe('buildToolTrailLine', () => { }) }) +describe('buildVerboseToolTrailLine', () => { + it('preserves multiline args and result details', () => { + const line = buildVerboseToolTrailLine( + 'terminal', + 'npm test', + false, + 1.25, + '{\n "cmd": "npm test"\n}', + 'first line\nsecond :: line' + ) + + expect(line).toContain('Args:\n{') + expect(line).toContain('Result:\nfirst line\nsecond :: line') + expect(parseToolTrailResultLine(line)).toEqual({ + call: 'Terminal("npm test") (1.3s)', + detail: 'Args:\n{\n "cmd": "npm test"\n}\nResult:\nfirst line\nsecond :: line', + mark: '✓' + }) + }) + + it('labels verbose failures as errors', () => { + const line = buildVerboseToolTrailLine('terminal', 'npm test', true, 0.5, undefined, 'command failed') + + expect(line).toContain('Error:\ncommand failed') + expect(line).not.toContain('Result:\ncommand failed') + expect(parseToolTrailResultLine(line)).toEqual({ + call: 'Terminal("npm test") (0.5s)', + detail: 'Error:\ncommand failed', + mark: '✗' + }) + }) +}) + describe('lastCotTrailIndex', () => { it('finds last non-result line', () => { expect(lastCotTrailIndex(['a ✓', 'thinking…'])).toBe(1) diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 267334bfd72..deb28a7afc0 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -491,13 +491,13 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: case 'reasoning.delta': if (ev.payload?.text) { - turnController.recordReasoningDelta(ev.payload.text) + turnController.recordReasoningDelta(ev.payload.text, Boolean(ev.payload.verbose)) } return case 'reasoning.available': - turnController.recordReasoningAvailable(String(ev.payload?.text ?? '')) + turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''), Boolean(ev.payload?.verbose)) return @@ -517,12 +517,18 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: case 'tool.start': turnController.recordTodos(ev.payload.todos) - turnController.recordToolStart(ev.payload.tool_id, ev.payload.name ?? 'tool', ev.payload.context ?? '') + turnController.recordToolStart( + ev.payload.tool_id, + ev.payload.name ?? 'tool', + ev.payload.context ?? '', + ev.payload.args_text ? stripAnsi(String(ev.payload.args_text)) : undefined + ) return case 'tool.complete': { const inlineDiffText = ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : '' + const resultText = ev.payload.result_text ? stripAnsi(String(ev.payload.result_text)) : undefined if (inlineDiffText) { turnController.recordInlineDiffToolComplete( @@ -530,7 +536,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: ev.payload.tool_id, ev.payload.name, ev.payload.error, - ev.payload.duration_s + ev.payload.duration_s, + resultText ) } else { turnController.recordToolComplete( @@ -539,7 +546,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: ev.payload.error, ev.payload.summary, ev.payload.duration_s, - ev.payload.todos + ev.payload.todos, + resultText ) } diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts index b9e0aa04c19..4e22d3312cd 100644 --- a/ui-tui/src/app/turnController.ts +++ b/ui-tui/src/app/turnController.ts @@ -11,6 +11,7 @@ import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js' import { boundedLiveRenderText, buildToolTrailLine, + buildVerboseToolTrailLine, estimateTokensRough, isTransientTrailLine, sameToolTrailGroup, @@ -542,8 +543,8 @@ class TurnController { } } - recordReasoningAvailable(text: string) { - if (this.interrupted || !getUiState().showReasoning) { + recordReasoningAvailable(text: string, force = false) { + if (this.interrupted || (!force && !getUiState().showReasoning)) { return } @@ -560,8 +561,8 @@ class TurnController { this.pulseReasoningStreaming() } - recordReasoningDelta(text: string) { - if (this.interrupted || !getUiState().showReasoning) { + recordReasoningDelta(text: string, force = false) { + if (this.interrupted || (!force && !getUiState().showReasoning)) { return } @@ -587,14 +588,15 @@ class TurnController { error?: string, summary?: string, duration?: number, - todos?: unknown + todos?: unknown, + resultText?: string ) { if (this.interrupted) { return } this.recordTodos(todos) - const line = this.completeTool(toolId, fallbackName, error, summary, duration) + const line = this.completeTool(toolId, fallbackName, error, summary, duration, resultText) this.pendingSegmentTools = [...this.pendingSegmentTools, line] this.flushPendingToolsIntoLastSegment() @@ -606,30 +608,42 @@ class TurnController { toolId: string, fallbackName?: string, error?: string, - duration?: number + duration?: number, + resultText?: string ) { if (this.interrupted) { return } this.flushStreamingSegment() - this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration)]) + this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration, resultText)]) this.publishToolState() } - private completeTool(toolId: string, fallbackName?: string, error?: string, summary?: string, duration?: number) { + private completeTool( + toolId: string, + fallbackName?: string, + error?: string, + summary?: string, + duration?: number, + resultText?: string + ) { const done = this.activeTools.find(tool => tool.id === toolId) const name = done?.name ?? fallbackName ?? 'tool' const label = toolTrailLabel(name) const fallbackDuration = done?.startedAt ? (Date.now() - done.startedAt) / 1000 : undefined - const line = buildToolTrailLine( - name, - done?.context || '', - Boolean(error), - error || summary || '', - duration ?? fallbackDuration - ) + const line = + done?.verboseArgs || resultText + ? buildVerboseToolTrailLine( + name, + done?.context || '', + Boolean(error), + duration ?? fallbackDuration, + done?.verboseArgs, + error || resultText || summary || '' + ) + : buildToolTrailLine(name, done?.context || '', Boolean(error), error || summary || '', duration ?? fallbackDuration) this.activeTools = this.activeTools.filter(tool => tool.id !== toolId) @@ -675,7 +689,7 @@ class TurnController { }, STREAM_BATCH_MS) } - recordToolStart(toolId: string, name: string, context: string) { + recordToolStart(toolId: string, name: string, context: string, verboseArgs?: string) { if (this.interrupted) { return } @@ -688,7 +702,7 @@ class TurnController { const sample = `${name} ${context}`.trim() this.toolTokenAcc += sample ? estimateTokensRough(sample) : 0 - this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now() }] + this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now(), verboseArgs }] patchTurnState({ toolTokens: this.toolTokenAcc, tools: this.activeTools }) } diff --git a/ui-tui/src/components/thinking.tsx b/ui-tui/src/components/thinking.tsx index 6908795f621..0d9ecee87c9 100644 --- a/ui-tui/src/components/thinking.tsx +++ b/ui-tui/src/components/thinking.tsx @@ -856,7 +856,16 @@ export const ToolTrail = memo(function ToolTrail({ color: t.color.text, key: tool.id, label, - details: [], + details: tool.verboseArgs + ? [ + { + color: t.color.muted, + content: `Args:\n${boundedLiveRenderText(tool.verboseArgs)}`, + dimColor: true, + key: `${tool.id}-args` + } + ] + : [], content: ( <> {label} diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts index ab85c39fbdd..9de1c85112d 100644 --- a/ui-tui/src/gatewayTypes.ts +++ b/ui-tui/src/gatewayTypes.ts @@ -477,11 +477,11 @@ export type GatewayEvent = type: 'gateway.start_timeout' } | { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' } - | { payload?: { text?: string }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' } + | { payload?: { text?: string; verbose?: boolean }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' } | { payload: { name?: string; preview?: string }; session_id?: string; type: 'tool.progress' } | { payload: { name?: string }; session_id?: string; type: 'tool.generating' } | { - payload: { context?: string; name?: string; tool_id: string; todos?: unknown[] } + payload: { args_text?: string; context?: string; name?: string; tool_id: string; todos?: unknown[] } session_id?: string type: 'tool.start' } @@ -491,6 +491,7 @@ export type GatewayEvent = error?: string inline_diff?: string name?: string + result_text?: string summary?: string tool_id: string todos?: unknown[] diff --git a/ui-tui/src/lib/text.ts b/ui-tui/src/lib/text.ts index 5b52c236719..2b1ae33c592 100644 --- a/ui-tui/src/lib/text.ts +++ b/ui-tui/src/lib/text.ts @@ -212,6 +212,28 @@ export const buildToolTrailLine = ( return `${formatToolCall(name, context)}${took}${detail ? ` :: ${detail}` : ''} ${error ? '✗' : '✓'}` } +const verboseToolBlock = (label: string, text?: string) => { + const body = (text ?? '').trim() + + return body ? `${label}:\n${boundedLiveRenderText(body)}` : '' +} + +export const buildVerboseToolTrailLine = ( + name: string, + context: string, + error?: boolean, + duration?: number, + argsText?: string, + resultText?: string +) => { + const detail = [verboseToolBlock('Args', argsText), verboseToolBlock(error ? 'Error' : 'Result', resultText)] + .filter(Boolean) + .join('\n') + const took = duration !== undefined ? ` (${duration.toFixed(1)}s)` : '' + + return `${formatToolCall(name, context)}${took}${detail ? ` :: ${detail}` : ''} ${error ? '✗' : '✓'}` +} + export const isToolTrailResultLine = (line: string) => line.endsWith(' ✓') || line.endsWith(' ✗') export const parseToolTrailResultLine = (line: string) => { @@ -221,10 +243,10 @@ export const parseToolTrailResultLine = (line: string) => { const mark = line.endsWith(' ✗') ? '✗' : '✓' const body = line.slice(0, -2) - const [call, detail] = body.split(' :: ', 2) + const sep = body.indexOf(' :: ') - if (detail != null) { - return { call, detail, mark } + if (sep >= 0) { + return { call: body.slice(0, sep), detail: body.slice(sep + 4), mark } } const legacy = body.indexOf(': ') diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts index f0651bef9c5..0bfab6c271d 100644 --- a/ui-tui/src/types.ts +++ b/ui-tui/src/types.ts @@ -2,6 +2,7 @@ export interface ActiveTool { context?: string id: string name: string + verboseArgs?: string startedAt?: number }