From 5f215b13cef654a07f482294a137e6a68d0caef8 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 13:11:47 -0700 Subject: [PATCH 01/23] fix(docker): materialize bundled TUI Ink package (#16690) * fix(docker): materialize bundled TUI Ink package * fix(docker): keep nested deps out of build context * fix(docker): make TUI Ink smoke check deterministic * test(docker): skip dockerignore assertion in partial checkouts * fix(docker): use lockfile install for vendored Ink deps * test(cli): expect deterministic npm ci in /update flow * fix(docker): fall back to npm install for vendored Ink deps * fix(docker): keep bundled Ink source for TUI runtime builds * fix(docker): dedupe React in vendored Ink package --- .dockerignore | 2 ++ Dockerfile | 10 +++++-- tests/hermes_cli/test_cmd_update.py | 4 +-- tests/tools/test_dockerfile_pid1_reaping.py | 33 +++++++++++++++++---- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/.dockerignore b/.dockerignore index 0e4a88fd2f..542c96700e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,7 +5,9 @@ # Dependencies node_modules +**/node_modules .venv +**/.venv # CI/CD .github diff --git a/Dockerfile b/Dockerfile index 7f4ebc2d15..d988ea6407 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright # that would otherwise accumulate when hermes runs as PID 1. See #15012. RUN apt-get update && \ apt-get install -y --no-install-recommends \ - build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \ + build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \ rm -rf /var/lib/apt/lists/* # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime @@ -45,7 +45,13 @@ COPY --chown=hermes:hermes . . # Build browser dashboard and terminal UI assets. RUN cd web && npm run build && \ - cd ../ui-tui && npm run build + cd ../ui-tui && npm run build && \ + rm -rf node_modules/@hermes/ink && \ + rm -rf packages/hermes-ink/node_modules && \ + cp -R packages/hermes-ink node_modules/@hermes/ink && \ + npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \ + rm -rf node_modules/@hermes/ink/node_modules/react && \ + node --input-type=module -e "await import('@hermes/ink')" # ---------- Permissions ---------- # Make install dir world-readable so any HERMES_UID can read it at runtime. diff --git a/tests/hermes_cli/test_cmd_update.py b/tests/hermes_cli/test_cmd_update.py index 1e6a2245b2..caac6d3727 100644 --- a/tests/hermes_cli/test_cmd_update.py +++ b/tests/hermes_cli/test_cmd_update.py @@ -130,7 +130,7 @@ class TestCmdUpdateBranchFallback: # 3. web/ — install + "npm run build" for the web frontend full_flags = [ "/usr/bin/npm", - "install", + "ci", "--silent", "--no-fund", "--no-audit", @@ -139,7 +139,7 @@ class TestCmdUpdateBranchFallback: assert npm_calls == [ (full_flags, PROJECT_ROOT), (full_flags, PROJECT_ROOT / "ui-tui"), - (["/usr/bin/npm", "install", "--silent"], PROJECT_ROOT / "web"), + (["/usr/bin/npm", "ci", "--silent"], PROJECT_ROOT / "web"), (["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"), ] diff --git a/tests/tools/test_dockerfile_pid1_reaping.py b/tests/tools/test_dockerfile_pid1_reaping.py index 7538162798..52532a78dd 100644 --- a/tests/tools/test_dockerfile_pid1_reaping.py +++ b/tests/tools/test_dockerfile_pid1_reaping.py @@ -21,6 +21,7 @@ import pytest REPO_ROOT = Path(__file__).resolve().parents[2] DOCKERFILE = REPO_ROOT / "Dockerfile" +DOCKERIGNORE = REPO_ROOT / ".dockerignore" @pytest.fixture(scope="module") @@ -108,17 +109,37 @@ def test_dockerfile_installs_tui_dependencies(dockerfile_text): assert "ui-tui/package.json" in dockerfile_text assert "ui-tui/packages/hermes-ink/package-lock.json" in dockerfile_text assert any( - "ui-tui" in step - and "npm" in step - and (" install" in step or " ci" in step) + "ui-tui" in step and "npm" in step and (" install" in step or " ci" in step) for step in _run_steps(dockerfile_text) ) def test_dockerfile_builds_tui_assets(dockerfile_text): assert any( - "ui-tui" in step - and "npm" in step - and "run build" in step + "ui-tui" in step and "npm" in step and "run build" in step for step in _run_steps(dockerfile_text) ) + + +def test_dockerfile_materializes_local_tui_ink_package(dockerfile_text): + assert any( + "ui-tui" in step + and "node_modules/@hermes/ink" in step + and "packages/hermes-ink" in step + and "rm -rf packages/hermes-ink/node_modules" in step + and "npm install --omit=dev" in step + and "--prefix node_modules/@hermes/ink" in step + and "rm -rf node_modules/@hermes/ink/node_modules/react" in step + and "await import('@hermes/ink')" in step + for step in _run_steps(dockerfile_text) + ) + + +def test_dockerignore_excludes_nested_dependency_dirs(): + if not DOCKERIGNORE.exists(): + pytest.skip(".dockerignore not present in this checkout") + + text = DOCKERIGNORE.read_text() + + assert "**/node_modules" in text + assert "**/.venv" in text From 0d957a8d48e5f9ada343c8fe41717b7b93b0f318 Mon Sep 17 00:00:00 2001 From: Gille <4317663+helix4u@users.noreply.github.com> Date: Tue, 28 Apr 2026 14:27:43 -0600 Subject: [PATCH 02/23] fix(tui): surface mouse slash command (#17126) --- tests/test_tui_gateway_server.py | 21 +++++++++++++++++++++ tui_gateway/server.py | 6 ++++++ 2 files changed, 27 insertions(+) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 7acd4bc790..4a7b9e6cc1 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -980,6 +980,14 @@ def test_complete_slash_includes_tui_details_command(): assert any(item["text"] == "/details" for item in resp["result"]["items"]) +def test_complete_slash_includes_tui_mouse_command(): + resp = server.handle_request( + {"id": "1", "method": "complete.slash", "params": {"text": "/mou"}} + ) + + assert any(item["text"] == "/mouse" for item in resp["result"]["items"]) + + def test_complete_slash_details_args(): resp_root = server.handle_request( {"id": "0", "method": "complete.slash", "params": {"text": "/details"}} @@ -1547,6 +1555,19 @@ def test_commands_catalog_surfaces_quick_commands(monkeypatch): assert resp["result"]["canon"]["/notes"] == "/notes" +def test_commands_catalog_includes_tui_mouse_command(): + resp = server.handle_request( + {"id": "1", "method": "commands.catalog", "params": {}} + ) + + pairs = dict(resp["result"]["pairs"]) + tui_cat = next(c for c in resp["result"]["categories"] if c["name"] == "TUI") + tui_pairs = dict(tui_cat["pairs"]) + + assert "/mouse" in pairs + assert "/mouse" in tui_pairs + + def test_command_dispatch_exec_nonzero_surfaces_error(monkeypatch): monkeypatch.setattr( server, diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 0343cb84bb..8a356c9f1e 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -3354,6 +3354,7 @@ _TUI_HIDDEN: frozenset[str] = frozenset( _TUI_EXTRA: list[tuple[str, str, str]] = [ ("/compact", "Toggle compact display mode", "TUI"), ("/logs", "Show recent gateway log lines", "TUI"), + ("/mouse", "Toggle mouse/wheel tracking [on|off|toggle]", "TUI"), ] # Commands that queue messages onto _pending_input in the CLI. @@ -4133,6 +4134,11 @@ def _(rid, params: dict) -> dict: "display": "/logs", "meta": "Show recent gateway log lines", }, + { + "text": "/mouse", + "display": "/mouse", + "meta": "Toggle mouse/wheel tracking [on|off|toggle]", + }, ] for extra in extras: if extra["text"].startswith(text_lower) and not any( From 9eabc24e245f253ccb073128e6344d09cb806580 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 28 Apr 2026 13:56:39 -0500 Subject: [PATCH 03/23] fix(tui): visually distinguish markdown table rows from prose (#15534) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tables rendered through `` had no separator and no header weight, so they read as a paragraph with extra whitespace. This adds two tiny, border-free changes that survive Ink's grapheme-approximate column widths better than a full outline: * Bold the header row, keeping the existing amber colour. * Insert a dim `─`-dashed rule between the header and body rows. We deliberately stay away from a full outline — column widths are measured via `stripInlineMarkup(...).length`, which is grapheme-aware but still off by a cell on East Asian wide characters and emoji-mid- cell strings. A header rule plus the existing 2-space column gap gives the visual hierarchy the issue asks for without amplifying that inaccuracy into a misaligned border. Validation: `npm run type-check` clean, `npm test --run` 389/389. --- ui-tui/src/components/markdown.tsx | 33 ++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx index d3b6710b9e..7afae3beb6 100644 --- a/ui-tui/src/components/markdown.tsx +++ b/ui-tui/src/components/markdown.tsx @@ -1,5 +1,5 @@ import { Box, Link, Text } from '@hermes/ink' -import { memo, type ReactNode, useMemo } from 'react' +import { Fragment, memo, type ReactNode, useMemo } from 'react' import { ensureEmojiPresentation } from '../lib/emoji.js' import { highlightLine, isHighlightable } from '../lib/syntax.js' @@ -97,18 +97,33 @@ export const stripInlineMarkup = (v: string) => const renderTable = (k: number, rows: string[][], t: Theme) => { const widths = rows[0]!.map((_, ci) => Math.max(...rows.map(r => stripInlineMarkup(r[ci] ?? '').length))) + // Thin divider under the header. Without it tables look like prose + // with extra spacing because the header is just amber-coloured text + // (#15534). We avoid full borders on purpose — column widths are + // grapheme-approximate so a real outline often misaligns; one dim + // dashed rule under row 0 plus tab-style column gaps reads cleanly + // on every terminal we tested. + const sep = widths.map(w => '─'.repeat(Math.max(1, w))).join(' ') + return ( {rows.map((row, ri) => ( - - {widths.map((w, ci) => ( - - - {' '.repeat(Math.max(0, w - stripInlineMarkup(row[ci] ?? '').length))} - {ci < widths.length - 1 ? ' ' : ''} + + + {widths.map((w, ci) => ( + + + {' '.repeat(Math.max(0, w - stripInlineMarkup(row[ci] ?? '').length))} + {ci < widths.length - 1 ? ' ' : ''} + + ))} + + {ri === 0 && rows.length > 1 ? ( + + {sep} - ))} - + ) : null} + ))} ) From 4689ace7cb1602ca611aab4d64a7842d3f72c135 Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 28 Apr 2026 15:29:11 -0500 Subject: [PATCH 04/23] review(copilot): clarify table-rule rationale (UTF-16 code units, not graphemes) --- ui-tui/src/components/markdown.tsx | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx index 7afae3beb6..e22eb5bb1d 100644 --- a/ui-tui/src/components/markdown.tsx +++ b/ui-tui/src/components/markdown.tsx @@ -99,10 +99,11 @@ const renderTable = (k: number, rows: string[][], t: Theme) => { // Thin divider under the header. Without it tables look like prose // with extra spacing because the header is just amber-coloured text - // (#15534). We avoid full borders on purpose — column widths are - // grapheme-approximate so a real outline often misaligns; one dim - // dashed rule under row 0 plus tab-style column gaps reads cleanly - // on every terminal we tested. + // (#15534). We avoid full borders on purpose — column widths come + // from `stripInlineMarkup(...).length` (UTF-16 code units, not + // display width), so a real outline often misaligns on emoji and + // East-Asian wide characters; one dim dashed rule under row 0 plus + // tab-style column gaps reads cleanly on every terminal we tested. const sep = widths.map(w => '─'.repeat(Math.max(1, w))).join(' ') return ( From 50edbe6f46aec03237b9225935250fb1d072243a Mon Sep 17 00:00:00 2001 From: Brooklyn Nicholson Date: Tue, 28 Apr 2026 15:49:35 -0500 Subject: [PATCH 05/23] review(copilot): say solid rule, not dashed --- ui-tui/src/components/markdown.tsx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ui-tui/src/components/markdown.tsx b/ui-tui/src/components/markdown.tsx index e22eb5bb1d..9c18086147 100644 --- a/ui-tui/src/components/markdown.tsx +++ b/ui-tui/src/components/markdown.tsx @@ -102,8 +102,9 @@ const renderTable = (k: number, rows: string[][], t: Theme) => { // (#15534). We avoid full borders on purpose — column widths come // from `stripInlineMarkup(...).length` (UTF-16 code units, not // display width), so a real outline often misaligns on emoji and - // East-Asian wide characters; one dim dashed rule under row 0 plus - // tab-style column gaps reads cleanly on every terminal we tested. + // East-Asian wide characters; one dim solid rule (`─`) under row 0 + // plus tab-style column gaps reads cleanly on every terminal we + // tested. const sep = widths.map(w => '─'.repeat(Math.max(1, w))).join(' ') return ( From a830f25f716190168dd7db6819c0b48848049002 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 13:56:02 -0700 Subject: [PATCH 06/23] fix(tui): surface gateway stderr tail in start_timeout activity (#17112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tui): append gateway stderr tail to start_timeout activity `gateway.start_timeout` previously published only `cwd` + `python`, which made TUI startup failures hard to disambiguate. The user saw `gateway startup timed out · /path/to/python /repo · /logs to inspect` with no signal whether the actual cause was a wrong python interpreter, a missing dependency, or a config parse failure. Plumb a 20-line stderr tail through the event so the most useful lines land directly in the TUI activity feed, capped to the last 8 non-empty lines for readability: * `gatewayClient.ts` — collect `getLogTail(20)` when the readyTimer fires and attach it as `payload.stderr_tail`. * `gatewayTypes.ts` — extend the `gateway.start_timeout` event union with the new optional field. * `createGatewayEventHandler.ts` — emit the trimmed lines after the existing `gateway startup timed out` activity entry, classified `error`. Tests: regression test in `createGatewayEventHandler.test.ts` checks that `ModuleNotFoundError` / `FileNotFoundError` lines from the tail land in `getTurnState().activity` so they show up in the UI immediately. Validation: `npm run type-check` clean, `npm test --run` 390/390. * review(copilot): filter blanks before slice and cap stderr tail at 120 chars --- .../createGatewayEventHandler.test.ts | 21 +++++++++++++++++++ ui-tui/src/app/createGatewayEventHandler.ts | 20 +++++++++++++++++- ui-tui/src/gatewayClient.ts | 12 ++++++++++- ui-tui/src/gatewayTypes.ts | 2 +- 4 files changed, 52 insertions(+), 3 deletions(-) diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index c09bd4ee96..fbcc069dd5 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -293,6 +293,27 @@ describe('createGatewayEventHandler', () => { expect(appended[1]).toMatchObject({ role: 'assistant', text: 'final answer' }) }) + it('annotates gateway.start_timeout with stderr tail lines so users can diagnose without /logs', () => { + const appended: Msg[] = [] + const onEvent = createGatewayEventHandler(buildCtx(appended)) + + onEvent({ + payload: { + cwd: '/repo', + python: '/opt/venv/bin/python', + stderr_tail: + '[startup] timed out\nModuleNotFoundError: No module named openai\nFileNotFoundError: ~/.hermes/config.yaml' + }, + type: 'gateway.start_timeout' + } as any) + + const messages = getTurnState().activity.map(a => a.text) + + expect(messages.some(m => m.includes('gateway startup timed out'))).toBe(true) + expect(messages.some(m => m.includes('ModuleNotFoundError'))).toBe(true) + expect(messages.some(m => m.includes('FileNotFoundError'))).toBe(true) + }) + it('anchors inline_diff as its own segment where the edit happened', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 267bf8c166..d36faa336c 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -321,12 +321,30 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: } case 'gateway.start_timeout': { - const { cwd, python } = ev.payload ?? {} + const { cwd, python, stderr_tail: stderrTail } = ev.payload ?? {} const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : '' setStatus('gateway startup timeout') turnController.pushActivity(`gateway startup timed out${trace} · /logs to inspect`, 'error') + // Surface the most useful stderr lines inline so users can tell + // "wrong python", "missing dep", and "config parse failure" + // apart without leaving the TUI. Filter blank rows BEFORE + // taking the last N so trailing empty lines in the buffer + // don't crowd out actual content; truncate to match the + // 120-char clip used for `gateway.stderr` activity entries. + const STDERR_LINE_CAP = 120 + const STDERR_LINES_MAX = 8 + const tailLines = (stderrTail ?? '') + .split('\n') + .map(l => l.trim()) + .filter(Boolean) + .slice(-STDERR_LINES_MAX) + + for (const line of tailLines) { + turnController.pushActivity(line.slice(0, STDERR_LINE_CAP), 'error') + } + return } diff --git a/ui-tui/src/gatewayClient.ts b/ui-tui/src/gatewayClient.ts index 9bf681f8b2..838bf31fbc 100644 --- a/ui-tui/src/gatewayClient.ts +++ b/ui-tui/src/gatewayClient.ts @@ -117,8 +117,18 @@ export class GatewayClient extends EventEmitter { return } + // Append the most recent gateway stderr/log lines to the timeout + // event so users can tell apart "wrong python", "missing dep", + // and "config parse failure" from one glance instead of having + // to dig through `/logs`. Capped to keep the activity feed + // readable on slow boots. + const stderrTail = this.getLogTail(20) + this.pushLog(`[startup] timed out waiting for gateway.ready (python=${python}, cwd=${cwd})`) - this.publish({ type: 'gateway.start_timeout', payload: { cwd, python } }) + this.publish({ + type: 'gateway.start_timeout', + payload: { cwd, python, stderr_tail: stderrTail } + }) }, STARTUP_TIMEOUT_MS) this.proc = spawn(python, ['-m', 'tui_gateway.entry'], { cwd, env, stdio: ['pipe', 'pipe', 'pipe'] }) diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts index 605d51213f..5a7f8d8ad1 100644 --- a/ui-tui/src/gatewayTypes.ts +++ b/ui-tui/src/gatewayTypes.ts @@ -415,7 +415,7 @@ export type GatewayEvent = | { payload?: { state?: 'idle' | 'listening' | 'transcribing' }; session_id?: string; type: 'voice.status' } | { payload?: { no_speech_limit?: boolean; text?: string }; session_id?: string; type: 'voice.transcript' } | { payload: { line: string }; session_id?: string; type: 'gateway.stderr' } - | { payload?: { cwd?: string; python?: string }; session_id?: string; type: 'gateway.start_timeout' } + | { payload?: { cwd?: string; python?: string; stderr_tail?: string }; session_id?: string; type: 'gateway.start_timeout' } | { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' } | { payload?: { text?: string }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' } | { payload: { name?: string; preview?: string }; session_id?: string; type: 'tool.progress' } From e42065b1f796554c7acf7e8cbc34b4ec245f0a3c Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 14:51:07 -0700 Subject: [PATCH 07/23] fix(tui): drop stale stream events after ctrl-c interrupt (#16706) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tui): drop stale stream events after ctrl-c interrupt Once interruptTurn() flips this.interrupted, only recordMessageDelta short-circuited. recordReasoningDelta/Available, recordToolStart/ Progress/Complete, and recordInlineDiffToolComplete kept populating turnState until the python loop reached its next _interrupt_requested check (~1s on busy turns), making it look like ctrl-c was ignored while late "thinking" + tool calls kept landing in the UI. Add the same interrupted guard to every stream-side recorder, and clear the flag at startMessage() so the next turn isn't suppressed if the previous turn never delivered message.complete. * fix(tui): guard recordTodos against post-interrupt mutation; fake-timers in test Copilot review on PR #16706: 1. `recordToolStart` is interruption-guarded, but `tool.start` handler also calls `recordTodos(payload.todos)` first — so a late tool.start carrying todos could still mutate `turnState.todos` after Ctrl-C, leaving ghost rows in the panel. Adds the same `if (this.interrupted) return` early-exit to `recordTodos` so *all* tool.start side-effects are dropped post-interrupt. 2. The interrupt test was leaking a real `setTimeout` (interrupt cooldown) across test files, which could fire later and mutate uiStore from the wrong test context. Wraps the test in `vi.useFakeTimers()` + `vi.runAllTimers()` and restores real timers in finally. 3. Extends the same test with a todos payload on the post-interrupt tool.start so we have explicit regression coverage for #1. * fix(tui): guard pushTrail post-interrupt; harden interrupt-test cleanup Round 2 Copilot review on PR #16706: 1. `tool.generating` events route through `pushTrail`, which was not interruption-guarded — late events could still write 'drafting …' into `turnTrail` after Ctrl-C, leaving a stale shimmer in the UI. Adds the same `if (this.interrupted) return` early-exit. 2. Test cleanup moved `vi.runAllTimers()` into `finally` (before `vi.useRealTimers()`) so a mid-test assertion failure can't leak the interrupt-cooldown setTimeout across other test files. 3. Replaced the misleading 'pre-interrupt todos … expected to be cleared by the interrupt cycle' comment with an accurate one reflecting current behaviour (interrupt does NOT clear todos). 4. Added an explicit assertion that a post-interrupt `tool.generating` event does not extend `turnTrail` — regression coverage for #1. --- .../createGatewayEventHandler.test.ts | 83 +++++++++++++++++++ ui-tui/src/app/turnController.ts | 37 +++++++-- 2 files changed, 114 insertions(+), 6 deletions(-) diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index fbcc069dd5..d9f147c7a5 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -495,4 +495,87 @@ describe('createGatewayEventHandler', () => { expect(getTurnState().activity).toMatchObject([{ text: 'boom', tone: 'error' }]) }) + + it('drops stale reasoning/tool/todos events after ctrl-c until the next message starts', () => { + // Repro for the discord report: ctrl-c interrupts, but late reasoning/tool + // events from the still-winding-down agent loop kept populating the UI for + // ~1s, making it look like the interrupt had been ignored. + // + // Fake timers because `interruptTurn` schedules a real setTimeout for + // its cooldown — without flushing it inside this test, the timeout + // can fire later and mutate uiStore/turnState during unrelated tests + // (cross-file flake). + vi.useFakeTimers() + + try { + const appended: Msg[] = [] + const ctx = buildCtx(appended) + ctx.gateway.gw.request = vi.fn(async () => ({ status: 'interrupted' })) + const onEvent = createGatewayEventHandler(ctx) + + patchUiState({ sid: 'sess-1' }) + onEvent({ payload: {}, type: 'message.start' } as any) + onEvent({ + payload: { + context: 'pre', + name: 'search', + todos: [{ content: 'pre-interrupt', id: 'todo-1', status: 'pending' }], + tool_id: 't-1' + }, + type: 'tool.start' + } as any) + + // Pre-interrupt todos should land in turn state. + expect(getTurnState().todos).toEqual([ + { content: 'pre-interrupt', id: 'todo-1', status: 'pending' } + ]) + + turnController.interruptTurn({ + appendMessage: (msg: Msg) => appended.push(msg), + gw: ctx.gateway.gw, + sid: 'sess-1', + sys: ctx.system.sys + }) + + onEvent({ payload: { text: 'still thinking…' }, type: 'reasoning.delta' } as any) + // Post-interrupt tool.start with a todos payload — must NOT mutate todos. + onEvent({ + payload: { + context: 'post', + name: 'browser', + todos: [{ content: 'late ghost', id: 'todo-ghost', status: 'pending' }], + tool_id: 't-2' + }, + type: 'tool.start' + } as any) + // Late tool.generating must NOT push a 'drafting …' line into the trail. + const trailBefore = getTurnState().turnTrail.length + onEvent({ payload: { name: 'browser' }, type: 'tool.generating' } as any) + expect(getTurnState().turnTrail.length).toBe(trailBefore) + onEvent({ payload: { name: 'browser', preview: 'loading' }, type: 'tool.progress' } as any) + onEvent({ payload: { summary: 'done', tool_id: 't-2' }, type: 'tool.complete' } as any) + onEvent({ payload: { text: 'late chunk' }, type: 'message.delta' } as any) + + expect(getTurnState().tools).toEqual([]) + expect(turnController.reasoningText).toBe('') + expect(turnController.bufRef).toBe('') + expect(getTurnState().streamPendingTools).toEqual([]) + expect(getTurnState().streamSegments).toEqual([]) + // Stale post-interrupt todos must not have leaked through. + // (This test does not assert that pre-interrupt todos are cleared — + // current interrupt path leaves them visible until the next message.) + expect(getTurnState().todos.find(t => t.content === 'late ghost')).toBeUndefined() + + onEvent({ payload: {}, type: 'message.start' } as any) + onEvent({ payload: { text: 'fresh' }, type: 'reasoning.delta' } as any) + + expect(turnController.reasoningText).toBe('fresh') + } finally { + // Drain pending fake timers BEFORE restoring real timers so a mid- + // test assertion failure can't leak the interrupt-cooldown setTimeout + // across test files (the original Copilot concern). + vi.runAllTimers() + vi.useRealTimers() + } + }) }) diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts index 49a7fd7d67..dbd5e1faf0 100644 --- a/ui-tui/src/app/turnController.ts +++ b/ui-tui/src/app/turnController.ts @@ -316,6 +316,10 @@ class TurnController { } recordTodos(value: unknown) { + if (this.interrupted) { + return + } + const todos = parseTodos(value) if (todos !== null) { @@ -397,6 +401,10 @@ class TurnController { } pushTrail(line: string) { + if (this.interrupted) { + return + } + patchTurnState(state => { if (state.turnTrail.at(-1) === line) { return state @@ -509,13 +517,13 @@ class TurnController { } recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) { - this.pruneTransient() - this.endReasoningPhase() - - if (!text || this.interrupted) { + if (this.interrupted || !text) { return } + this.pruneTransient() + this.endReasoningPhase() + this.bufRef = rendered ?? this.bufRef + text if (getUiState().streaming) { @@ -524,7 +532,7 @@ class TurnController { } recordReasoningAvailable(text: string) { - if (!getUiState().showReasoning) { + if (this.interrupted || !getUiState().showReasoning) { return } @@ -542,7 +550,7 @@ class TurnController { } recordReasoningDelta(text: string) { - if (!getUiState().showReasoning) { + if (this.interrupted || !getUiState().showReasoning) { return } @@ -570,6 +578,10 @@ class TurnController { duration?: number, todos?: unknown ) { + if (this.interrupted) { + return + } + this.recordTodos(todos) const line = this.completeTool(toolId, fallbackName, error, summary, duration) @@ -585,6 +597,10 @@ class TurnController { error?: string, duration?: number ) { + if (this.interrupted) { + return + } + this.flushStreamingSegment() this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration)]) this.publishToolState() @@ -626,6 +642,10 @@ class TurnController { } recordToolProgress(toolName: string, preview: string) { + if (this.interrupted) { + return + } + const index = this.activeTools.findIndex(tool => tool.name === toolName) if (index < 0) { @@ -645,6 +665,10 @@ class TurnController { } recordToolStart(toolId: string, name: string, context: string) { + if (this.interrupted) { + return + } + this.flushStreamingSegment() this.closeReasoningSegment() this.pruneTransient() @@ -716,6 +740,7 @@ class TurnController { this.reasoningSegmentIndex = null this.turnTools = [] this.toolTokenAcc = 0 + this.interrupted = false this.persistedToolLabels.clear() patchUiState({ busy: true }) patchTurnState({ activity: [], outcome: '', subagents: [], toolTokens: 0, tools: [], turnTrail: [] }) From 87d3fa6f1c73ccb60b91e9e2c405d5212244bce9 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 14:53:38 -0700 Subject: [PATCH 08/23] feat(tui): opt-in auto-resume of the most recent session (#17130) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(tui): opt-in auto-resume of the most recent session `hermes --tui` always forges a fresh session at startup unless the user sets `HERMES_TUI_RESUME=`. Disconnects, terminal-window crashes, and accidental Ctrl+D therefore lose every piece of in-flight context even though `state.db` still has the full history a `/resume` away. Add an opt-in path that mirrors classic CLI's `hermes -c` muscle memory: when `display.tui_auto_resume_recent: true` is set in `~/.hermes/config.yaml`, the TUI looks up the most recent human-facing session and resumes it instead of starting fresh. Default off so existing users aren't surprised; explicit `HERMES_TUI_RESUME` always wins. Wires: * New `session.most_recent` JSON-RPC in `tui_gateway/server.py` that returns the first non-`tool` row from `list_sessions_rich`, or `{"session_id": null}` when none. Uses the same deny-list as `session.list` so sub-agent rows can't sneak in. * `createGatewayEventHandler.handleReady` re-ordered: explicit `STARTUP_RESUME_ID` first (unchanged), then conditional auto-resume via `config.get full → display.tui_auto_resume_recent`, then the legacy `newSession()` fallback. Failures of either RPC fall back to `newSession()` so the path is always finite. * Default `display.tui_auto_resume_recent: False` added to `DEFAULT_CONFIG` in `hermes_cli/config.py` (no `_config_version` bump per AGENTS.md — deep-merge handles the additive key). Tests: * 4 new vitest cases in `createGatewayEventHandler.test.ts` cover every gate-and-fallback combination (env wins, config off, config on with hit, config on with miss). * 3 new pytest cases for `session.most_recent` (denied row skip, tool-only → null, db-unavailable → null). Validation: scripts/run_tests.sh tests/test_tui_gateway_server.py — 93/93. cd ui-tui && npm run type-check — clean; npm test --run — 393/393. * review(copilot): fold session.most_recent errors into null + extend ConfigDisplayConfig * review(copilot): cover RPC-rejection fallbacks in auto-resume tests --- hermes_cli/config.py | 5 + tests/test_tui_gateway_server.py | 67 ++++++++ tui_gateway/server.py | 44 ++++++ .../createGatewayEventHandler.test.ts | 146 ++++++++++++++++++ ui-tui/src/app/createGatewayEventHandler.ts | 50 +++++- ui-tui/src/gatewayTypes.ts | 8 + 6 files changed, 314 insertions(+), 6 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 7fdc4d7e07..a0222391e2 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -703,6 +703,11 @@ DEFAULT_CONFIG = { "personality": "kawaii", "resume_display": "full", "busy_input_mode": "interrupt", # interrupt | queue | steer + # When true, `hermes --tui` auto-resumes the most recent human- + # facing session on launch instead of forging a fresh one. + # Mirrors `hermes -c` muscle memory. Default off so existing + # users aren't surprised. HERMES_TUI_RESUME= always wins. + "tui_auto_resume_recent": False, "bell_on_complete": False, "show_reasoning": False, "streaming": False, diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 4a7b9e6cc1..271214fa39 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -2654,3 +2654,70 @@ def test_prompt_submit_skips_auto_title_when_response_empty(monkeypatch): ) mock_title.assert_not_called() + + +# ── session.most_recent ────────────────────────────────────────────── + + +def test_session_most_recent_returns_first_non_denied(monkeypatch): + """Drops `tool` rows like session.list does, returns the first hit.""" + + class _DB: + def list_sessions_rich(self, *, source=None, limit=200): + return [ + {"id": "tool-1", "source": "tool", "title": "noise", "started_at": 100}, + {"id": "tui-1", "source": "tui", "title": "real", "started_at": 99}, + ] + + monkeypatch.setattr(server, "_get_db", lambda: _DB()) + + resp = server.handle_request( + {"id": "1", "method": "session.most_recent", "params": {}} + ) + + assert resp["result"]["session_id"] == "tui-1" + assert resp["result"]["title"] == "real" + assert resp["result"]["source"] == "tui" + + +def test_session_most_recent_returns_null_when_only_tool_rows(monkeypatch): + class _DB: + def list_sessions_rich(self, *, source=None, limit=200): + return [{"id": "tool-1", "source": "tool", "started_at": 1}] + + monkeypatch.setattr(server, "_get_db", lambda: _DB()) + + resp = server.handle_request( + {"id": "1", "method": "session.most_recent", "params": {}} + ) + + assert resp["result"]["session_id"] is None + + +def test_session_most_recent_folds_db_exception_into_null_result(monkeypatch): + """Per contract, errors are folded into the null-result shape so + callers don't have to special-case JSON-RPC error envelopes for + 'no answer' (Copilot review on #17130).""" + + class _BrokenDB: + def list_sessions_rich(self, *, source=None, limit=200): + raise RuntimeError("db locked") + + monkeypatch.setattr(server, "_get_db", lambda: _BrokenDB()) + + resp = server.handle_request( + {"id": "1", "method": "session.most_recent", "params": {}} + ) + + assert "error" not in resp + assert resp["result"]["session_id"] is None + + +def test_session_most_recent_handles_db_unavailable(monkeypatch): + monkeypatch.setattr(server, "_get_db", lambda: None) + + resp = server.handle_request( + {"id": "1", "method": "session.most_recent", "params": {}} + ) + + assert resp["result"]["session_id"] is None diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 8a356c9f1e..fc70168144 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1788,6 +1788,50 @@ def _(rid, params: dict) -> dict: return _err(rid, 5006, str(e)) +@method("session.most_recent") +def _(rid, params: dict) -> dict: + """Return the most recent human-facing session id, or ``None``. + + Mirrors ``session.list``'s deny-list behaviour (drops ``tool`` + sub-agent rows). Used by TUI auto-resume when + ``display.tui_auto_resume_recent`` is on; the field is also handy + for any CLI tooling that wants "latest session" without paginating + the full list. + + Contract: a ``{"session_id": null}`` result means "no eligible + session found right now". Errors are also folded into that + null-result shape (and logged) so callers don't have to special- + case JSON-RPC error envelopes for what is a normal "no answer". + """ + db = _get_db() + if db is None: + return _ok(rid, {"session_id": None}) + try: + deny = frozenset({"tool"}) + # Over-fetch by a generous bounded amount so heavy sub-agent + # users (lots of recent ``tool`` rows) don't get a false + # "no eligible session" answer. ``session.list`` uses a + # similar over-fetch strategy. + rows = db.list_sessions_rich(source=None, limit=200) + for row in rows: + src = (row.get("source") or "").strip().lower() + if src in deny: + continue + return _ok( + rid, + { + "session_id": row.get("id"), + "title": row.get("title") or "", + "started_at": row.get("started_at") or 0, + "source": row.get("source") or "", + }, + ) + return _ok(rid, {"session_id": None}) + except Exception: + logger.exception("session.most_recent failed") + return _ok(rid, {"session_id": None}) + + @method("session.resume") def _(rid, params: dict) -> dict: target = params.get("session_id", "") diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index d9f147c7a5..ffda1055a0 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -458,6 +458,152 @@ describe('createGatewayEventHandler', () => { }) }) + it('on gateway.ready with no STARTUP_RESUME_ID and auto_resume off, forges a new session', async () => { + const appended: Msg[] = [] + const newSession = vi.fn() + const resumeById = vi.fn() + const ctx = buildCtx(appended) + + ctx.session.newSession = newSession + ctx.session.resumeById = resumeById + ctx.session.STARTUP_RESUME_ID = '' + ctx.gateway.rpc = vi.fn(async (method: string) => { + if (method === 'config.get') { + return { config: { display: { tui_auto_resume_recent: false } } } + } + + return null + }) + + createGatewayEventHandler(ctx)({ payload: {}, type: 'gateway.ready' } as any) + + await vi.waitFor(() => expect(newSession).toHaveBeenCalled()) + expect(resumeById).not.toHaveBeenCalled() + }) + + it('on gateway.ready with auto_resume on and a recent session, resumes it', async () => { + const appended: Msg[] = [] + const newSession = vi.fn() + const resumeById = vi.fn() + const ctx = buildCtx(appended) + + ctx.session.newSession = newSession + ctx.session.resumeById = resumeById + ctx.session.STARTUP_RESUME_ID = '' + ctx.gateway.rpc = vi.fn(async (method: string) => { + if (method === 'config.get') { + return { config: { display: { tui_auto_resume_recent: true } } } + } + + if (method === 'session.most_recent') { + return { session_id: 'sess-most-recent' } + } + + return null + }) + + createGatewayEventHandler(ctx)({ payload: {}, type: 'gateway.ready' } as any) + + await vi.waitFor(() => expect(resumeById).toHaveBeenCalledWith('sess-most-recent')) + expect(newSession).not.toHaveBeenCalled() + }) + + it('on gateway.ready with auto_resume on but no eligible session, falls back to new', async () => { + const appended: Msg[] = [] + const newSession = vi.fn() + const resumeById = vi.fn() + const ctx = buildCtx(appended) + + ctx.session.newSession = newSession + ctx.session.resumeById = resumeById + ctx.session.STARTUP_RESUME_ID = '' + ctx.gateway.rpc = vi.fn(async (method: string) => { + if (method === 'config.get') { + return { config: { display: { tui_auto_resume_recent: true } } } + } + + if (method === 'session.most_recent') { + return { session_id: null } + } + + return null + }) + + createGatewayEventHandler(ctx)({ payload: {}, type: 'gateway.ready' } as any) + + await vi.waitFor(() => expect(newSession).toHaveBeenCalled()) + expect(resumeById).not.toHaveBeenCalled() + }) + + it('on gateway.ready when config.get rejects, falls back to new session', async () => { + const appended: Msg[] = [] + const newSession = vi.fn() + const resumeById = vi.fn() + const ctx = buildCtx(appended) + + ctx.session.newSession = newSession + ctx.session.resumeById = resumeById + ctx.session.STARTUP_RESUME_ID = '' + ctx.gateway.rpc = vi.fn(async (method: string) => { + if (method === 'config.get') { + throw new Error('gateway timeout') + } + + return null + }) + + createGatewayEventHandler(ctx)({ payload: {}, type: 'gateway.ready' } as any) + + await vi.waitFor(() => expect(newSession).toHaveBeenCalled()) + expect(resumeById).not.toHaveBeenCalled() + }) + + it('on gateway.ready when session.most_recent rejects, falls back to new session', async () => { + const appended: Msg[] = [] + const newSession = vi.fn() + const resumeById = vi.fn() + const ctx = buildCtx(appended) + + ctx.session.newSession = newSession + ctx.session.resumeById = resumeById + ctx.session.STARTUP_RESUME_ID = '' + ctx.gateway.rpc = vi.fn(async (method: string) => { + if (method === 'config.get') { + return { config: { display: { tui_auto_resume_recent: true } } } + } + + if (method === 'session.most_recent') { + throw new Error('db locked') + } + + return null + }) + + createGatewayEventHandler(ctx)({ payload: {}, type: 'gateway.ready' } as any) + + await vi.waitFor(() => expect(newSession).toHaveBeenCalled()) + expect(resumeById).not.toHaveBeenCalled() + }) + + it('on gateway.ready with STARTUP_RESUME_ID set, the env wins over config auto_resume', async () => { + const appended: Msg[] = [] + const newSession = vi.fn() + const resumeById = vi.fn() + const ctx = buildCtx(appended) + + ctx.session.newSession = newSession + ctx.session.resumeById = resumeById + ctx.session.STARTUP_RESUME_ID = 'env-explicit' + ctx.gateway.rpc = vi.fn(async () => ({ + config: { display: { tui_auto_resume_recent: true } } + })) + + createGatewayEventHandler(ctx)({ payload: {}, type: 'gateway.ready' } as any) + + await vi.waitFor(() => expect(resumeById).toHaveBeenCalledWith('env-explicit')) + expect(newSession).not.toHaveBeenCalled() + }) + it('keeps gateway noise informational and approval out of Activity', async () => { const appended: Msg[] = [] const ctx = buildCtx(appended) diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index d36faa336c..3abfc18561 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -1,6 +1,13 @@ import { STREAM_BATCH_MS } from '../config/timing.js' import { buildSetupRequiredSections, SETUP_REQUIRED_TITLE } from '../content/setup.js' -import type { CommandsCatalogResponse, DelegationStatusResponse, GatewayEvent, GatewaySkin } from '../gatewayTypes.js' +import type { + CommandsCatalogResponse, + ConfigFullResponse, + DelegationStatusResponse, + GatewayEvent, + GatewaySkin, + SessionMostRecentResponse +} from '../gatewayTypes.js' import { rpcErrorMessage } from '../lib/rpc.js' import { topLevelSubagents } from '../lib/subagentTree.js' import { formatToolCall, stripAnsi } from '../lib/text.js' @@ -171,15 +178,46 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: }) .catch((e: unknown) => turnController.pushActivity(`command catalog unavailable: ${rpcErrorMessage(e)}`, 'info')) - if (!STARTUP_RESUME_ID) { - patchUiState({ status: 'forging session…' }) - newSession() + if (STARTUP_RESUME_ID) { + patchUiState({ status: 'resuming…' }) + resumeById(STARTUP_RESUME_ID) return } - patchUiState({ status: 'resuming…' }) - resumeById(STARTUP_RESUME_ID) + // Opt-in: when `display.tui_auto_resume_recent` is true, look up + // the most recent human-facing session and resume it instead of + // forging a brand-new one. Mirrors classic CLI's `hermes -c` / + // `hermes --tui` muscle memory and addresses the audit's "session + // unrecoverable after disconnection" gap. Default off so existing + // users aren't surprised. + rpc('config.get', { key: 'full' }) + .then(cfg => { + if (!cfg?.config?.display?.tui_auto_resume_recent) { + patchUiState({ status: 'forging session…' }) + newSession() + + return + } + + return rpc('session.most_recent', {}).then(r => { + const target = r?.session_id + + if (target) { + patchUiState({ status: 'resuming most recent…' }) + resumeById(target) + + return + } + + patchUiState({ status: 'forging session…' }) + newSession() + }) + }) + .catch(() => { + patchUiState({ status: 'forging session…' }) + newSession() + }) } return (ev: GatewayEvent) => { diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts index 5a7f8d8ad1..6a4fa2ae02 100644 --- a/ui-tui/src/gatewayTypes.ts +++ b/ui-tui/src/gatewayTypes.ts @@ -60,6 +60,7 @@ export interface ConfigDisplayConfig { show_reasoning?: boolean streaming?: boolean thinking_mode?: string + tui_auto_resume_recent?: boolean tui_compact?: boolean tui_mouse?: boolean tui_statusbar?: 'bottom' | 'off' | 'on' | 'top' | boolean @@ -119,6 +120,13 @@ export interface SessionListResponse { sessions?: SessionListItem[] } +export interface SessionMostRecentResponse { + session_id?: null | string + source?: string + started_at?: number + title?: string +} + export interface SessionTitleResponse { pending?: boolean session_key?: string From 15ef11a8b86d073266e99ef2d9393d8fb0dab976 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 15:46:57 -0700 Subject: [PATCH 09/23] fix(tui): make /browser connect actually take effect on the live agent (#17120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tui): make /browser connect actually take effect on the live agent Reports were that `/browser connect ` (and "changes to CDP url don't get picked up") didn't propagate to the live agent in `--tui`, forcing users to fall back to setting `browser.cdp_url` in `config.yaml` and restarting. Tracing the path on current main shows the protocol wiring is already correct — `/browser` is registered in `ui-tui/src/app/slash/commands/ops.ts` and dispatches `browser.manage` through the gateway RPC, NOT the slash worker (covered by the `browser.manage` row in `slashParity.test.ts`). But three real gaps left the experience flaky: 1. `cleanup_all_browsers()` ran AFTER `os.environ["BROWSER_CDP_URL"]` was rewritten. `_ensure_cdp_supervisor(...)` reads the env to resolve its target URL, so a tool call landing in that brief window could re-attach the supervisor to the OLD CDP endpoint just before we reaped sessions, leaving the agent talking to a dead URL. Reorder to clean first, swap env, clean again so the supervisor for the default task is definitively closed. 2. `browser.manage status` reported only the env var, ignoring `browser.cdp_url` from config.yaml. `_get_cdp_override()` (the resolver the agent itself uses) consults both — match it so `/browser status` answers the same question the next `browser_navigate` will see. Closes a stealth bug where users saw "browser not connected" while their CDP URL was perfectly set in config.yaml. 3. `/browser disconnect` only cleared `BROWSER_CDP_URL` and reaped once, leaving the same swap window as connect. Symmetrical double-cleanup here too. Frontend (`ops.ts`): * Echo "next browser tool call will use this CDP endpoint" on success so users see immediate confirmation that the gateway accepted the swap, even before any tool runs. * Mention `browser.cdp_url` in `config.yaml` in the usage hint and the not-connected status line. Persistent config is the correct fix for some terminal-multiplexer / sub-agent flows where env inheritance is unreliable; surfacing it makes that workaround discoverable. Tests (4 new, all hermetic): * `status` returns the resolved URL when only `browser.cdp_url` is set in config.yaml. * `connect` writes env AND cleans before/after, in that order. * `connect` against an unreachable endpoint does NOT mutate env or reap. * `disconnect` removes env and cleans twice. Validation: scripts/run_tests.sh tests/test_tui_gateway_server.py — 94/94 pass. cd ui-tui && npm run type-check — clean; npm test --run — 389/389. * review(copilot): always defer to _get_cdp_override; normalize bare host:port * review(copilot): collapse discovery-style CDP paths so /json/version isn't duplicated * fix(tui): /browser status must not perform CDP discovery I/O Copilot review on PR #17120: previous version routed through `tools.browser_tool._get_cdp_override`, which calls `_resolve_cdp_override` and performs an HTTP probe to /json/version with a multi-second timeout for discovery-style URLs. That blocks the TUI on `/browser status` whenever the configured host is slow or unreachable. Status now reads env-then-config directly with no network I/O. The WS normalization still happens in `browser_navigate` for actual tool calls, so behaviour-on-call is unchanged. * fix(tui): skip /json/version probe for concrete ws://devtools/browser endpoints Round 2 Copilot review on PR #17120: hosted CDP providers (Browserbase, browserless, etc.) return concrete `ws[s]://.../devtools/browser/` URLs which are already directly connectable but don't serve the HTTP discovery path. The previous `/json/version` probe rejected these valid endpoints with 'could not reach browser CDP'. For `ws[s]://...` URLs whose path starts with `/devtools/browser/` we now do a TCP-level reachability check (`socket.create_connection`) instead of the HTTP probe. The actual CDP handshake happens on the next `browser_navigate` call, so we still surface unreachable hosts as 5031 errors — just without the false negatives. Discovery-style URLs (`http://host:port[/json[/version]]`) keep the HTTP probe path unchanged. Updated existing test + added two new ones (TCP-only success, TCP unreachable → 5031). --- tests/test_tui_gateway_server.py | 289 +++++++++++++++++++++++++++ tui_gateway/server.py | 142 +++++++++++-- ui-tui/src/app/slash/commands/ops.ts | 22 +- 3 files changed, 426 insertions(+), 27 deletions(-) diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 271214fa39..da863360fb 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -2721,3 +2721,292 @@ def test_session_most_recent_handles_db_unavailable(monkeypatch): ) assert resp["result"]["session_id"] is None +# ── browser.manage ─────────────────────────────────────────────────── + + +def _stub_urlopen(monkeypatch, *, ok: bool): + """Patch urllib.request.urlopen used by browser.manage to short-circuit probes.""" + + class _Resp: + status = 200 if ok else 503 + + def __enter__(self): + return self + + def __exit__(self, *_): + return False + + def _opener(_url, timeout=2.0): # noqa: ARG001 — match urllib signature + if not ok: + raise OSError("probe failed") + return _Resp() + + import urllib.request + + monkeypatch.setattr(urllib.request, "urlopen", _opener) + + +def test_browser_manage_status_reads_env_var(monkeypatch): + """Status returns the env var verbatim (no network I/O).""" + monkeypatch.setenv("BROWSER_CDP_URL", "http://127.0.0.1:9222") + + resp = server.handle_request( + {"id": "1", "method": "browser.manage", "params": {"action": "status"}} + ) + + assert resp["result"] == {"connected": True, "url": "http://127.0.0.1:9222"} + + +def test_browser_manage_status_falls_back_to_config_cdp_url(monkeypatch): + """When env is unset, status surfaces ``browser.cdp_url`` from + config.yaml so users see what the next tool call will read.""" + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + + fake_cfg = types.SimpleNamespace( + read_raw_config=lambda: {"browser": {"cdp_url": "http://lan:9222"}} + ) + with patch.dict(sys.modules, {"hermes_cli.config": fake_cfg}): + resp = server.handle_request( + {"id": "1", "method": "browser.manage", "params": {"action": "status"}} + ) + + assert resp["result"] == {"connected": True, "url": "http://lan:9222"} + + +def test_browser_manage_status_does_not_call_get_cdp_override(monkeypatch): + """Regression guard for Copilot's "status must not block" review: + status must NOT route through `_get_cdp_override`, which performs a + `/json/version` HTTP probe with a multi-second timeout.""" + monkeypatch.setenv("BROWSER_CDP_URL", "http://127.0.0.1:9222") + + fake = types.SimpleNamespace( + _get_cdp_override=lambda: pytest.fail( # noqa: PT015 — fail loudly if called + "_get_cdp_override must not run on /browser status (network I/O)" + ) + ) + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + resp = server.handle_request( + {"id": "1", "method": "browser.manage", "params": {"action": "status"}} + ) + + assert resp["result"]["connected"] is True + + +def test_browser_manage_connect_sets_env_and_cleans_twice(monkeypatch): + """`/browser connect` must reach the live process: set env, reap browser + sessions before AND after publishing the new URL. The double-cleanup + closes the supervisor swap window where ``_ensure_cdp_supervisor`` + could re-attach to the *old* CDP endpoint between steps.""" + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + cleanup_calls: list[str] = [] + + def _cleanup_all(): + cleanup_calls.append(os.environ.get("BROWSER_CDP_URL", "")) + + fake = types.SimpleNamespace( + cleanup_all_browsers=_cleanup_all, + _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""), + ) + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + _stub_urlopen(monkeypatch, ok=True) + resp = server.handle_request( + { + "id": "1", + "method": "browser.manage", + "params": {"action": "connect", "url": "http://127.0.0.1:9222"}, + } + ) + + assert resp["result"] == {"connected": True, "url": "http://127.0.0.1:9222"} + assert os.environ.get("BROWSER_CDP_URL") == "http://127.0.0.1:9222" + # First cleanup runs against the OLD env (none here), second against the NEW. + assert cleanup_calls == ["", "http://127.0.0.1:9222"] + + +def test_browser_manage_connect_rejects_unreachable_endpoint(monkeypatch): + """An unreachable endpoint must NOT mutate the env or reap sessions.""" + monkeypatch.setenv("BROWSER_CDP_URL", "http://existing:9222") + cleanup_calls: list[str] = [] + fake = types.SimpleNamespace( + cleanup_all_browsers=lambda: cleanup_calls.append(os.environ.get("BROWSER_CDP_URL", "")), + _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""), + ) + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + _stub_urlopen(monkeypatch, ok=False) + resp = server.handle_request( + { + "id": "1", + "method": "browser.manage", + "params": {"action": "connect", "url": "http://unreachable:9222"}, + } + ) + + assert "error" in resp + # Env preserved; nothing reaped. + assert os.environ["BROWSER_CDP_URL"] == "http://existing:9222" + assert cleanup_calls == [] + + +def test_browser_manage_connect_normalizes_bare_host_port(monkeypatch): + """Persist a parsed `scheme://host:port` URL so `_get_cdp_override` + can normalize it; storing a bare host:port would break subsequent + tool calls (Copilot review on #17120).""" + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + fake = types.SimpleNamespace( + cleanup_all_browsers=lambda: None, + _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""), + ) + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + _stub_urlopen(monkeypatch, ok=True) + resp = server.handle_request( + { + "id": "1", + "method": "browser.manage", + "params": {"action": "connect", "url": "127.0.0.1:9222"}, + } + ) + + assert resp["result"]["connected"] is True + # Bare host:port got promoted to a full URL with explicit scheme. + assert resp["result"]["url"].startswith("http://") + assert os.environ["BROWSER_CDP_URL"].startswith("http://") + + +def test_browser_manage_connect_strips_discovery_path(monkeypatch): + """User-supplied discovery paths like `/json` or `/json/version` + must collapse to bare `scheme://host:port`; otherwise + ``_resolve_cdp_override`` will append ``/json/version`` again and + produce a duplicate path (Copilot review round-2 on #17120).""" + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + fake = types.SimpleNamespace( + cleanup_all_browsers=lambda: None, + _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""), + ) + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + _stub_urlopen(monkeypatch, ok=True) + resp = server.handle_request( + { + "id": "1", + "method": "browser.manage", + "params": {"action": "connect", "url": "http://127.0.0.1:9222/json"}, + } + ) + + assert resp["result"]["connected"] is True + assert resp["result"]["url"] == "http://127.0.0.1:9222" + assert os.environ["BROWSER_CDP_URL"] == "http://127.0.0.1:9222" + + +def test_browser_manage_connect_preserves_devtools_browser_endpoint(monkeypatch): + """Concrete devtools websocket endpoints (e.g. Browserbase) must + survive verbatim — we only collapse discovery-style paths.""" + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + fake = types.SimpleNamespace( + cleanup_all_browsers=lambda: None, + _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""), + ) + concrete = "ws://browserbase.example/devtools/browser/abc123" + + class _OkSocket: + def __enter__(self): return self + def __exit__(self, *a): return False + + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + # If urlopen is reached for a concrete ws endpoint, the test + # would still pass because _stub_urlopen returned ok=True before; + # patch it to assert-fail so we prove the HTTP probe is skipped. + with patch("urllib.request.urlopen", side_effect=AssertionError("urlopen called")): + with patch("socket.create_connection", return_value=_OkSocket()): + resp = server.handle_request( + { + "id": "1", + "method": "browser.manage", + "params": {"action": "connect", "url": concrete}, + } + ) + + assert resp["result"]["connected"] is True + assert resp["result"]["url"] == concrete + assert os.environ["BROWSER_CDP_URL"] == concrete + + +def test_browser_manage_connect_concrete_ws_skips_http_probe(monkeypatch): + """Regression for round-2 Copilot review: a hosted CDP endpoint + (no HTTP discovery) must connect via TCP-only reachability check. + The HTTP probe used to reject these even though they're valid.""" + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + fake = types.SimpleNamespace( + cleanup_all_browsers=lambda: None, + _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""), + ) + concrete = "wss://chrome.browserless.io/devtools/browser/sess-1" + + seen_targets: list[tuple[str, int]] = [] + + class _OkSocket: + def __enter__(self): return self + def __exit__(self, *a): return False + + def _fake_create_connection(addr, timeout=None): + seen_targets.append(addr) + return _OkSocket() + + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + # urlopen would 404/ECONNREFUSED on a real hosted CDP endpoint; + # asserting it's never called proves the probe was skipped. + with patch("urllib.request.urlopen", side_effect=AssertionError("urlopen called")): + with patch("socket.create_connection", side_effect=_fake_create_connection): + resp = server.handle_request( + { + "id": "1", + "method": "browser.manage", + "params": {"action": "connect", "url": concrete}, + } + ) + + assert resp["result"] == {"connected": True, "url": concrete} + # wss → port 443, host preserved verbatim. + assert seen_targets == [("chrome.browserless.io", 443)] + + +def test_browser_manage_connect_concrete_ws_tcp_unreachable(monkeypatch): + """If the TCP reachability check fails for a concrete ws endpoint, + return a clear 5031 error — no fallback to the HTTP probe (which + can never succeed for these URLs anyway).""" + monkeypatch.delenv("BROWSER_CDP_URL", raising=False) + fake = types.SimpleNamespace( + cleanup_all_browsers=lambda: None, + _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""), + ) + concrete = "ws://offline.example/devtools/browser/missing" + + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + with patch("socket.create_connection", side_effect=OSError("ECONNREFUSED")): + resp = server.handle_request( + { + "id": "1", + "method": "browser.manage", + "params": {"action": "connect", "url": concrete}, + } + ) + + assert "error" in resp + assert resp["error"]["code"] == 5031 + + +def test_browser_manage_disconnect_drops_env_and_cleans(monkeypatch): + monkeypatch.setenv("BROWSER_CDP_URL", "http://127.0.0.1:9222") + cleanup_count = {"n": 0} + fake = types.SimpleNamespace( + cleanup_all_browsers=lambda: cleanup_count.__setitem__("n", cleanup_count["n"] + 1), + _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""), + ) + with patch.dict(sys.modules, {"tools.browser_tool": fake}): + resp = server.handle_request( + {"id": "1", "method": "browser.manage", "params": {"action": "disconnect"}} + ) + + assert resp["result"] == {"connected": False} + assert "BROWSER_CDP_URL" not in os.environ + # Two cleanups: once before env removal, once after, matching connect. + assert cleanup_count["n"] == 2 diff --git a/tui_gateway/server.py b/tui_gateway/server.py index fc70168144..c2d8dc73af 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -4674,12 +4674,51 @@ def _(rid, params: dict) -> dict: # ── Methods: browser / plugins / cron / skills ─────────────────────── +def _resolve_browser_cdp_url() -> str: + """Return the configured browser CDP override without network I/O. + + ``/browser status`` must be fast — calling + ``tools.browser_tool._get_cdp_override`` would invoke + ``_resolve_cdp_override``, which performs an HTTP probe to + ``.../json/version`` for discovery-style URLs. That probe has + a multi-second timeout and would block the TUI on a slow or + unreachable host even though status only needs to report whether + an override is set. + + Mirrors the env/config precedence of ``_get_cdp_override`` (env + var first, then ``browser.cdp_url`` from config.yaml) without the + websocket-resolution step, so the answer reflects user intent + even when the configured host is not currently reachable. The + actual WS normalization happens in ``browser_navigate`` on the + next tool call. + """ + env_url = os.environ.get("BROWSER_CDP_URL", "").strip() + if env_url: + return env_url + try: + from hermes_cli.config import read_raw_config + + cfg = read_raw_config() + browser_cfg = cfg.get("browser", {}) if isinstance(cfg, dict) else {} + if isinstance(browser_cfg, dict): + return str(browser_cfg.get("cdp_url", "") or "").strip() + except Exception: + pass + return "" + + @method("browser.manage") def _(rid, params: dict) -> dict: action = params.get("action", "status") if action == "status": - url = os.environ.get("BROWSER_CDP_URL", "") - return _ok(rid, {"connected": bool(url), "url": url}) + resolved_url = _resolve_browser_cdp_url() + return _ok( + rid, + { + "connected": bool(resolved_url), + "url": resolved_url, + }, + ) if action == "connect": url = params.get("url", "http://localhost:9222") try: @@ -4690,36 +4729,97 @@ def _(rid, params: dict) -> dict: parsed = urlparse(url if "://" in url else f"http://{url}") if parsed.scheme not in {"http", "https", "ws", "wss"}: return _err(rid, 4015, f"unsupported browser url: {url}") - probe_root = f"{'https' if parsed.scheme == 'wss' else 'http' if parsed.scheme == 'ws' else parsed.scheme}://{parsed.netloc}" - probe_urls = [ - f"{probe_root.rstrip('/')}/json/version", - f"{probe_root.rstrip('/')}/json", - ] - ok = False - for probe in probe_urls: - try: - with urllib.request.urlopen(probe, timeout=2.0) as resp: - if 200 <= getattr(resp, "status", 200) < 300: - ok = True - break - except Exception: - continue - if not ok: - return _err(rid, 5031, f"could not reach browser CDP at {url}") - os.environ["BROWSER_CDP_URL"] = url + # A concrete ``ws[s]://.../devtools/browser/`` endpoint is + # already directly connectable — those are the URLs Browserbase + # / browserless / hosted CDP providers return, and they + # generally DON'T serve the discovery-style ``/json/version`` + # path. Probing it would just reject valid endpoints. Skip + # the HTTP probe and do a TCP-level reachability check instead; + # the actual CDP handshake happens on the next ``browser_navigate``. + is_concrete_ws = ( + parsed.scheme in {"ws", "wss"} + and parsed.path.startswith("/devtools/browser/") + ) + if is_concrete_ws: + import socket + + host = parsed.hostname + port = parsed.port or (443 if parsed.scheme == "wss" else 80) + if not host: + return _err(rid, 4015, f"missing host in browser url: {url}") + try: + with socket.create_connection((host, port), timeout=2.0): + pass + except OSError as e: + return _err(rid, 5031, f"could not reach browser CDP at {url}: {e}") + else: + probe_root = f"{'https' if parsed.scheme == 'wss' else 'http' if parsed.scheme == 'ws' else parsed.scheme}://{parsed.netloc}" + probe_urls = [ + f"{probe_root.rstrip('/')}/json/version", + f"{probe_root.rstrip('/')}/json", + ] + ok = False + for probe in probe_urls: + try: + with urllib.request.urlopen(probe, timeout=2.0) as resp: + if 200 <= getattr(resp, "status", 200) < 300: + ok = True + break + except Exception: + continue + if not ok: + return _err(rid, 5031, f"could not reach browser CDP at {url}") + + # Persist a normalized URL for downstream CDP resolution. + # Discovery-style inputs (`http://host:port` or + # `http://host:port/json[/version]`) collapse to bare + # ``scheme://host:port`` so ``_resolve_cdp_override`` can + # safely append ``/json/version`` without producing a + # double-discovery path like ``.../json/json/version``. + # Concrete websocket endpoints (``/devtools/browser/`` + # — what Browserbase and other cloud providers return) + # are preserved verbatim. + if parsed.path.startswith("/devtools/browser/"): + normalized = parsed.geturl() + else: + normalized = parsed._replace( + path="", + params="", + query="", + fragment="", + ).geturl() + + # Order matters: clear any cached browser sessions BEFORE + # publishing the new env var so an in-flight tool call + # observing the old supervisor is reaped first, and the + # next call freshly resolves the new URL. The previous + # ordering left a brief window where ``_ensure_cdp_supervisor`` + # could re-attach to the *old* supervisor. + cleanup_all_browsers() + os.environ["BROWSER_CDP_URL"] = normalized + # Drain any further cached state that could outlive the + # cleanup pass (CDP supervisor for the default task, + # cached agent-browser timeouts, etc.) so the next + # ``browser_navigate`` definitively reaches ``normalized``. cleanup_all_browsers() except Exception as e: return _err(rid, 5031, str(e)) - return _ok(rid, {"connected": True, "url": url}) + return _ok(rid, {"connected": True, "url": normalized}) if action == "disconnect": - os.environ.pop("BROWSER_CDP_URL", None) try: from tools.browser_tool import cleanup_all_browsers cleanup_all_browsers() except Exception: pass + os.environ.pop("BROWSER_CDP_URL", None) + try: + from tools.browser_tool import cleanup_all_browsers as _again + + _again() + except Exception: + pass return _ok(rid, {"connected": False}) return _err(rid, 4015, f"unknown action: {action}") diff --git a/ui-tui/src/app/slash/commands/ops.ts b/ui-tui/src/app/slash/commands/ops.ts index ef1b240604..7443552740 100644 --- a/ui-tui/src/app/slash/commands/ops.ts +++ b/ui-tui/src/app/slash/commands/ops.ts @@ -98,13 +98,16 @@ export const opsCommands: SlashCommand[] = [ const action = (rawAction || 'status').toLowerCase() if (!['connect', 'disconnect', 'status'].includes(action)) { - return ctx.transcript.sys('usage: /browser [connect|disconnect|status] [url]') + return ctx.transcript.sys( + 'usage: /browser [connect|disconnect|status] [url] · persistent: set browser.cdp_url in config.yaml' + ) } const payload: Record = { action } + const requested = rest.join(' ').trim() if (action === 'connect') { - payload.url = rest.join(' ').trim() || 'http://localhost:9222' + payload.url = requested || 'http://localhost:9222' } ctx.gateway @@ -113,14 +116,21 @@ export const opsCommands: SlashCommand[] = [ ctx.guarded(r => { if (action === 'status') { return ctx.transcript.sys( - r.connected ? `browser connected: ${r.url || '(url unavailable)'}` : 'browser not connected' + r.connected + ? `browser connected: ${r.url || '(url unavailable)'}` + : 'browser not connected (try /browser connect or set browser.cdp_url in config.yaml)' ) } if (action === 'connect') { - return ctx.transcript.sys( - r.connected ? `browser connected: ${r.url || '(url unavailable)'}` : 'browser connect failed' - ) + if (r.connected) { + ctx.transcript.sys(`browser connected: ${r.url || '(url unavailable)'}`) + ctx.transcript.sys('next browser tool call will use this CDP endpoint') + + return + } + + return ctx.transcript.sys('browser connect failed') } ctx.transcript.sys('browser disconnected') From 8d591fe3c74f795eebf0322e87a7ba0f03b4b332 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 15:47:50 -0700 Subject: [PATCH 10/23] fix(tui): prefer raw text over Rich-rendered ANSI in TUI message display (#17111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `turnController.recordMessageComplete` and `recordMessageDelta` both prioritised `payload.rendered` over `payload.text`. `payload.rendered` is the Rich-Console output `tui_gateway` builds for terminals that can't render markdown themselves; the TUI already renders markdown via ``. Two real bugs follow: 1. **Final answer garbled when `display.final_response_markdown: render` is set** (#16391). Raw ANSI escape sequences pass through into the React tree and the user sees overlapping coloured text instead of their answer. 2. **Streaming silently drops content.** Per-delta `rendered` is an *incremental* Rich fragment. The previous code did `this.bufRef = rendered ?? this.bufRef + text`, which on every tick replaced the whole accumulated buffer with the latest mid-sequence ANSI fragment. Long replies arrived truncated and looked half-painted — easy to miss as "model is being terse" instead of a client bug. Fix: * `recordMessageComplete` now prefers `payload.text`, falling back to `payload.rendered` only when the gateway elected not to send any. * `recordMessageDelta` always accumulates `text`; `rendered` is ignored on the streaming path entirely (Ink does its own markdown render via `` / `streamingMarkdown.tsx`). Tests: * `prefers raw text over Rich-rendered ANSI on message.complete` — the assistant message reflects raw markdown, not ANSI. * `falls back to payload.rendered when text is missing` — preserves the legacy "no `text`, only ANSI" path used by some adapters. * `always accumulates raw text in message.delta and ignores rendered` — pre-fix code would have made this assertion fail because each delta overwrote the buffer. Validation: `npm run type-check` clean, `npm test --run` 392/392 pass. --- .../createGatewayEventHandler.test.ts | 42 +++++++++++++++++++ ui-tui/src/app/turnController.ts | 17 ++++++-- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts index ffda1055a0..f2326dc5ac 100644 --- a/ui-tui/src/__tests__/createGatewayEventHandler.test.ts +++ b/ui-tui/src/__tests__/createGatewayEventHandler.test.ts @@ -314,6 +314,48 @@ describe('createGatewayEventHandler', () => { expect(messages.some(m => m.includes('FileNotFoundError'))).toBe(true) }) + it('prefers raw text over Rich-rendered ANSI on message.complete (#16391)', () => { + const appended: Msg[] = [] + const onEvent = createGatewayEventHandler(buildCtx(appended)) + const raw = 'Hermes here.\n\nLine two.' + // Rich-rendered ANSI (`final_response_markdown: render`) used to win, + // which left visible escape codes in Ink output. Raw text must win. + const rendered = '\u001b[33mHermes here.\u001b[0m\n\n\u001b[2mLine two.\u001b[0m' + + onEvent({ payload: { rendered, text: raw }, type: 'message.complete' } as any) + + const assistant = appended.find(msg => msg.role === 'assistant') + expect(assistant?.text).toBe(raw) + expect(assistant?.text).not.toContain('\u001b[') + }) + + it('falls back to payload.rendered when text is missing on message.complete', () => { + const appended: Msg[] = [] + const onEvent = createGatewayEventHandler(buildCtx(appended)) + const rendered = 'fallback when gateway omitted text' + + onEvent({ payload: { rendered }, type: 'message.complete' } as any) + + const assistant = appended.find(msg => msg.role === 'assistant') + expect(assistant?.text).toBe(rendered) + }) + + it('always accumulates raw text in message.delta and ignores `rendered` (#16391)', () => { + const appended: Msg[] = [] + const onEvent = createGatewayEventHandler(buildCtx(appended)) + + // Stream of partial text deltas; each delta carries an incremental + // Rich-ANSI fragment. Pre-fix code would replace the whole bufRef + // with the latest fragment, dropping prior text. + onEvent({ payload: { rendered: '\u001b[33mFi\u001b[0m', text: 'Fi' }, type: 'message.delta' } as any) + onEvent({ payload: { rendered: '\u001b[33mrst.\u001b[0m', text: 'rst.' }, type: 'message.delta' } as any) + onEvent({ payload: { text: ' second.' }, type: 'message.delta' } as any) + onEvent({ payload: {}, type: 'message.complete' } as any) + + const assistant = appended.find(msg => msg.role === 'assistant') + expect(assistant?.text).toBe('First. second.') + }) + it('anchors inline_diff as its own segment where the edit happened', () => { const appended: Msg[] = [] const onEvent = createGatewayEventHandler(buildCtx(appended)) diff --git a/ui-tui/src/app/turnController.ts b/ui-tui/src/app/turnController.ts index dbd5e1faf0..b9e0aa04c1 100644 --- a/ui-tui/src/app/turnController.ts +++ b/ui-tui/src/app/turnController.ts @@ -431,7 +431,13 @@ class TurnController { recordMessageComplete(payload: { rendered?: string; reasoning?: string; text?: string }) { this.closeReasoningSegment() - const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart() + // Ink renders markdown via ; the gateway's Rich-rendered ANSI + // (`payload.rendered`) is for terminals that can't. Prioritising + // `rendered` here garbles output whenever a user opts into + // `display.final_response_markdown: render` because raw ANSI escapes + // pass through into the React tree. Prefer raw text and fall back + // only when the gateway elected not to send any (#16391). + const rawText = (payload.text ?? payload.rendered ?? this.bufRef).trimStart() const split = splitReasoning(rawText) const finalText = finalTail(split.text, this.segmentMessages) const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim() @@ -516,7 +522,7 @@ class TurnController { return { finalMessages, finalText, wasInterrupted } } - recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) { + recordMessageDelta({ text }: { rendered?: string; text?: string }) { if (this.interrupted || !text) { return } @@ -524,7 +530,12 @@ class TurnController { this.pruneTransient() this.endReasoningPhase() - this.bufRef = rendered ?? this.bufRef + text + // Always accumulate the raw text delta. The pre-#16391 path replaced + // the entire buffer with `rendered` (an *incremental* Rich ANSI + // fragment), which on every tick discarded everything streamed so far + // — visible as overlapping coloured text and lost prose under + // `display.final_response_markdown: render`. + this.bufRef += text if (getUiState().streaming) { this.scheduleStreaming() From af6b1a3343728bd8d7ab767db1366a57ad0f8e4f Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 15:52:13 -0700 Subject: [PATCH 11/23] fix(tui): honor display.busy_input_mode in TUI v2 (#17110) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tui): honor display.busy_input_mode in TUI v2 The TUI v2 frontend hard-coded `composerActions.enqueue(full)` whenever `ui.busy` was true. The classic CLI and gateway adapters honor the `display.busy_input_mode` config key (`interrupt` | `queue` | `steer`), but Ink ignored it — sending a message during a long-running turn always landed in the queue regardless of config. The config default is already `interrupt` (hermes_cli/config.py), so users who explicitly opted into that experience were silently stuck on the legacy queue path. This wires the value through the existing config-sync surface: * `applyDisplay` now reads `display.busy_input_mode`, defaults to `interrupt` (matching `_load_busy_input_mode` in tui_gateway), and drops it into a new `UiState.busyInputMode` field. * `dispatchSubmission` and the queue-edit fall-through call a shared `handleBusyInput` helper that branches on the mode: * `queue` — legacy behavior, append to the queue. * `steer` — call `session.steer`; on rejection, fall back to queue with a sys note. * `interrupt` — `turnController.interruptTurn(...)` then `send()`, so the new prompt actually moves. * Mtime polling in `useConfigSync` already re-applies `config.full`, so flipping `display.busy_input_mode` in `~/.hermes/config.yaml` takes effect on the next 5s tick without restarting the TUI. Tests: * `applyDisplay → busy_input_mode` covers normalization + UiState fan-out. * `normalizeBusyInputMode` mirrors the Python side's allow-list. Validation: * `npm run type-check` (in `ui-tui/`) — clean. * `npm test --run` (in `ui-tui/`) — 394/394. * review(copilot): narrow busy_input_mode type, preserve queue order on steer fallback * review(copilot): clarify handleBusyInput comment (option, not return value) * fix(tui): default busy_input_mode to queue in TUI (CLI keeps interrupt) In a full-screen TUI users typically author the next prompt while the agent is still streaming, so an unintended interrupt loses in-flight typing. TUI fallback now defaults to `queue`; CLI / messaging adapters keep `interrupt` as the framework default. Override per-config via `display.busy_input_mode: interrupt` (or `steer`) — the normalize/wire path is unchanged, only the missing- value branch differs from the Python default. uiStore initial value also flipped to `queue` so first-frame render before `config.full` lands matches the eventual normalized value. --- ui-tui/src/__tests__/useConfigSync.test.ts | 54 +++++++++++- ui-tui/src/app/interfaces.ts | 3 + ui-tui/src/app/uiStore.ts | 1 + ui-tui/src/app/useConfigSync.ts | 24 +++++- ui-tui/src/app/useSubmission.ts | 96 ++++++++++++++++++++-- ui-tui/src/gatewayTypes.ts | 1 + 6 files changed, 172 insertions(+), 7 deletions(-) diff --git a/ui-tui/src/__tests__/useConfigSync.test.ts b/ui-tui/src/__tests__/useConfigSync.test.ts index 5682517441..b46427b09e 100644 --- a/ui-tui/src/__tests__/useConfigSync.test.ts +++ b/ui-tui/src/__tests__/useConfigSync.test.ts @@ -1,7 +1,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' import { $uiState, resetUiState } from '../app/uiStore.js' -import { applyDisplay, normalizeStatusBar } from '../app/useConfigSync.js' +import { applyDisplay, normalizeBusyInputMode, normalizeStatusBar } from '../app/useConfigSync.js' describe('applyDisplay', () => { beforeEach(() => { @@ -160,3 +160,55 @@ describe('normalizeStatusBar', () => { expect(normalizeStatusBar('OFF')).toBe('off') }) }) + +describe('normalizeBusyInputMode', () => { + it('passes through the canonical CLI parity values', () => { + expect(normalizeBusyInputMode('queue')).toBe('queue') + expect(normalizeBusyInputMode('steer')).toBe('steer') + expect(normalizeBusyInputMode('interrupt')).toBe('interrupt') + }) + + it('trims and lowercases input', () => { + expect(normalizeBusyInputMode(' Queue ')).toBe('queue') + expect(normalizeBusyInputMode('STEER')).toBe('steer') + }) + + it('defaults to queue for missing/unknown values (TUI-only override)', () => { + // CLI / messaging adapters keep `interrupt` as the framework default + // (see hermes_cli/config.py + tui_gateway/server.py::_load_busy_input_mode); + // the TUI ships `queue` because typing a follow-up while the agent + // streams is the common authoring pattern and an unintended interrupt + // loses work. + expect(normalizeBusyInputMode(undefined)).toBe('queue') + expect(normalizeBusyInputMode(null)).toBe('queue') + expect(normalizeBusyInputMode('')).toBe('queue') + expect(normalizeBusyInputMode('drop')).toBe('queue') + expect(normalizeBusyInputMode(42)).toBe('queue') + }) +}) + +describe('applyDisplay → busy_input_mode', () => { + beforeEach(() => { + resetUiState() + }) + + it('threads display.busy_input_mode into $uiState', () => { + const setBell = vi.fn() + + applyDisplay({ config: { display: { busy_input_mode: 'queue' } } }, setBell) + expect($uiState.get().busyInputMode).toBe('queue') + + applyDisplay({ config: { display: { busy_input_mode: 'steer' } } }, setBell) + expect($uiState.get().busyInputMode).toBe('steer') + }) + + it('falls back to queue when value is missing or invalid (TUI-only default)', () => { + const setBell = vi.fn() + + applyDisplay({ config: { display: {} } }, setBell) + expect($uiState.get().busyInputMode).toBe('queue') + + applyDisplay({ config: { display: { busy_input_mode: 'drop' } } }, setBell) + expect($uiState.get().busyInputMode).toBe('queue') + }) +}) diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts index 9b987f87d3..fb75656ecc 100644 --- a/ui-tui/src/app/interfaces.ts +++ b/ui-tui/src/app/interfaces.ts @@ -27,6 +27,8 @@ export interface StateSetter { export type StatusBarMode = 'bottom' | 'off' | 'top' +export type BusyInputMode = 'interrupt' | 'queue' | 'steer' + export interface SelectionApi { captureScrolledRows: (firstRow: number, lastRow: number, side: 'above' | 'below') => void clearSelection: () => void @@ -85,6 +87,7 @@ export interface TranscriptRow { export interface UiState { bgTasks: Set busy: boolean + busyInputMode: BusyInputMode compact: boolean detailsMode: DetailsMode detailsModeCommandOverride: boolean diff --git a/ui-tui/src/app/uiStore.ts b/ui-tui/src/app/uiStore.ts index 1b3a841e18..f937e0cb52 100644 --- a/ui-tui/src/app/uiStore.ts +++ b/ui-tui/src/app/uiStore.ts @@ -9,6 +9,7 @@ import type { UiState } from './interfaces.js' const buildUiState = (): UiState => ({ bgTasks: new Set(), busy: false, + busyInputMode: 'queue', compact: false, detailsMode: 'collapsed', detailsModeCommandOverride: false, diff --git a/ui-tui/src/app/useConfigSync.ts b/ui-tui/src/app/useConfigSync.ts index 26d02d6204..4d61bdb1d1 100644 --- a/ui-tui/src/app/useConfigSync.ts +++ b/ui-tui/src/app/useConfigSync.ts @@ -10,7 +10,7 @@ import type { } from '../gatewayTypes.js' import { asRpcResult } from '../lib/rpc.js' -import type { StatusBarMode } from './interfaces.js' +import type { BusyInputMode, StatusBarMode } from './interfaces.js' import { turnController } from './turnController.js' import { patchUiState } from './uiStore.js' @@ -24,6 +24,27 @@ const STATUSBAR_ALIAS: Record = { export const normalizeStatusBar = (raw: unknown): StatusBarMode => raw === false ? 'off' : typeof raw === 'string' ? (STATUSBAR_ALIAS[raw.trim().toLowerCase()] ?? 'top') : 'top' +const BUSY_MODES = new Set(['interrupt', 'queue', 'steer']) + +// TUI defaults to `queue` even though the framework default +// (`hermes_cli/config.py`) is `interrupt`. Rationale: in a full-screen +// TUI you're typically authoring the next prompt while the agent is +// still streaming, and an unintended interrupt loses work. Set +// `display.busy_input_mode: interrupt` (or `steer`) explicitly to +// opt out per-config; CLI / messaging adapters keep their `interrupt` +// default unchanged. +const TUI_BUSY_DEFAULT: BusyInputMode = 'queue' + +export const normalizeBusyInputMode = (raw: unknown): BusyInputMode => { + if (typeof raw !== 'string') { + return TUI_BUSY_DEFAULT + } + + const v = raw.trim().toLowerCase() as BusyInputMode + + return BUSY_MODES.has(v) ? v : TUI_BUSY_DEFAULT +} + const MTIME_POLL_MS = 5000 const quietRpc = async = Record>( @@ -43,6 +64,7 @@ export const applyDisplay = (cfg: ConfigFullResponse | null, setBell: (v: boolea setBell(!!d.bell_on_complete) patchUiState({ + busyInputMode: normalizeBusyInputMode(d.busy_input_mode), compact: !!d.tui_compact, detailsMode: resolveDetailsMode(d), detailsModeCommandOverride: false, diff --git a/ui-tui/src/app/useSubmission.ts b/ui-tui/src/app/useSubmission.ts index f2468f27e6..2c2c6d48d9 100644 --- a/ui-tui/src/app/useSubmission.ts +++ b/ui-tui/src/app/useSubmission.ts @@ -4,7 +4,12 @@ import { TYPING_IDLE_MS } from '../config/timing.js' import { attachedImageNotice } from '../domain/messages.js' import { looksLikeSlashCommand } from '../domain/slash.js' import type { GatewayClient } from '../gatewayClient.js' -import type { InputDetectDropResponse, PromptSubmitResponse, ShellExecResponse } from '../gatewayTypes.js' +import type { + InputDetectDropResponse, + PromptSubmitResponse, + SessionSteerResponse, + ShellExecResponse +} from '../gatewayTypes.js' import { asRpcResult } from '../lib/rpc.js' import { hasInterpolation, INTERPOLATION_RE } from '../protocol/interpolation.js' import { PASTE_SNIPPET_RE } from '../protocol/paste.js' @@ -207,6 +212,70 @@ export function useSubmission(opts: UseSubmissionOptions) { [interpolate, send, shellExec] ) + // Honors `display.busy_input_mode` from config.yaml (CLI parity): + // - 'queue' (legacy): append to queueRef; drains on busy → false + // - 'steer' : inject into the current turn via session.steer; falls + // back to queue when steer is rejected (no agent / no + // tool window). + // - 'interrupt' (default): cancel the in-flight turn, then send the + // new text as a fresh prompt so it actually moves. + // + // `opts.fallbackToFront` controls whether a steer fallback re-inserts + // at the front of the queue (used by the queue-edit path to preserve + // a picked item's position); the mainline submit path always appends. + const handleBusyInput = useCallback( + (full: string, opts: { fallbackToFront?: boolean } = {}) => { + const live = getUiState() + const mode = live.busyInputMode + const fallback = (note: string) => { + if (opts.fallbackToFront) { + composerRefs.queueRef.current.unshift(full) + composerActions.syncQueue() + } else { + composerActions.enqueue(full) + } + sys(note) + } + + if (mode === 'queue') { + return composerActions.enqueue(full) + } + + if (mode === 'steer' && live.sid) { + gw.request('session.steer', { session_id: live.sid, text: full }) + .then(raw => { + const r = asRpcResult(raw) + + if (r?.status !== 'queued') { + fallback('steer rejected — message queued for next turn') + } + }) + .catch(() => fallback('steer failed — message queued for next turn')) + + return + } + + // 'interrupt' (default): tear down the current turn, then send. + // `interruptTurn` fires `session.interrupt` without awaiting; if + // the gateway is still mid-response when `prompt.submit` lands, + // `send()`'s catch path re-queues with a "queued: ..." sys note + // (`isSessionBusyError`) — so a lost race degrades to queue + // semantics, not a dropped message. + if (live.sid) { + turnController.interruptTurn({ appendMessage, gw, sid: live.sid, sys }) + } + + if (hasInterpolation(full)) { + patchUiState({ busy: true }) + + return interpolate(full, send) + } + + send(full) + }, + [appendMessage, composerActions, composerRefs, gw, interpolate, send, sys] + ) + const dispatchSubmission = useCallback( (full: string) => { if (!full.trim()) { @@ -252,9 +321,16 @@ export function useSubmission(opts: UseSubmissionOptions) { } if (getUiState().busy) { - composerRefs.queueRef.current.unshift(picked) + // 'interrupt' / 'steer' should reach the live turn instead of + // silently going back to the queue. handleBusyInput resolves + // mode-specific behavior (interrupt-and-send, steer, or queue). + if (getUiState().busyInputMode === 'queue') { + composerRefs.queueRef.current.unshift(picked) - return composerActions.syncQueue() + return composerActions.syncQueue() + } + + return handleBusyInput(picked, { fallbackToFront: true }) } return sendQueued(picked) @@ -263,7 +339,7 @@ export function useSubmission(opts: UseSubmissionOptions) { composerActions.pushHistory(full) if (getUiState().busy) { - return composerActions.enqueue(full) + return handleBusyInput(full) } if (hasInterpolation(full)) { @@ -274,7 +350,17 @@ export function useSubmission(opts: UseSubmissionOptions) { send(full) }, - [appendMessage, composerActions, composerRefs, interpolate, send, sendQueued, shellExec, slashRef] + [ + appendMessage, + composerActions, + composerRefs, + handleBusyInput, + interpolate, + send, + sendQueued, + shellExec, + slashRef + ] ) const submit = useCallback( diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts index 6a4fa2ae02..b2202af9f2 100644 --- a/ui-tui/src/gatewayTypes.ts +++ b/ui-tui/src/gatewayTypes.ts @@ -53,6 +53,7 @@ export type CommandDispatchResponse = export interface ConfigDisplayConfig { bell_on_complete?: boolean + busy_input_mode?: string details_mode?: string inline_diffs?: boolean sections?: Record From 1e326c686df8a7304d5018f846246b3126dc5cfd Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 15:54:06 -0700 Subject: [PATCH 12/23] fix(tui-gateway): harden stdio transport against half-closed pipes + SIGTERM races (#17118) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tui-gateway): harden stdio transport against half-closed pipes + SIGTERM races `tui_gateway` reports `tui_gateway_crash.log` traces where the main thread sits in `sys.stdin` while a worker holds `_stdout_lock` mid- flush, and SIGTERM then calls `sys.exit(0)` while the lock is still held — the interpreter shutdown stalls behind the wedged write. Two narrowly scoped hardenings: **`tui_gateway/transport.py`** * Move JSON serialisation outside the lock — long messages no longer block sibling writers while we serialise. * Treat `BrokenPipeError`, `ValueError` ("I/O on closed file") and generic `OSError` from both `write` and `flush` as "peer is gone": return `False` instead of bubbling, matching what `write_json`'s callers in `entry.py` already expect. * Split `flush` into its own try block so a stuck flush never strands a partial write or holds the lock indefinitely on its way out. * Optional `HERMES_TUI_GATEWAY_NO_FLUSH=1` env knob to skip explicit `flush()` entirely on environments where a half-closed read pipe produces an indefinite kernel-level block. Default unchanged. **`tui_gateway/entry.py`** * `_log_signal` now spawns a 1-second daemon timer that calls `os._exit(0)` if the orderly `sys.exit(0)` path is itself stuck behind a wedged worker. Atexit handlers run inside the grace window when they can; the timer is the safety net so a deadlocked flush no longer strands the gateway process. Tests: * `test_write_json_closed_stream_returns_false` — ValueError path. * `test_write_json_oserror_on_flush_returns_false` — OSError on flush must not strand the lock; the write portion still landed before the flush failure. * `test_write_json_no_flush_env_skips_flush` — env knob bypass. Validation: `scripts/run_tests.sh tests/tui_gateway/test_protocol.py` (42/42 pass; one pre-existing failure on `test_session_resume_returns_hydrated_messages` is unrelated to this change — same `include_ancestors` mock kwarg issue tracked elsewhere). `scripts/run_tests.sh tests/test_tui_gateway_server.py` 90/90 pass. * review(copilot): tighten transport hardening comments + test cleanup * review(copilot): narrow exception capture, configurable grace, simpler no-flush test * fix(tui-gateway): narrow ValueError to closed-stream; surface UnicodeEncodeError Copilot review on PR #17118: `UnicodeEncodeError` is a ValueError subclass, so a non-UTF-8 stdout (mismatched PYTHONIOENCODING / locale) would have been silently swallowed as 'peer gone' under `except ValueError`. That hides a real environment bug. Now: - UnicodeEncodeError → log with exc_info (warning) and drop the frame - ValueError where str(e) contains 'closed file' → peer gone, return False - Any other ValueError → log loudly, drop frame (defensive, but visible) Same shape applied to flush. Adds two regression tests. * fix(tui-gateway): reserve write() False for peer-gone; re-raise programming errors Round 2 Copilot review on PR #17118: `Transport.write()` returning `False` is documented as 'peer is gone', and `entry.py` reacts by calling `sys.exit(0)`. But the implementation also returned False for non-IO conditions (non-JSON-safe payloads, UnicodeEncodeError, unrelated ValueErrors), so a programming error or local env bug would present as a clean disconnect — exactly the diagnosis pain we wanted to eliminate. Now: - `json.dumps` failure → re-raises (TypeError/ValueError surfaces in crash log) - `BrokenPipeError` → False (peer gone) - `ValueError('...closed file...')` → False (peer gone) - `UnicodeEncodeError` and any other ValueError → re-raise - `OSError` → False (existing IO-failure semantics, debug-logged) Tests updated to assert the re-raise behaviour and added a non-serializable-payload regression test. * fix(tui-gateway): narrow OSError to peer-gone errnos; honest test naming Round 3 Copilot review on PR #17118: - Docstring claimed False = peer gone, but generic OSError on write/flush also returned False — meaning ENOSPC/EACCES/EIO would silently exit. Added `_PEER_GONE_ERRNOS = {EPIPE, ECONNRESET, EBADF, ESHUTDOWN, +WSA}` and narrowed the OSError handlers; non-peer-gone errnos re-raise. Docstring now lists OSError as peer-gone branch with the errno set. - The `_DISABLE_FLUSH` test was named after the env var but actually patched the module constant. Renamed it to reflect the contract being tested (skips flush when constant is true) AND added a real end-to-end test that sets the env var, reloads transport.py, and asserts the constant flips. Cleanup reload restores defaults so parallel tests stay isolated. Self-review (avoid round 4): - Verified TeeTransport's secondary-swallow stays intentional. - _log_signal grace path already covered by separate tests. --- tests/tui_gateway/test_protocol.py | 128 +++++++++++++++++++++++++++++ tui_gateway/entry.py | 57 ++++++++++++- tui_gateway/transport.py | 106 ++++++++++++++++++++++-- 3 files changed, 283 insertions(+), 8 deletions(-) diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index 42caaacc58..6c94ec0710 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -83,6 +83,134 @@ def test_write_json_broken_pipe(server): assert server.write_json({"x": 1}) is False +def test_write_json_closed_stream_returns_false(server): + """ValueError ('I/O on closed file') used to bubble up; treat as gone.""" + + class _Closed: + def write(self, _): raise ValueError("I/O operation on closed file") + def flush(self): raise ValueError("I/O operation on closed file") + + server._real_stdout = _Closed() + assert server.write_json({"x": 1}) is False + + +def test_write_json_unicode_encode_error_re_raises(server): + """A non-UTF-8 stdout encoding raises UnicodeEncodeError (a ValueError + subclass). It must NOT be swallowed as 'peer gone' — that would let + `entry.py` exit cleanly via the False path and hide the real config + bug. We re-raise so the existing crash-log infrastructure records it.""" + + class _AsciiOnly: + def write(self, line): + line.encode("ascii") # raises UnicodeEncodeError on non-ascii + def flush(self): pass + + server._real_stdout = _AsciiOnly() + with pytest.raises(UnicodeEncodeError): + server.write_json({"msg": "héllo"}) + + +def test_write_json_unrelated_value_error_re_raises(server): + """Only ValueError('...closed file...') means peer gone. Other + ValueErrors are programming errors and must surface.""" + + class _BadValue: + def write(self, _): raise ValueError("something else entirely") + def flush(self): pass + + server._real_stdout = _BadValue() + with pytest.raises(ValueError, match="something else entirely"): + server.write_json({"x": 1}) + + +def test_write_json_non_serializable_payload_re_raises(server): + """Non-JSON-safe payloads are programming errors — they must NOT be + silently dropped via the False path (which would trigger a clean exit + in entry.py and mask the real bug).""" + import io + + server._real_stdout = io.StringIO() + with pytest.raises(TypeError): + server.write_json({"obj": object()}) + + +def test_write_json_peer_gone_oserror_on_flush_returns_false(server): + """A flush that raises a peer-gone OSError (EPIPE) must not strand + the lock or crash; it returns False so the dispatcher exits cleanly.""" + import errno + + written = [] + + class _FlushPeerGone: + def write(self, line): written.append(line) + def flush(self): raise OSError(errno.EPIPE, "broken pipe") + + server._real_stdout = _FlushPeerGone() + assert server.write_json({"x": 1}) is False + assert written and json.loads(written[0]) == {"x": 1} + + +def test_write_json_non_peer_gone_oserror_re_raises(server): + """Host I/O failures (ENOSPC, EACCES, EIO …) are NOT peer-gone — they + must re-raise so the crash log records them instead of looking like + a clean disconnect via the False path.""" + import errno + + class _DiskFull: + def write(self, _): raise OSError(errno.ENOSPC, "no space left") + def flush(self): pass + + server._real_stdout = _DiskFull() + with pytest.raises(OSError, match="no space"): + server.write_json({"x": 1}) + + +def test_write_json_skips_flush_when_disable_flush_true(monkeypatch): + """`StdioTransport` skips flush when `_DISABLE_FLUSH` is true. + + Tests the runtime *behaviour* via direct module-attr patch. The env + var → module constant wiring is covered by the dedicated env test + below; reloading server.py here would re-register atexit hooks and + recreate the worker pool. + """ + import importlib + + transport_mod = importlib.import_module("tui_gateway.transport") + monkeypatch.setattr(transport_mod, "_DISABLE_FLUSH", True) + + flushed = {"count": 0} + written = [] + + class _Stream: + def write(self, line): written.append(line) + def flush(self): flushed["count"] += 1 + + stream = _Stream() + transport = transport_mod.StdioTransport(lambda: stream, threading.Lock()) + + assert transport.write({"x": 1}) is True + assert flushed["count"] == 0 + + +def test_disable_flush_env_var_actually_wires_to_module_constant(monkeypatch): + """End-to-end: setting `HERMES_TUI_GATEWAY_NO_FLUSH=1` and importing + `tui_gateway.transport` fresh actually flips `_DISABLE_FLUSH` true. + + Reloads only the transport module — server.py is untouched so its + atexit hooks/worker pool stay intact.""" + import importlib + + monkeypatch.setenv("HERMES_TUI_GATEWAY_NO_FLUSH", "1") + transport_mod = importlib.reload(importlib.import_module("tui_gateway.transport")) + + try: + assert transport_mod._DISABLE_FLUSH is True + finally: + # Restore the env-disabled state so other tests see the default. + monkeypatch.delenv("HERMES_TUI_GATEWAY_NO_FLUSH", raising=False) + importlib.reload(transport_mod) + + # ── _emit ──────────────────────────────────────────────────────────── diff --git a/tui_gateway/entry.py b/tui_gateway/entry.py index 38e00ecfac..70fc851820 100644 --- a/tui_gateway/entry.py +++ b/tui_gateway/entry.py @@ -29,6 +29,28 @@ def _install_sidecar_publisher() -> None: ) +# How long to wait for orderly shutdown (atexit + finalisers) before +# falling back to ``os._exit(0)`` so a wedged worker mid-flush can't +# strand the process. 1s covers the gateway's own shutdown work +# (thread-pool drain + session finalize) on every machine we've +# tested; override via ``HERMES_TUI_GATEWAY_SHUTDOWN_GRACE_S`` if a +# slower environment needs more headroom (e.g. encrypted disks +# flushing checkpoints) and accept that a longer grace also means a +# longer wait when shutdown actually deadlocks. +_DEFAULT_SHUTDOWN_GRACE_S = 1.0 + + +def _shutdown_grace_seconds() -> float: + raw = (os.environ.get("HERMES_TUI_GATEWAY_SHUTDOWN_GRACE_S") or "").strip() + if not raw: + return _DEFAULT_SHUTDOWN_GRACE_S + try: + value = float(raw) + except ValueError: + return _DEFAULT_SHUTDOWN_GRACE_S + return value if value > 0 else _DEFAULT_SHUTDOWN_GRACE_S + + def _log_signal(signum: int, frame) -> None: """Capture WHICH thread and WHERE a termination signal hit us. @@ -38,6 +60,15 @@ def _log_signal(signum: int, frame) -> None: handler the gateway-exited banner in the TUI has no trace — the crash log never sees a Python exception because the kernel reaps the process before the interpreter runs anything. + + Termination semantics: ``sys.exit(0)`` here used to race the worker + pool — a thread holding ``_stdout_lock`` mid-flush would block the + interpreter shutdown indefinitely. We now log the stack, give the + process the configured shutdown grace + (``HERMES_TUI_GATEWAY_SHUTDOWN_GRACE_S``, default + ``_DEFAULT_SHUTDOWN_GRACE_S``) to drain naturally on a background + thread, and fall back to ``os._exit(0)`` so a wedged write/flush + can never strand the process. """ name = { signal.SIGPIPE: "SIGPIPE", @@ -62,7 +93,31 @@ def _log_signal(signum: int, frame) -> None: except Exception: pass print(f"[gateway-signal] {name}", file=sys.stderr, flush=True) - sys.exit(0) + + import threading as _threading + + def _hard_exit() -> None: + # If a worker thread is still mid-flush on a half-closed pipe, + # ``sys.exit(0)`` would wait forever for it to drop the GIL on + # interpreter shutdown. ``os._exit`` skips atexit handlers but + # breaks the deadlock. The crash log + stderr line above are + # the forensic trail. + os._exit(0) + + timer = _threading.Timer(_shutdown_grace_seconds(), _hard_exit) + timer.daemon = True + timer.start() + + try: + sys.exit(0) + except SystemExit: + # Re-raise so the main-thread interpreter unwinds and runs + # atexit + finalisers inside the grace window. Python signal + # handlers always run on the main thread, but a worker thread + # holding ``_stdout_lock`` mid-flush can keep that unwind + # waiting indefinitely; the daemon timer above is the safety + # net for that exact case. + raise # SIGPIPE: ignore, don't exit. The old SIG_DFL killed the process diff --git a/tui_gateway/transport.py b/tui_gateway/transport.py index a1b4b283db..ce93e518a3 100644 --- a/tui_gateway/transport.py +++ b/tui_gateway/transport.py @@ -23,10 +23,45 @@ the stream lazily through a callback. from __future__ import annotations import contextvars +import errno import json +import logging +import os import threading from typing import Any, Callable, Optional, Protocol, runtime_checkable +# Errno values that mean "the peer is gone" rather than "the host has a +# real I/O problem". Anything outside this set re-raises so it surfaces +# in the crash log instead of looking like a clean disconnect. +_PEER_GONE_ERRNOS = frozenset({ + errno.EPIPE, # write to closed pipe (POSIX) + errno.ECONNRESET, # peer reset the connection + errno.EBADF, # fd closed under us + errno.ESHUTDOWN, # transport endpoint shut down + getattr(errno, "WSAECONNRESET", -1), # win32 mapping (no-op on POSIX) + getattr(errno, "WSAESHUTDOWN", -1), +} - {-1}) + +logger = logging.getLogger(__name__) + +# Optional knob: when true, StdioTransport does not call ``stream.flush`` +# after writing. Use this on environments where a half-closed pipe (TUI +# Node parent quit while the gateway is still emitting events) makes +# flush block long enough to starve the rest of the worker pool. +# +# IMPORTANT: Python text stdout is fully buffered when attached to a +# pipe (the TUI case), so this knob ONLY makes sense when the gateway +# is launched with ``-u`` or ``PYTHONUNBUFFERED=1``. Without one of +# those, JSON-RPC frames will accumulate in the buffer and the TUI +# will hang waiting for ``gateway.ready``. Default stays off so the +# existing flush-after-write behaviour is unchanged. +_DISABLE_FLUSH = (os.environ.get("HERMES_TUI_GATEWAY_NO_FLUSH", "") or "").strip().lower() in { + "1", + "true", + "yes", + "on", +} + @runtime_checkable class Transport(Protocol): @@ -77,15 +112,72 @@ class StdioTransport: self._lock = lock def write(self, obj: dict) -> bool: + """Return ``True`` on success, ``False`` ONLY when the peer is gone. + + Returning ``False`` is the dispatcher's "broken stdout pipe" signal + — ``entry.py`` calls ``sys.exit(0)`` when ``write_json`` reports + ``False``. So programming errors (non-JSON-safe payloads, encoding + misconfig, unexpected ValueErrors, host I/O bugs like ENOSPC) MUST + NOT return ``False``, otherwise a real bug looks like a clean + disconnect and is harder to diagnose. Those re-raise so the + existing crash-log infrastructure records the traceback. + + Peer-gone branches: + * ``BrokenPipeError`` + * ``ValueError("...closed file...")`` + * ``OSError`` whose errno is in :data:`_PEER_GONE_ERRNOS` + (EPIPE / ECONNRESET / EBADF / ESHUTDOWN; plus WSA mappings + on Windows). Other OSError errnos (ENOSPC, EACCES, ...) are + real host problems and re-raise. + """ + # Serialization is OUTSIDE the lock so a large payload can't + # block other threads emitting their own frames. A non-JSON-safe + # payload is a programming error: re-raise so the crash log + # captures it instead of silently exiting via the False path. line = json.dumps(obj, ensure_ascii=False) + "\n" - try: - with self._lock: - stream = self._stream_getter() + + with self._lock: + stream = self._stream_getter() + try: stream.write(line) - stream.flush() - return True - except BrokenPipeError: - return False + except BrokenPipeError: + return False + except ValueError as e: + # ValueError("I/O operation on closed file") is the + # ONLY ValueError that means "peer gone". Anything + # else — including UnicodeEncodeError, which is a + # ValueError subclass for misconfigured locales — + # is a real bug; re-raise so it surfaces in the crash log. + if isinstance(e, UnicodeEncodeError) or "closed file" not in str(e): + raise + return False + except OSError as e: + if e.errno not in _PEER_GONE_ERRNOS: + raise + logger.debug("StdioTransport write peer gone: %s", e) + return False + + # A flush that *raises* with a peer-gone errno means the + # dispatcher should exit cleanly. A flush that *hangs* on + # a half-closed pipe holds the lock until it returns — see + # ``_DISABLE_FLUSH`` for the "skip flush entirely" escape + # hatch. + if not _DISABLE_FLUSH: + try: + stream.flush() + except BrokenPipeError: + return False + except ValueError as e: + if isinstance(e, UnicodeEncodeError) or "closed file" not in str(e): + raise + return False + except OSError as e: + if e.errno not in _PEER_GONE_ERRNOS: + raise + logger.debug("StdioTransport flush peer gone: %s", e) + return False + + return True def close(self) -> None: return None From 258efb2575c7b2839c947e76b400f541efa87259 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 16:02:06 -0700 Subject: [PATCH 13/23] feat(tui): expand light-terminal auto-detection (HERMES_TUI_THEME, background hex) (#17113) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(tui): expand light-terminal auto-detection (HERMES_TUI_THEME, BG hex) Modern terminals (Ghostty, Warp, iTerm2) don't set COLORFGBG, so the auto-light path was effectively COLORFGBG-only and silently broken for many users. Two pragmatic additions, both opt-in, plus a clearer priority chain: 1. **`HERMES_TUI_THEME=light|dark`** as a symmetric explicit override. The existing `HERMES_TUI_LIGHT` is fine but reads as boolean noise; a named theme env var matches `display.skin` muscle memory. 2. **`HERMES_TUI_BACKGROUND` hex/rgb hint.** Lets advanced users (or a future OSC11 query helper that caches the answer) state a ground-truth background colour. Decoded to Rec. 709 luma; ≥ 0.6 counts as light. Priority order is now fully ordered and explainable: 1. `HERMES_TUI_LIGHT` (1/0/true/false/on/off). 2. `HERMES_TUI_THEME=light|dark`. 3. `HERMES_TUI_BACKGROUND` luminance. 4. `COLORFGBG` last field — light slots 7/15 → light, 0–15 → dark (authoritative when set, so the new TERM_PROGRAM path can never stomp on a terminal that already volunteered a dark answer). 5. `TERM_PROGRAM` allow-list — empty by default. The slot is left in place because folks asked for it but populating it risks wrongly flipping users on Apple_Terminal / iTerm2 dark profiles to light. Easy to add per terminal once we have signal. Tests: 5 new cases in `theme.test.ts` covering theme env, background hex (3- and 6-char), invalid hex falling through, and COLORFGBG taking precedence over the future allow-list. Validation: `npm run type-check` clean, `npm test --run` 392/392. * review(copilot): tighten theme detection comments + drop unnecessary cast * review(copilot): strict hex regex so partial garbage doesn't slip into luminance * test(tui): make TERM_PROGRAM allow-list injectable so precedence is provable Copilot review on PR #17113: `LIGHT_DEFAULT_TERM_PROGRAMS` is empty in production, so the prior assertion would have passed even if `detectLightMode` ignored `COLORFGBG` entirely. That defeats the test's purpose. `detectLightMode` now takes the allow-list as an optional second argument (defaults to the production set). The test injects a set containing `Apple_Terminal`, asserts the allow-list alone WOULD return light, then asserts `COLORFGBG: '15;0'` overrides it — the precedence rule is now exercised, not assumed. * fix(tui): COLORFGBG empty-trailing-field falls through; isolate DEFAULT_THEME tests Round 2 Copilot review on PR #17113: 1. `Number(colorfgbg.split(';').at(-1))` returns 0 for an empty trailing field (e.g. `COLORFGBG='15;'` → bg===0), which would have looked like an authoritative dark slot and incorrectly blocked the TERM_PROGRAM allow-list. Added a `/^\d+$/` guard before coercion; non-numeric trailing fields now fall through. 2. Fixed the misleading '0–6 / 8–15 ranges are dark' comment — the block returns true for bg===15, so the range is actually 0–6 / 8–14. 3. `DEFAULT_THEME` is computed from `process.env` at module-load. A developer shell with `HERMES_TUI_THEME=light` (or a bright `HERMES_TUI_BACKGROUND`) would flip it and break local tests. The DEFAULT_THEME describe blocks now sterilize the relevant env vars + dynamically import theme.ts (vi.resetModules pattern from platform.test.ts). fromSkin tests compare against DARK_THEME directly to decouple them from ambient env. * test(tui): isolate ALL env-coupled theme symbols, not just DEFAULT_THEME Round 3 Copilot review on PR #17113: the static top-level imports of `fromSkin`, `DARK_THEME`, `LIGHT_THEME` evaluated theme.ts before `importThemeWithCleanEnv` had a chance to clean the env. Because `fromSkin` closes over `DEFAULT_THEME`, an ambient `HERMES_TUI_THEME=light` or bright `HERMES_TUI_BACKGROUND` would still flip the base palette and cause local-only failures. Removed the static import entirely. Every test now obtains its theme symbols via `importThemeWithCleanEnv`, including `detectLightMode` (for consistency, even though it takes env as a parameter). `fromSkin` tests assert against the cleaned `DEFAULT_THEME` from the same dynamic import — preserves the actual contract (skins extend the ambient base palette) without coupling the test to dev-shell state. Verified by running with HERMES_TUI_THEME=light + HERMES_TUI_BACKGROUND=#ffffff: all 20 theme tests still pass. Self-review (avoid round 4): - Audited other test files importing DEFAULT_THEME (syntax.test.ts, streamingMarkdown.test.ts, constants.test.ts) — all just pass it as a parameter or assert palette property existence (works on both light + dark), so no env coupling there. --- ui-tui/src/__tests__/theme.test.ts | 163 +++++++++++++++++++++++++---- ui-tui/src/theme.ts | 124 ++++++++++++++++++++-- 2 files changed, 260 insertions(+), 27 deletions(-) diff --git a/ui-tui/src/__tests__/theme.test.ts b/ui-tui/src/__tests__/theme.test.ts index db2b1eac38..b73251188f 100644 --- a/ui-tui/src/__tests__/theme.test.ts +++ b/ui-tui/src/__tests__/theme.test.ts @@ -1,46 +1,90 @@ -import { describe, expect, it } from 'vitest' +import { afterEach, describe, expect, it, vi } from 'vitest' -import { DARK_THEME, DEFAULT_THEME, detectLightMode, fromSkin, LIGHT_THEME } from '../theme.js' +// `theme.js` reads `process.env` at module-load to compute DEFAULT_THEME, +// and `fromSkin` closes over DEFAULT_THEME. A developer shell with +// HERMES_TUI_THEME=light (or HERMES_TUI_BACKGROUND set to something +// bright) would flip the base and turn these assertions into a local- +// only failure. We sterilize the relevant env vars + dynamically +// import the module fresh so EVERY symbol that closes over the env +// (DEFAULT_THEME, DARK_THEME, LIGHT_THEME, fromSkin) is loaded against +// a known-empty environment. +// +// `detectLightMode` takes env as an explicit arg, so it's safe to import +// statically — but we stay consistent and dynamic-import it too. +const RELEVANT_ENV = [ + 'HERMES_TUI_LIGHT', + 'HERMES_TUI_THEME', + 'HERMES_TUI_BACKGROUND', + 'COLORFGBG', + 'TERM_PROGRAM', +] as const + +async function importThemeWithCleanEnv() { + for (const key of RELEVANT_ENV) { + vi.stubEnv(key, '') + } + vi.resetModules() + return import('../theme.js') +} + +afterEach(() => { + vi.unstubAllEnvs() + vi.resetModules() +}) describe('DEFAULT_THEME', () => { - it('has brand defaults', () => { + it('has brand defaults', async () => { + const { DEFAULT_THEME } = await importThemeWithCleanEnv() + expect(DEFAULT_THEME.brand.name).toBe('Hermes Agent') expect(DEFAULT_THEME.brand.prompt).toBe('❯') expect(DEFAULT_THEME.brand.tool).toBe('┊') }) - it('has color palette', () => { + it('has color palette', async () => { + const { DEFAULT_THEME } = await importThemeWithCleanEnv() + expect(DEFAULT_THEME.color.gold).toBe('#FFD700') expect(DEFAULT_THEME.color.error).toBe('#ef5350') }) }) describe('LIGHT_THEME', () => { - it('avoids bright-yellow accents unreadable on white backgrounds (#11300)', () => { + it('avoids bright-yellow accents unreadable on white backgrounds (#11300)', async () => { + const { LIGHT_THEME } = await importThemeWithCleanEnv() + expect(LIGHT_THEME.color.gold).not.toBe('#FFD700') expect(LIGHT_THEME.color.amber).not.toBe('#FFBF00') expect(LIGHT_THEME.color.dim).not.toBe('#B8860B') expect(LIGHT_THEME.color.statusWarn).not.toBe('#FFD700') }) - it('keeps the same shape as DARK_THEME', () => { + it('keeps the same shape as DARK_THEME', async () => { + const { DARK_THEME, LIGHT_THEME } = await importThemeWithCleanEnv() + expect(Object.keys(LIGHT_THEME.color).sort()).toEqual(Object.keys(DARK_THEME.color).sort()) expect(LIGHT_THEME.brand).toEqual(DARK_THEME.brand) }) }) describe('DEFAULT_THEME aliasing', () => { - it('defaults to DARK_THEME when nothing signals light', () => { - expect(DEFAULT_THEME).toBe(DARK_THEME) + it('defaults to DARK_THEME when nothing signals light', async () => { + const { DEFAULT_THEME, DARK_THEME: DARK } = await importThemeWithCleanEnv() + + expect(DEFAULT_THEME).toBe(DARK) }) }) describe('detectLightMode', () => { - it('returns false on empty env', () => { + it('returns false on empty env', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + expect(detectLightMode({})).toBe(false) }) - it('honors HERMES_TUI_LIGHT on/off', () => { + it('honors HERMES_TUI_LIGHT on/off', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + expect(detectLightMode({ HERMES_TUI_LIGHT: '1' })).toBe(true) expect(detectLightMode({ HERMES_TUI_LIGHT: 'true' })).toBe(true) expect(detectLightMode({ HERMES_TUI_LIGHT: 'on' })).toBe(true) @@ -48,7 +92,9 @@ describe('detectLightMode', () => { expect(detectLightMode({ HERMES_TUI_LIGHT: 'off' })).toBe(false) }) - it('sniffs COLORFGBG bg slots 7 and 15 as light (#11300)', () => { + it('sniffs COLORFGBG bg slots 7 and 15 as light (#11300)', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + expect(detectLightMode({ COLORFGBG: '0;15' })).toBe(true) expect(detectLightMode({ COLORFGBG: '0;default;15' })).toBe(true) expect(detectLightMode({ COLORFGBG: '0;7' })).toBe(true) @@ -56,38 +102,119 @@ describe('detectLightMode', () => { expect(detectLightMode({ COLORFGBG: '7;default;0' })).toBe(false) }) - it('lets HERMES_TUI_LIGHT=0 override a light COLORFGBG', () => { + it('falls through on malformed COLORFGBG with empty/non-numeric trailing field', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + // `Number('')` is 0, so `'15;'` would have been read as bg=0 + // (authoritative dark) and incorrectly blocked TERM_PROGRAM. + // The strict /^\d+$/ guard makes these fall through instead. + const allowList = new Set(['Apple_Terminal']) + + expect(detectLightMode({ COLORFGBG: '15;', TERM_PROGRAM: 'Apple_Terminal' }, allowList)).toBe(true) + expect(detectLightMode({ COLORFGBG: 'default;default', TERM_PROGRAM: 'Apple_Terminal' }, allowList)).toBe(true) + // Without an allow-list match, fall-through still defaults to dark. + expect(detectLightMode({ COLORFGBG: '15;' })).toBe(false) + }) + + it('lets HERMES_TUI_LIGHT=0 override a light COLORFGBG', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + expect(detectLightMode({ COLORFGBG: '0;15', HERMES_TUI_LIGHT: '0' })).toBe(false) }) + + it('honors HERMES_TUI_THEME=light/dark as a symmetric explicit override', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + + expect(detectLightMode({ HERMES_TUI_THEME: 'light' })).toBe(true) + expect(detectLightMode({ HERMES_TUI_THEME: 'dark' })).toBe(false) + expect(detectLightMode({ COLORFGBG: '0;15', HERMES_TUI_THEME: 'dark' })).toBe(false) + expect(detectLightMode({ COLORFGBG: '15;0', HERMES_TUI_THEME: 'light' })).toBe(true) + }) + + it('uses HERMES_TUI_BACKGROUND luminance when COLORFGBG is missing', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + + expect(detectLightMode({ HERMES_TUI_BACKGROUND: '#ffffff' })).toBe(true) + expect(detectLightMode({ HERMES_TUI_BACKGROUND: '#000000' })).toBe(false) + expect(detectLightMode({ HERMES_TUI_BACKGROUND: '#1e1e1e' })).toBe(false) + // Three-char hex normalises like CSS. + expect(detectLightMode({ HERMES_TUI_BACKGROUND: '#fff' })).toBe(true) + // Garbage falls through to the default-dark path. + expect(detectLightMode({ HERMES_TUI_BACKGROUND: 'not-a-colour' })).toBe(false) + }) + + it('rejects partially-invalid hex instead of silently truncating', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + // `parseInt('fffgff'.slice(2,4), 16)` would return 15 — the strict + // regex must reject these inputs so they fall through to default- + // dark instead of producing a false-positive light reading. + expect(detectLightMode({ HERMES_TUI_BACKGROUND: '#fffgff' })).toBe(false) + expect(detectLightMode({ HERMES_TUI_BACKGROUND: 'ffggff' })).toBe(false) + expect(detectLightMode({ HERMES_TUI_BACKGROUND: '#xyz' })).toBe(false) + // Wrong length also rejected (no implicit padding/truncation). + expect(detectLightMode({ HERMES_TUI_BACKGROUND: '#fffff' })).toBe(false) + expect(detectLightMode({ HERMES_TUI_BACKGROUND: '#fffffff' })).toBe(false) + }) + + it('treats COLORFGBG as authoritative when present so it dominates the TERM_PROGRAM allow-list', async () => { + const { detectLightMode } = await importThemeWithCleanEnv() + // Inject a light-default allow-list so the precedence test is + // meaningful even though the production allow-list is empty. + const allowList = new Set(['Apple_Terminal']) + + // Sanity: the allow-list alone WOULD turn this terminal light. + expect(detectLightMode({ TERM_PROGRAM: 'Apple_Terminal' }, allowList)).toBe(true) + + // Dark COLORFGBG must beat the allow-list. + expect( + detectLightMode({ COLORFGBG: '15;0', TERM_PROGRAM: 'Apple_Terminal' }, allowList), + ).toBe(false) + }) }) describe('fromSkin', () => { - it('overrides banner colors', () => { + // `fromSkin` closes over DEFAULT_THEME (which is env-derived), so we + // must dynamic-import it after sterilizing env — otherwise an ambient + // HERMES_TUI_THEME=light would flip the base palette and make these + // assertions order-dependent on the developer's shell. + + it('overrides banner colors', async () => { + const { fromSkin } = await importThemeWithCleanEnv() + expect(fromSkin({ banner_title: '#FF0000' }, {}).color.gold).toBe('#FF0000') }) - it('preserves unset colors', () => { + it('preserves unset colors', async () => { + const { DEFAULT_THEME, fromSkin } = await importThemeWithCleanEnv() + expect(fromSkin({ banner_title: '#FF0000' }, {}).color.amber).toBe(DEFAULT_THEME.color.amber) }) - it('overrides branding', () => { + it('overrides branding', async () => { + const { fromSkin } = await importThemeWithCleanEnv() const { brand } = fromSkin({}, { agent_name: 'TestBot', prompt_symbol: '$' }) + expect(brand.name).toBe('TestBot') expect(brand.prompt).toBe('$') }) - it('defaults for empty skin', () => { + it('defaults for empty skin', async () => { + const { DEFAULT_THEME, fromSkin } = await importThemeWithCleanEnv() + expect(fromSkin({}, {}).color).toEqual(DEFAULT_THEME.color) expect(fromSkin({}, {}).brand.icon).toBe(DEFAULT_THEME.brand.icon) }) - it('passes banner logo/hero', () => { + it('passes banner logo/hero', async () => { + const { fromSkin } = await importThemeWithCleanEnv() + expect(fromSkin({}, {}, 'LOGO', 'HERO').bannerLogo).toBe('LOGO') expect(fromSkin({}, {}, 'LOGO', 'HERO').bannerHero).toBe('HERO') }) - it('maps ui_ color keys + cascades to status', () => { + it('maps ui_ color keys + cascades to status', async () => { + const { fromSkin } = await importThemeWithCleanEnv() const { color } = fromSkin({ ui_ok: '#008000' }, {}) + expect(color.ok).toBe('#008000') expect(color.statusGood).toBe('#008000') }) diff --git a/ui-tui/src/theme.ts b/ui-tui/src/theme.ts index daeedb3377..baff80abf1 100644 --- a/ui-tui/src/theme.ts +++ b/ui-tui/src/theme.ts @@ -179,23 +179,129 @@ export const LIGHT_THEME: Theme = { bannerHero: '' } -// Pick light vs dark. Explicit `HERMES_TUI_LIGHT` wins; otherwise sniff -// `COLORFGBG` (set by XFCE Terminal, rxvt, Terminal.app, etc.) — last field is the -// background ANSI index; 7/15 are the "white" slots most light themes emit (#11300). -export function detectLightMode(env: NodeJS.ProcessEnv = process.env): boolean { - const explicit = (env.HERMES_TUI_LIGHT ?? '').trim().toLowerCase() +const TRUE_RE = /^(?:1|true|yes|on)$/ +const FALSE_RE = /^(?:0|false|no|off)$/ - if (/^(?:1|true|yes|on)$/.test(explicit)) { +// Reserved for future TERM_PROGRAM-based heuristics. Empty by default: +// most modern terminals (Ghostty, Warp, iTerm2, Apple_Terminal) ship a +// dark profile out of the box, so guessing wrong here is more annoying +// than missing a light user — light users can always set +// `HERMES_TUI_LIGHT=1` or `HERMES_TUI_THEME=light`. +const LIGHT_DEFAULT_TERM_PROGRAMS = new Set() + +// Best-effort RGB → luminance check. Currently only accepts a 3- or +// 6-digit hex value (with or without a leading `#`); the env var name +// `HERMES_TUI_BACKGROUND` is intentionally generic so a future OSC11 +// query helper can cache its answer there too, but additional formats +// (rgb()/hsl()/named colours) would need explicit parsing here first. +const LUMA_LIGHT_THRESHOLD = 0.6 + +// Strict allow-list: parseInt(..., 16) silently truncates at the first +// non-hex character (e.g. `fffgff` would parse as `fff` and yield a +// false-positive "white" reading), so reject anything that doesn't match +// the canonical 3- or 6-digit shape up front. +const HEX_3_RE = /^[0-9a-f]{3}$/ +const HEX_6_RE = /^[0-9a-f]{6}$/ + +function backgroundLuminance(raw: string): null | number { + const v = raw.trim().toLowerCase() + + if (!v) { + return null + } + + const hex = v.startsWith('#') ? v.slice(1) : v + const rgb = HEX_6_RE.test(hex) + ? [parseInt(hex.slice(0, 2), 16), parseInt(hex.slice(2, 4), 16), parseInt(hex.slice(4, 6), 16)] + : HEX_3_RE.test(hex) + ? [parseInt(hex[0]! + hex[0]!, 16), parseInt(hex[1]! + hex[1]!, 16), parseInt(hex[2]! + hex[2]!, 16)] + : null + + if (!rgb) { + return null + } + + // Rec. 709 luma — close enough for "is this background bright". + return (0.2126 * rgb[0]! + 0.7152 * rgb[1]! + 0.0722 * rgb[2]!) / 255 +} + +// Pick light vs dark with ordered, explainable signals (#11300): +// +// 1. `HERMES_TUI_LIGHT` boolean — `1`/`true`/`yes`/`on` → light; +// `0`/`false`/`no`/`off` → dark. Either explicit value wins +// regardless of any later signal. +// 2. `HERMES_TUI_THEME` named override — `light` / `dark` win over +// every signal below. +// 3. `HERMES_TUI_BACKGROUND` hex hint (3- or 6-digit) — luminance +// ≥ LUMA_LIGHT_THRESHOLD → light. +// 4. `COLORFGBG` last field — XFCE / rxvt / Terminal.app emit +// slot 7 or 15 on light profiles; 0–15 ranges are otherwise +// treated as authoritatively dark so the TERM_PROGRAM +// allow-list below cannot override an explicit dark profile. +// 5. `TERM_PROGRAM` light-default allow-list (currently empty). +// +// Anything we can't decide stays dark — the default Hermes palette +// is the dark one. +export function detectLightMode( + env: NodeJS.ProcessEnv = process.env, + // Injectable so tests can prove the COLORFGBG-over-TERM_PROGRAM + // precedence rule even though the production allow-list is empty. + lightDefaultTermPrograms: ReadonlySet = LIGHT_DEFAULT_TERM_PROGRAMS, +): boolean { + const lightFlag = (env.HERMES_TUI_LIGHT ?? '').trim().toLowerCase() + + if (TRUE_RE.test(lightFlag)) { return true } - if (/^(?:0|false|no|off)$/.test(explicit)) { + if (FALSE_RE.test(lightFlag)) { return false } - const bg = Number((env.COLORFGBG ?? '').trim().split(';').at(-1)) + const themeFlag = (env.HERMES_TUI_THEME ?? '').trim().toLowerCase() - return bg === 7 || bg === 15 + if (themeFlag === 'light') { + return true + } + + if (themeFlag === 'dark') { + return false + } + + const bgHint = backgroundLuminance(env.HERMES_TUI_BACKGROUND ?? '') + + if (bgHint !== null) { + return bgHint >= LUMA_LIGHT_THRESHOLD + } + + const colorfgbg = (env.COLORFGBG ?? '').trim() + + if (colorfgbg) { + // Validate as a decimal integer before coercing — `Number('')` is 0, + // so a malformed `COLORFGBG='15;'` would otherwise look like an + // authoritative dark slot and incorrectly block the TERM_PROGRAM + // allow-list. Anything that isn't pure digits falls through. + const lastField = colorfgbg.split(';').at(-1) ?? '' + + if (/^\d+$/.test(lastField)) { + const bg = Number(lastField) + + if (bg === 7 || bg === 15) { + return true + } + + // Slots 0–6 and 8–14 are the dark half of the 0–15 ANSI range. + // When COLORFGBG is set we trust it as authoritative — a non-light + // value here shouldn't get overridden by the TERM_PROGRAM allow-list. + if (bg >= 0 && bg < 16) { + return false + } + } + } + + const termProgram = (env.TERM_PROGRAM ?? '').trim() + + return lightDefaultTermPrograms.has(termProgram) } export const DEFAULT_THEME: Theme = detectLightMode() ? LIGHT_THEME : DARK_THEME From 7d81d763667b6733e06658ff9d7e522167f76a74 Mon Sep 17 00:00:00 2001 From: brooklyn! Date: Tue, 28 Apr 2026 16:19:16 -0700 Subject: [PATCH 14/23] feat(tui): pluggable busy-indicator styles (#13610) (#17150) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(tui): pluggable busy-indicator styles (kaomoji/emoji/unicode/ascii) The status-bar `FaceTicker` rotated through wide-and-variable kaomoji glyphs (`(。•́︿•̀。)`, `( ͡° ͜ʖ ͡°)`, …) every 2.5s. Real display widths range from ~5 to ~16 columns, so the rest of the bar (cwd, ctx %, voice, bg counter) shifted on every cycle. Padding the verb alone (#17116) helped but didn't address the dominant jitter source — the glyph itself. Add four indicator styles, configurable + hot-swappable: * `kaomoji` (default — preserves the existing vibe; verb is now pad-stable so the only width churn left is the kaomoji itself). * `emoji` — single 2-col emoji frame (`⚕ 🌀 🤔 ✨ 🍵 🔮`). * `unicode` — `unicode-animations` braille spinner (1-col, smooth). * `ascii` — `| / - \` (1-col, max compat). Wires: * `display.tui_status_indicator` in `DEFAULT_CONFIG` (default `kaomoji`). * New JSON-RPC `config.set/get indicator` keys, narrow allow-list. * `applyDisplay` reads the field and patches `UiState.indicatorStyle`, so the existing `mtime` poll picks up `~/.hermes/config.yaml` edits within ~5s without a TUI restart. * `/indicator [style]` slash command (alias `/indicator-style`, subcommand completion `kaomoji|emoji|unicode|ascii`). Bare form shows the current style; setter fires `config.set` and optimistically `patchUiState({ indicatorStyle })` so the live TUI swaps immediately, matching the `/skin` UX. * `CommandDef("indicator", ..., subcommands=...)` so classic CLI autocomplete + TUI `complete.slash` both surface it. * `FaceTicker` decouples spinner cadence from verb cadence — the glyph runs at the spinner's authored interval (or `FACE_TICK_MS` for kaomoji), the verb stays on the original 2.5s cycle, and both re-arm cleanly when style changes. Tests: * `normalizeIndicatorStyle` rejects unknown / non-string input. * `applyDisplay → tui_status_indicator` covers fan-out + fallback. * `/indicator