feat(moa): render reference-model blocks in TUI and desktop, not just CLI (#53855)

The MoA reference-block display (each reference model's output shown as a
labelled thinking block before the aggregator responds) previously existed
only in the classic CLI. The facade already emits moa.reference / moa.aggregating
through tool_progress_callback; this wires the TUI and desktop consumers.

- tui_gateway/server.py: _on_tool_progress relays moa.reference (label / text /
  index / count) and moa.aggregating to the Ink/desktop client as their own
  events.
- ui-tui: gatewayTypes adds the two event shapes; createGatewayEventHandler
  routes them; turnController.recordMoaReference pushes a committed
  thinking-style segment tagged with the source model. Shown regardless of
  showReasoning — references ARE the mixture-of-agents process the user opted
  into, not ordinary reasoning. moa.aggregating is a status-only transition
  (no transcript entry).
- apps/desktop: use-message-stream appends each reference as a labelled
  reasoning chunk via the existing reasoning disclosure; GatewayEventPayload
  gains label/index/aggregator.

Tests: tui_gateway emit (3), Ink handler render + showReasoning-independence +
aggregating-no-segment (3). TUI typecheck/lint clean; desktop typecheck/lint
clean.
This commit is contained in:
Teknium 2026-06-27 18:46:20 -07:00 committed by GitHub
parent d3d621f7c3
commit 163cb24d45
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 246 additions and 0 deletions

View file

@ -900,6 +900,29 @@ export function useMessageStream({
appendReasoningDelta(sessionId, coerceThinkingText(payload?.text), true)
}
if (isActiveEvent) {
setPetActivity({ reasoning: true })
}
} else if (event.type === 'moa.reference') {
// MoA reference-model output — surface as a labelled thinking chunk
// (tagged with the source model) before the aggregator's response, so
// the mixture-of-agents process is visible. Reuses the reasoning
// disclosure rather than introducing a parallel surface.
if (sessionId) {
const label = coerceGatewayText(payload?.label) || 'reference'
const idx = typeof payload?.index === 'number' ? payload.index : undefined
const cnt = typeof payload?.count === 'number' ? payload.count : undefined
const header = idx && cnt ? `◇ Reference ${idx}/${cnt}${label}` : `◇ Reference — ${label}`
const body = coerceThinkingText(payload?.text)
appendReasoningDelta(sessionId, `${header}\n${body}\n\n`, true)
}
if (isActiveEvent) {
setPetActivity({ reasoning: true })
}
} else if (event.type === 'moa.aggregating') {
// Status transition only; the aggregator's reply arrives via the normal
// message stream. No reasoning/transcript mutation here.
if (isActiveEvent) {
setPetActivity({ reasoning: true })
}

View file

@ -72,6 +72,10 @@ export type GatewayEventPayload = {
// session.title (live auto-title push) — stored session id + generated title
session_id?: string
title?: string
// moa.reference / moa.aggregating (Mixture of Agents per-model relay)
label?: string
index?: number
aggregator?: string
}
export function textPart(text: string): ChatMessagePart {

View file

@ -0,0 +1,98 @@
"""Tests for the TUI gateway relaying MoA reference events to the client.
When a MoA preset is the active model, the agent's tool_progress_callback emits
``moa.reference`` (one per reference model, before the aggregator acts) and a
single ``moa.aggregating`` marker. ``_on_tool_progress`` must forward these to
the Ink/desktop client as labelled events so each reference renders like a
thinking block tagged with its source model.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture()
def server():
with patch.dict(
"sys.modules",
{
"hermes_constants": MagicMock(
get_hermes_home=MagicMock(return_value="/tmp/hermes_test_moa_emit")
),
"hermes_cli.env_loader": MagicMock(),
"hermes_cli.banner": MagicMock(),
"hermes_state": MagicMock(),
},
):
import importlib
mod = importlib.import_module("tui_gateway.server")
yield mod
mod._sessions.clear()
@pytest.fixture()
def emits(server, monkeypatch):
captured: list = []
monkeypatch.setattr(
server,
"_emit",
lambda event, sid, payload=None: captured.append((event, sid, payload)),
)
monkeypatch.setattr(server, "_tool_progress_enabled", lambda sid: True)
return captured
def test_moa_reference_relayed_with_label_and_index(server, emits):
server._on_tool_progress(
"sid-1",
"moa.reference",
"openrouter:openai/gpt-5.5",
"Paris is the capital of France.",
None,
moa_index=1,
moa_count=2,
)
assert len(emits) == 1
event, sid, payload = emits[0]
assert event == "moa.reference"
assert sid == "sid-1"
assert payload["label"] == "openrouter:openai/gpt-5.5"
assert payload["text"] == "Paris is the capital of France."
assert payload["index"] == 1
assert payload["count"] == 2
def test_moa_aggregating_relayed(server, emits):
server._on_tool_progress(
"sid-1",
"moa.aggregating",
"openrouter:anthropic/claude-opus-4.8",
None,
None,
)
assert len(emits) == 1
event, sid, payload = emits[0]
assert event == "moa.aggregating"
assert payload["aggregator"] == "openrouter:anthropic/claude-opus-4.8"
def test_moa_reference_without_index_omits_index(server, emits):
server._on_tool_progress(
"sid-1",
"moa.reference",
"openrouter:anthropic/claude-opus-4.8",
"The capital is Paris.",
None,
)
assert len(emits) == 1
_event, _sid, payload = emits[0]
assert "index" not in payload
assert "count" not in payload
assert payload["label"] == "openrouter:anthropic/claude-opus-4.8"

View file

@ -3374,6 +3374,24 @@ def _on_tool_progress(
payload["verbose"] = True
_emit("reasoning.available", sid, payload)
return
if event_type == "moa.reference" and name:
# MoA reference-model output — relay as a labelled block the Ink/desktop
# client renders before the aggregator's response (like a thinking
# block, tagged with the source model). `name` is the slot label,
# `preview` is the reference text.
ref_payload: dict[str, object] = {
"label": str(name),
"text": str(preview or ""),
}
if _kwargs.get("moa_index") is not None:
ref_payload["index"] = _kwargs.get("moa_index")
if _kwargs.get("moa_count") is not None:
ref_payload["count"] = _kwargs.get("moa_count")
_emit("moa.reference", sid, ref_payload)
return
if event_type == "moa.aggregating":
_emit("moa.aggregating", sid, {"aggregator": str(name or "")})
return
if event_type.startswith("subagent."):
payload = {
"goal": str(_kwargs.get("goal") or ""),

View file

@ -410,6 +410,55 @@ describe('createGatewayEventHandler', () => {
expect(appended[1]).toMatchObject({ role: 'assistant', text: 'final answer' })
})
it('renders moa.reference as a labelled thinking-style segment', () => {
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))
onEvent({ payload: {}, type: 'message.start' } as any)
onEvent({
payload: { count: 2, index: 1, label: 'openrouter:openai/gpt-5.5', text: 'Paris.' },
type: 'moa.reference'
} as any)
onEvent({
payload: { count: 2, index: 2, label: 'openrouter:anthropic/claude-opus-4.8', text: 'Paris.' },
type: 'moa.reference'
} as any)
const segments = getTurnState().streamSegments
const refBlocks = segments.filter(m => typeof m.thinking === 'string' && m.thinking.includes('Reference'))
expect(refBlocks).toHaveLength(2)
expect(refBlocks[0]?.thinking).toContain('Reference 1/2 — openrouter:openai/gpt-5.5')
expect(refBlocks[0]?.thinking).toContain('Paris.')
expect(refBlocks[1]?.thinking).toContain('Reference 2/2 — openrouter:anthropic/claude-opus-4.8')
})
it('renders moa.reference even when showReasoning is off (it is the MoA process, not reasoning)', () => {
patchUiState({ showReasoning: false })
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))
onEvent({ payload: {}, type: 'message.start' } as any)
onEvent({
payload: { label: 'openrouter:openai/gpt-5.5', text: 'Four.' },
type: 'moa.reference'
} as any)
const segments = getTurnState().streamSegments
const refBlocks = segments.filter(m => typeof m.thinking === 'string' && m.thinking.includes('Reference'))
expect(refBlocks).toHaveLength(1)
expect(refBlocks[0]?.thinking).toContain('openrouter:openai/gpt-5.5')
})
it('moa.aggregating does not append a transcript segment', () => {
const appended: Msg[] = []
const onEvent = createGatewayEventHandler(buildCtx(appended))
onEvent({ payload: {}, type: 'message.start' } as any)
const before = getTurnState().streamSegments.length
onEvent({ payload: { aggregator: 'openrouter:anthropic/claude-opus-4.8' }, type: 'moa.aggregating' } as any)
expect(getTurnState().streamSegments.length).toBe(before)
})
it('uses message.complete reasoning when no streamed reasoning ref', () => {
const appended: Msg[] = []
const fromServer = 'recovered from last_reasoning'

View file

@ -688,6 +688,21 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
return
case 'moa.reference':
turnController.recordMoaReference(
String(ev.payload?.label ?? 'reference'),
String(ev.payload?.text ?? ''),
typeof ev.payload?.index === 'number' ? ev.payload.index : undefined,
typeof ev.payload?.count === 'number' ? ev.payload.count : undefined
)
return
case 'moa.aggregating':
// Spinner/status transition only — the aggregator's response follows
// through the normal message stream. No committed transcript entry.
return
case 'tool.progress':
if (ev.payload?.preview && ev.payload.name) {
turnController.recordToolProgress(ev.payload.name, ev.payload.preview)

View file

@ -690,6 +690,39 @@ class TurnController {
this.pulseReasoningStreaming()
}
/**
* Render one MoA reference model's output as a committed labelled block
* before the aggregator responds. Unlike reasoning, references are shown
* regardless of showReasoning (they ARE the mixture-of-agents process the
* user opted into by selecting a MoA preset). Each becomes its own
* thinking-style segment tagged with the source model, so a multi-reference
* preset builds a stack the user can scroll.
*/
recordMoaReference(label: string, text: string, index?: number, count?: number) {
if (this.interrupted) {
return
}
// Close any open reasoning segment so the reference block lands as its own
// committed entry rather than merging into streaming reasoning.
this.closeReasoningSegment()
const header =
index && count ? `◇ Reference ${index}/${count}${label}` : `◇ Reference — ${label}`
const body = text.trim()
const thinking = body ? `${header}\n${body}` : header
this.pushSegment({
kind: 'trail',
role: 'system',
text: '',
thinking,
thinkingTokens: estimateTokensRough(thinking)
})
patchTurnState({ streamSegments: this.segmentMessages })
}
recordReasoningDelta(text: string, force = false) {
if (this.interrupted || (!force && !getUiState().showReasoning)) {
return

View file

@ -658,6 +658,12 @@ export type GatewayEvent =
session_id?: string
type: 'reasoning.delta' | 'reasoning.available'
}
| {
payload: { count?: number; index?: number; label?: string; text?: string }
session_id?: string
type: 'moa.reference'
}
| { payload?: { aggregator?: string }; session_id?: string; type: 'moa.aggregating' }
| { payload: { name?: string; preview?: string }; session_id?: string; type: 'tool.progress' }
| { payload: { name?: string }; session_id?: string; type: 'tool.generating' }
| {