mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-13 09:01:54 +00:00
fix(compressor): keep last visible assistant reply out of compaction summary + label handoffs in WebUI (#29824)
Two-pronged fix for the WebUI "context compaction block in place of last assistant response" regression. Agent layer (the real fix). ``_find_tail_cut_by_tokens`` already had ``_ensure_last_user_message_in_tail`` to keep the most recent user request out of the compressed middle (#10896), but no symmetric anchor for the assistant side. When the conversation has an oversized recent tool result or a long stretch of tool-call/result pairs *after* the assistant's last visible reply, the token-budget walk can stop with the previously-visible reply on the wrong side of ``cut_idx``. The summariser then rolls it into the single ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block persisted as ``role="user"`` or ``role="assistant"``, and from the operator's perspective the WebUI session viewer (``web/src/pages/SessionsPage.tsx``) and the TUI chat panel both suddenly show the opaque "Context compaction" block in the slot where they were just reading the actual answer: User: "i cant see the output of the last message you sent, i did see it previously, however now see 'context compaction'" Added ``_ensure_last_assistant_message_in_tail`` mirror of the user-side anchor. It looks for the most recent assistant message with non-empty text content (skipping tool-call-only assistant "stubs" which the UI renders as small "calling tool X" indicators rather than a readable bubble) and walks ``cut_idx`` back through the standard ``_align_boundary_backward`` so we don't split a tool_call/result group that immediately precedes it. The two anchors are chained — each only walks ``cut_idx`` backward, so the tail can only grow. Falls back to "most recent assistant of any kind" only when no content-bearing reply exists in the compressible region (fresh multi-step tool sequence with no prior reply) — in that case the agent-side fix is effectively a no-op and the existing user-message anchor carries the load. WebUI layer (clarity). 
Added an ``isCompactionMessage`` detector that recognises the ``[CONTEXT COMPACTION — REFERENCE ONLY]`` (current) and ``[CONTEXT SUMMARY]:`` (legacy) prefixes from ``agent/context_compressor.py``, and a new ``compaction`` entry in ``MessageBubble``'s ``ROLE_STYLES`` map. Compaction blocks now render as muted, italicised system-style rows labelled ``Context handoff`` — clearly metadata, not the assistant's actual reply — so an operator scrolling back through a long session can't mistake the summary for a real answer. The detected prefixes are kept inline (rather than imported) because the WebUI bundle has no Python interop. A guardrail comment points readers at the source-of-truth constants in ``agent/context_compressor.py``.
This commit is contained in:
parent
7a318aae22
commit
691ff7c188
2 changed files with 149 additions and 4 deletions
|
|
@ -1833,6 +1833,105 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
return i
|
||||
return -1
|
||||
|
||||
def _find_last_assistant_message_idx(
|
||||
self, messages: List[Dict[str, Any]], head_end: int
|
||||
) -> int:
|
||||
"""Return the index of the last user-visible assistant reply at or
|
||||
after *head_end*, or -1.
|
||||
|
||||
A "user-visible reply" is an assistant message with non-empty
|
||||
textual content — i.e. one that the WebUI / TUI / SessionsPage
|
||||
rendered as a bubble the operator could read. We deliberately
|
||||
skip assistant messages that contain only ``tool_calls`` (and
|
||||
no text), because those render as small "calling tool X"
|
||||
indicators and aren't what the reporter means by "the output
|
||||
of the last message you sent" (#29824).
|
||||
|
||||
Falling back to the most recent assistant message of ANY kind
|
||||
only kicks in when no content-bearing assistant message exists
|
||||
in the compressible region — typically a fresh session that
|
||||
just started a multi-step tool sequence with no prior reply
|
||||
to anchor. In that case the agent fix is a no-op and the
|
||||
existing user-message anchor carries the load.
|
||||
"""
|
||||
last_any = -1
|
||||
for i in range(len(messages) - 1, head_end - 1, -1):
|
||||
msg = messages[i]
|
||||
if msg.get("role") != "assistant":
|
||||
continue
|
||||
if last_any < 0:
|
||||
last_any = i
|
||||
content = msg.get("content")
|
||||
if isinstance(content, str) and content.strip():
|
||||
return i
|
||||
if isinstance(content, list):
|
||||
# Multimodal / Anthropic-style content: look for any
|
||||
# text block with non-empty text.
|
||||
for part in content:
|
||||
if isinstance(part, dict):
|
||||
text = part.get("text") or part.get("content")
|
||||
if isinstance(text, str) and text.strip():
|
||||
return i
|
||||
return last_any
|
||||
|
||||
def _ensure_last_assistant_message_in_tail(
|
||||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
cut_idx: int,
|
||||
head_end: int,
|
||||
) -> int:
|
||||
"""Guarantee the most recent assistant message is in the protected tail.
|
||||
|
||||
WebUI / TUI / SessionsPage bug (#29824). Without this anchor,
|
||||
``_find_tail_cut_by_tokens`` can leave the user's most recent
|
||||
visible assistant response inside the compressed middle region —
|
||||
especially when the conversation has a single oversized tool
|
||||
result or a long stretch of tool-call/result pairs after the
|
||||
last assistant reply. The summariser then rolls that reply up
|
||||
into the single ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block
|
||||
persisted as ``role="user"`` or ``role="assistant"``. From the
|
||||
operator's perspective the WebUI session viewer
|
||||
(``web/src/pages/SessionsPage.tsx``) and the TUI chat panel
|
||||
both suddenly show the opaque "Context compaction" block in the
|
||||
slot where they were just reading the assistant's actual reply:
|
||||
|
||||
User: "i cant see the output of the last message you
|
||||
sent, i did see it previously, however now see
|
||||
'context compaction'"
|
||||
|
||||
Mirror of ``_ensure_last_user_message_in_tail`` but anchors on
|
||||
the last assistant-role message. Re-runs the tool-group
|
||||
alignment so we don't split a ``tool_call`` / ``tool_result``
|
||||
group that immediately precedes the anchored message — orphaned
|
||||
tool messages would otherwise be removed by
|
||||
``_sanitize_tool_pairs`` and trigger the same data-loss symptom
|
||||
we're trying to prevent.
|
||||
"""
|
||||
last_asst_idx = self._find_last_assistant_message_idx(messages, head_end)
|
||||
if last_asst_idx < 0:
|
||||
# No assistant message in the compressible region — nothing
|
||||
# to anchor (single-turn pre-reply state, etc.).
|
||||
return cut_idx
|
||||
if last_asst_idx >= cut_idx:
|
||||
# Already in the tail — the token-budget walk did the right
|
||||
# thing on its own.
|
||||
return cut_idx
|
||||
# Pull cut_idx back to the assistant message, then re-align so
|
||||
# we don't split a tool group that immediately precedes it
|
||||
# (e.g. an ``assistant(tool_calls)`` → ``tool(result)`` →
|
||||
# ``assistant(final reply)`` sequence would otherwise leave the
|
||||
# ``tool`` orphan when cut lands at the final reply).
|
||||
new_cut = self._align_boundary_backward(messages, last_asst_idx)
|
||||
if not self.quiet_mode:
|
||||
logger.debug(
|
||||
"Anchoring tail cut to last assistant message at index %d "
|
||||
"(was %d, aligned to %d) to keep the previously-visible "
|
||||
"reply out of the compaction summary (#29824)",
|
||||
last_asst_idx, cut_idx, new_cut,
|
||||
)
|
||||
# Safety: never go back into the head region.
|
||||
return max(new_cut, head_end + 1)
|
||||
|
||||
def _ensure_last_user_message_in_tail(
|
||||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
|
|
@ -1976,6 +2075,13 @@ This compaction should PRIORITISE preserving all information related to the focu
|
|||
# active task is never lost to compression (fixes #10896).
|
||||
cut_idx = self._ensure_last_user_message_in_tail(messages, cut_idx, head_end)
|
||||
|
||||
# Ensure the most recent assistant message is always in the tail
|
||||
# so the previously-visible reply isn't silently rolled into the
|
||||
# ``[CONTEXT COMPACTION — REFERENCE ONLY]`` block (fixes #29824).
|
||||
# Each anchor only walks ``cut_idx`` backward, so chaining them is
|
||||
# monotonic — the tail can only grow, never shrink.
|
||||
cut_idx = self._ensure_last_assistant_message_in_tail(messages, cut_idx, head_end)
|
||||
|
||||
return max(cut_idx, head_end + 1)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -147,6 +147,32 @@ function ToolCallBlock({
|
|||
);
|
||||
}
|
||||
|
||||
// Context-compaction handoff blocks are written by
// ``agent/context_compressor.py`` and persisted with ``role="user"`` or
// ``role="assistant"``; their content begins with one of the prefixes
// listed below. They are metadata, NOT turns the user typed or the model
// produced. Styling them like ordinary messages confuses operators who
// scroll the session timeline (#29824 — "WebUI can show context
// compaction block instead of latest assistant response after
// compression"), so they are detected here and rendered as a muted,
// clearly-labelled "Context handoff" row instead.
//
// Keep these prefixes in sync with ``SUMMARY_PREFIX`` and
// ``LEGACY_SUMMARY_PREFIX`` in ``agent/context_compressor.py``.
const COMPACTION_PREFIXES = [
  "[CONTEXT COMPACTION — REFERENCE ONLY]",
  "[CONTEXT COMPACTION - REFERENCE ONLY]",
  "[CONTEXT SUMMARY]:",
] as const;

/**
 * Reports whether a session message is a context-compaction handoff block.
 *
 * A message qualifies only when its role is `"user"` or `"assistant"`, its
 * content is a string, and that string — ignoring leading whitespace —
 * starts with one of `COMPACTION_PREFIXES`.
 *
 * @param msg - The session message to inspect.
 * @returns `true` for a compaction handoff block, `false` otherwise.
 */
function isCompactionMessage(msg: SessionMessage): boolean {
  const { role, content } = msg;
  const isConversationalRole = role === "user" || role === "assistant";
  if (!isConversationalRole || typeof content !== "string") {
    return false;
  }

  // Leading whitespace is ignored so an indented marker still matches.
  const leading = content.trimStart();
  for (const prefix of COMPACTION_PREFIXES) {
    if (leading.startsWith(prefix)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
function MessageBubble({
|
||||
msg,
|
||||
highlight,
|
||||
|
|
@ -180,12 +206,25 @@ function MessageBubble({
|
|||
text: "text-warning",
|
||||
label: t.sessions.roles.tool,
|
||||
},
|
||||
// Compaction handoffs render as faded system-style metadata with a
|
||||
// distinctive label so they can't be mistaken for real assistant
|
||||
// replies during a scroll-back review (#29824).
|
||||
compaction: {
|
||||
bg: "bg-muted/50",
|
||||
text: "text-muted-foreground italic",
|
||||
label: "Context handoff",
|
||||
},
|
||||
};
|
||||
|
||||
const style = ROLE_STYLES[msg.role] ?? ROLE_STYLES.system;
|
||||
const label = msg.tool_name
|
||||
? `${t.sessions.roles.tool}: ${msg.tool_name}`
|
||||
: style.label;
|
||||
const isCompaction = isCompactionMessage(msg);
|
||||
const style = isCompaction
|
||||
? ROLE_STYLES.compaction
|
||||
: ROLE_STYLES[msg.role] ?? ROLE_STYLES.system;
|
||||
const label = isCompaction
|
||||
? ROLE_STYLES.compaction.label
|
||||
: msg.tool_name
|
||||
? `${t.sessions.roles.tool}: ${msg.tool_name}`
|
||||
: style.label;
|
||||
|
||||
// Check if any search term appears as a prefix of any word in content
|
||||
const isHit = (() => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue