mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-25 05:52:34 +00:00
Persisted assistant `reasoning_content` / `reasoning` fields are now emitted as ACP `agent_thought_chunk` notifications during `_replay_session_history`, so editor clients (Zed, etc.) rebuild collapsed Thinking panes when the user re-opens a session that used a thinking model. Ordering matches live streaming: thought precedes message text within the same assistant turn, mirroring how `reasoning_callback` deltas arrive before `stream_delta_callback` deltas in `events.py::make_thinking_cb` / `make_message_cb`. Behavior on non-reasoning histories is unchanged; the replay loop's existing text / tool_call / tool_call_update / plan emission is preserved bit-for-bit. Closes #12285. Credit: - @Yukipukii1 (#14691) — original thought-replay design via `acp.update_agent_thought_text`; the tool-call portion of that PR has since landed via #19139, but the reasoning replay is theirs. - @HenkDz (#17652 / #18578) — established the `_replay_session_history` and `_history_*` helper conventions this builds on. - @D1zzyDwarf (#16531) — also closed by this work.
This commit is contained in:
parent
a91a57fa5a
commit
f3a4af9cf2
2 changed files with 277 additions and 22 deletions
|
|
@ -18,6 +18,7 @@ import acp
|
||||||
from acp.schema import (
|
from acp.schema import (
|
||||||
AgentCapabilities,
|
AgentCapabilities,
|
||||||
AgentMessageChunk,
|
AgentMessageChunk,
|
||||||
|
AgentThoughtChunk,
|
||||||
AuthenticateResponse,
|
AuthenticateResponse,
|
||||||
AvailableCommand,
|
AvailableCommand,
|
||||||
AvailableCommandsUpdate,
|
AvailableCommandsUpdate,
|
||||||
|
|
@ -788,14 +789,20 @@ class HermesACPAgent(acp.Agent):
|
||||||
# ---- Session management -------------------------------------------------
|
# ---- Session management -------------------------------------------------
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _history_message_text(message: dict[str, Any]) -> str:
|
def _flatten_history_text(value: Any) -> str:
|
||||||
"""Extract displayable text from a persisted OpenAI-style message."""
|
"""Normalize a persisted text-or-text-parts value into a single string.
|
||||||
content = message.get("content")
|
|
||||||
if isinstance(content, str):
|
OpenAI-style assistant content (and provider reasoning fields) can arrive
|
||||||
return content.strip()
|
as either a scalar string or a list of ``{"text": ...}`` /
|
||||||
if isinstance(content, list):
|
``{"type": "text", "content": ...}`` parts. Whitespace-only inputs
|
||||||
|
collapse to an empty string so callers can treat ``""`` as "nothing to
|
||||||
|
emit".
|
||||||
|
"""
|
||||||
|
if isinstance(value, str):
|
||||||
|
return value.strip()
|
||||||
|
if isinstance(value, list):
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
for item in content:
|
for item in value:
|
||||||
if isinstance(item, dict):
|
if isinstance(item, dict):
|
||||||
text = item.get("text")
|
text = item.get("text")
|
||||||
if isinstance(text, str):
|
if isinstance(text, str):
|
||||||
|
|
@ -807,6 +814,29 @@ class HermesACPAgent(acp.Agent):
|
||||||
return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
|
return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _history_message_text(cls, message: dict[str, Any]) -> str:
|
||||||
|
"""Extract displayable text from a persisted OpenAI-style message."""
|
||||||
|
return cls._flatten_history_text(message.get("content"))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _history_reasoning_text(cls, message: dict[str, Any]) -> str:
|
||||||
|
"""Extract displayable reasoning/thought text from a persisted assistant message.
|
||||||
|
|
||||||
|
Returns the first non-empty value among ``reasoning_content`` (the
|
||||||
|
canonical field used by DeepSeek / Moonshot and the post-#16892
|
||||||
|
chat-completions normalizer) and ``reasoning`` (used by the codex
|
||||||
|
event projector and several other transports). Both keys are
|
||||||
|
actively written by live code paths, so neither branch is
|
||||||
|
deprecated — they cover different transports rather than old vs.
|
||||||
|
new sessions.
|
||||||
|
"""
|
||||||
|
for key in ("reasoning_content", "reasoning"):
|
||||||
|
text = cls._flatten_history_text(message.get(key))
|
||||||
|
if text:
|
||||||
|
return text
|
||||||
|
return ""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _history_message_update(
|
def _history_message_update(
|
||||||
*,
|
*,
|
||||||
|
|
@ -827,6 +857,11 @@ class HermesACPAgent(acp.Agent):
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _history_thought_update(text: str) -> AgentThoughtChunk:
|
||||||
|
"""Build an ACP history replay update for an assistant thought."""
|
||||||
|
return acp.update_agent_thought_text(text)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
|
def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]:
|
||||||
"""Extract function name/arguments from an OpenAI-style tool_call."""
|
"""Extract function name/arguments from an OpenAI-style tool_call."""
|
||||||
|
|
@ -858,9 +893,10 @@ class HermesACPAgent(acp.Agent):
|
||||||
|
|
||||||
Zed's ACP history UI calls ``session/load`` after the user picks an item
|
Zed's ACP history UI calls ``session/load`` after the user picks an item
|
||||||
from the Agents sidebar. The agent must then replay the full conversation
|
from the Agents sidebar. The agent must then replay the full conversation
|
||||||
as user/assistant chunks plus reconstructed tool-call start/completion
|
as user/assistant chunks, thinking-mode thought chunks, plus reconstructed
|
||||||
notifications; merely restoring server-side state makes Hermes remember
|
tool-call start/completion notifications; merely restoring server-side
|
||||||
context, but leaves the editor looking like a clean thread.
|
state makes Hermes remember context, but leaves the editor looking like a
|
||||||
|
clean thread.
|
||||||
"""
|
"""
|
||||||
if not self._conn or not state.history:
|
if not self._conn or not state.history:
|
||||||
return
|
return
|
||||||
|
|
@ -882,24 +918,37 @@ class HermesACPAgent(acp.Agent):
|
||||||
for message in state.history:
|
for message in state.history:
|
||||||
role = str(message.get("role") or "")
|
role = str(message.get("role") or "")
|
||||||
|
|
||||||
if role in {"user", "assistant"}:
|
if role == "user":
|
||||||
|
text = self._history_message_text(message)
|
||||||
|
if text:
|
||||||
|
update = self._history_message_update(role=role, text=text)
|
||||||
|
if update is not None and not await _send(update):
|
||||||
|
return
|
||||||
|
continue
|
||||||
|
|
||||||
|
if role == "assistant":
|
||||||
|
thought = self._history_reasoning_text(message)
|
||||||
|
if thought and not await _send(self._history_thought_update(thought)):
|
||||||
|
return
|
||||||
|
|
||||||
text = self._history_message_text(message)
|
text = self._history_message_text(message)
|
||||||
if text:
|
if text:
|
||||||
update = self._history_message_update(role=role, text=text)
|
update = self._history_message_update(role=role, text=text)
|
||||||
if update is not None and not await _send(update):
|
if update is not None and not await _send(update):
|
||||||
return
|
return
|
||||||
|
|
||||||
if role == "assistant" and isinstance(message.get("tool_calls"), list):
|
tool_calls = message.get("tool_calls")
|
||||||
for tool_call in message["tool_calls"]:
|
if isinstance(tool_calls, list):
|
||||||
if not isinstance(tool_call, dict):
|
for tool_call in tool_calls:
|
||||||
continue
|
if not isinstance(tool_call, dict):
|
||||||
tool_call_id = self._history_tool_call_id(tool_call)
|
continue
|
||||||
if not tool_call_id:
|
tool_call_id = self._history_tool_call_id(tool_call)
|
||||||
continue
|
if not tool_call_id:
|
||||||
tool_name, args = self._history_tool_call_name_args(tool_call)
|
continue
|
||||||
active_tool_calls[tool_call_id] = (tool_name, args)
|
tool_name, args = self._history_tool_call_name_args(tool_call)
|
||||||
if not await _send(build_tool_start(tool_call_id, tool_name, args)):
|
active_tool_calls[tool_call_id] = (tool_name, args)
|
||||||
return
|
if not await _send(build_tool_start(tool_call_id, tool_name, args)):
|
||||||
|
return
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if role == "tool":
|
if role == "tool":
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ from acp.schema import (
|
||||||
AgentCapabilities,
|
AgentCapabilities,
|
||||||
AgentMessageChunk,
|
AgentMessageChunk,
|
||||||
AgentPlanUpdate,
|
AgentPlanUpdate,
|
||||||
|
AgentThoughtChunk,
|
||||||
AuthenticateResponse,
|
AuthenticateResponse,
|
||||||
AvailableCommandsUpdate,
|
AvailableCommandsUpdate,
|
||||||
Implementation,
|
Implementation,
|
||||||
|
|
@ -466,6 +467,211 @@ class TestSessionOps:
|
||||||
for update in updates
|
for update in updates
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_load_session_replays_reasoning_thought_before_message(self, agent):
|
||||||
|
"""Thinking-model thoughts must be replayed via ``agent_thought_chunk``.
|
||||||
|
|
||||||
|
Regression for #12285 — when a session is loaded, persisted assistant
|
||||||
|
``reasoning_content`` / ``reasoning`` fields must surface as ACP
|
||||||
|
``AgentThoughtChunk`` notifications in the same relative position they
|
||||||
|
had live (thought streams before the assistant message text), so Zed's
|
||||||
|
collapsed Thinking pane rebuilds instead of vanishing on reconnect.
|
||||||
|
"""
|
||||||
|
mock_conn = MagicMock(spec=acp.Client)
|
||||||
|
mock_conn.session_update = AsyncMock()
|
||||||
|
agent._conn = mock_conn
|
||||||
|
|
||||||
|
new_resp = await agent.new_session(cwd="/tmp")
|
||||||
|
state = agent.session_manager.get_session(new_resp.session_id)
|
||||||
|
state.history = [
|
||||||
|
{"role": "user", "content": "Walk me through it."},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"reasoning_content": "Let me think step by step about the request.",
|
||||||
|
"content": "Here is the plan.",
|
||||||
|
},
|
||||||
|
{"role": "user", "content": "And the legacy case?"},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
# No reasoning_content — exercise the legacy "reasoning" fallback
|
||||||
|
# path so sessions persisted before #16892 still replay thoughts.
|
||||||
|
"reasoning": "Older sessions stored the trace under the internal key.",
|
||||||
|
"content": "Same idea, older field name.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_conn.session_update.reset_mock()
|
||||||
|
resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
assert isinstance(resp, LoadSessionResponse)
|
||||||
|
|
||||||
|
replay_kinds = [
|
||||||
|
getattr(call.kwargs.get("update"), "session_update", None)
|
||||||
|
for call in mock_conn.session_update.await_args_list
|
||||||
|
if getattr(call.kwargs.get("update"), "session_update", None)
|
||||||
|
in {"user_message_chunk", "agent_message_chunk", "agent_thought_chunk"}
|
||||||
|
]
|
||||||
|
assert replay_kinds == [
|
||||||
|
"user_message_chunk",
|
||||||
|
"agent_thought_chunk",
|
||||||
|
"agent_message_chunk",
|
||||||
|
"user_message_chunk",
|
||||||
|
"agent_thought_chunk",
|
||||||
|
"agent_message_chunk",
|
||||||
|
]
|
||||||
|
|
||||||
|
thought_updates = [
|
||||||
|
call.kwargs["update"]
|
||||||
|
for call in mock_conn.session_update.await_args_list
|
||||||
|
if isinstance(call.kwargs.get("update"), AgentThoughtChunk)
|
||||||
|
]
|
||||||
|
assert len(thought_updates) == 2
|
||||||
|
assert thought_updates[0].content.text == "Let me think step by step about the request."
|
||||||
|
assert thought_updates[1].content.text == "Older sessions stored the trace under the internal key."
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_load_session_replays_reasoning_only_turn(self, agent):
|
||||||
|
"""Assistant turns with reasoning but no content should still emit a thought.
|
||||||
|
|
||||||
|
Pure reasoning-only assistant entries (e.g. a thinking step before a
|
||||||
|
tool-call turn) commonly carry ``reasoning_content`` with empty
|
||||||
|
``content``. The replay must still surface the thought so the editor's
|
||||||
|
Thinking pane rebuilds, even when there is no message text to follow.
|
||||||
|
"""
|
||||||
|
mock_conn = MagicMock(spec=acp.Client)
|
||||||
|
mock_conn.session_update = AsyncMock()
|
||||||
|
agent._conn = mock_conn
|
||||||
|
|
||||||
|
new_resp = await agent.new_session(cwd="/tmp")
|
||||||
|
state = agent.session_manager.get_session(new_resp.session_id)
|
||||||
|
state.history = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"reasoning_content": "I should call the search tool next.",
|
||||||
|
"content": "",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_conn.session_update.reset_mock()
|
||||||
|
await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
thought_updates = [
|
||||||
|
call.kwargs["update"]
|
||||||
|
for call in mock_conn.session_update.await_args_list
|
||||||
|
if isinstance(call.kwargs.get("update"), AgentThoughtChunk)
|
||||||
|
]
|
||||||
|
message_updates = [
|
||||||
|
call.kwargs["update"]
|
||||||
|
for call in mock_conn.session_update.await_args_list
|
||||||
|
if isinstance(call.kwargs.get("update"), AgentMessageChunk)
|
||||||
|
]
|
||||||
|
assert len(thought_updates) == 1
|
||||||
|
assert thought_updates[0].content.text == "I should call the search tool next."
|
||||||
|
assert message_updates == []
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_load_session_skips_empty_reasoning_fields(self, agent):
|
||||||
|
"""Empty/whitespace reasoning fields must not produce notifications."""
|
||||||
|
mock_conn = MagicMock(spec=acp.Client)
|
||||||
|
mock_conn.session_update = AsyncMock()
|
||||||
|
agent._conn = mock_conn
|
||||||
|
|
||||||
|
new_resp = await agent.new_session(cwd="/tmp")
|
||||||
|
state = agent.session_manager.get_session(new_resp.session_id)
|
||||||
|
state.history = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"reasoning_content": "",
|
||||||
|
"reasoning": " \n\t",
|
||||||
|
"content": "Just a regular answer.",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_conn.session_update.reset_mock()
|
||||||
|
await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
thought_updates = [
|
||||||
|
call.kwargs["update"]
|
||||||
|
for call in mock_conn.session_update.await_args_list
|
||||||
|
if isinstance(call.kwargs.get("update"), AgentThoughtChunk)
|
||||||
|
]
|
||||||
|
assert thought_updates == []
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_load_session_replays_thought_then_tool_call_without_message(self, agent):
|
||||||
|
"""Canonical thinking-model shape: reasoning + tool_call + no body text.
|
||||||
|
|
||||||
|
Thinking models commonly emit a pre-tool thought followed by a
|
||||||
|
tool_calls turn with empty ``content``. Replay must emit:
|
||||||
|
``agent_thought_chunk`` then ``tool_call`` then ``tool_call_update``
|
||||||
|
for the matching tool result — and crucially, NO ``agent_message_chunk``
|
||||||
|
for the empty-text assistant body. Regression for the canonical
|
||||||
|
thinking-then-tool flow on #12285.
|
||||||
|
"""
|
||||||
|
mock_conn = MagicMock(spec=acp.Client)
|
||||||
|
mock_conn.session_update = AsyncMock()
|
||||||
|
agent._conn = mock_conn
|
||||||
|
|
||||||
|
new_resp = await agent.new_session(cwd="/tmp")
|
||||||
|
state = agent.session_manager.get_session(new_resp.session_id)
|
||||||
|
state.history = [
|
||||||
|
{"role": "user", "content": "Find the bug."},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"reasoning_content": "I should grep for the function name first.",
|
||||||
|
"content": "",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"id": "call_grep_1",
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "search_files",
|
||||||
|
"arguments": '{"pattern":"foo","path":"."}',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "tool",
|
||||||
|
"tool_call_id": "call_grep_1",
|
||||||
|
"content": '{"total_count":1,"matches":[{"path":"x.py","line":1,"content":"foo"}]}',
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
mock_conn.session_update.reset_mock()
|
||||||
|
await agent.load_session(cwd="/tmp", session_id=new_resp.session_id)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
kinds = [
|
||||||
|
getattr(call.kwargs.get("update"), "session_update", None)
|
||||||
|
for call in mock_conn.session_update.await_args_list
|
||||||
|
if getattr(call.kwargs.get("update"), "session_update", None)
|
||||||
|
in {
|
||||||
|
"user_message_chunk",
|
||||||
|
"agent_thought_chunk",
|
||||||
|
"agent_message_chunk",
|
||||||
|
"tool_call",
|
||||||
|
"tool_call_update",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# No agent_message_chunk for the empty-content assistant turn.
|
||||||
|
assert "agent_message_chunk" not in kinds
|
||||||
|
# Thought must precede the tool_call_start within the assistant turn,
|
||||||
|
# and the tool result follows.
|
||||||
|
assert kinds == [
|
||||||
|
"user_message_chunk",
|
||||||
|
"agent_thought_chunk",
|
||||||
|
"tool_call",
|
||||||
|
"tool_call_update",
|
||||||
|
]
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_load_session_schedules_history_replay_after_response(self, agent):
|
async def test_load_session_schedules_history_replay_after_response(self, agent):
|
||||||
"""Zed only attaches replayed updates after session/load has completed."""
|
"""Zed only attaches replayed updates after session/load has completed."""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue