From f3a4af9cf2a626cb3e055766cb1cff60168d295d Mon Sep 17 00:00:00 2001 From: kshitij <82637225+kshitijk4poor@users.noreply.github.com> Date: Sat, 16 May 2026 06:45:29 -0700 Subject: [PATCH] fix(acp): replay assistant reasoning as agent_thought_chunk on session/load (#12285) (#26943) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Persisted assistant `reasoning_content` / `reasoning` fields are now emitted as ACP `agent_thought_chunk` notifications during `_replay_session_history`, so editor clients (Zed, etc.) rebuild collapsed Thinking panes when the user re-opens a session that used a thinking model. Ordering matches live streaming: thought precedes message text within the same assistant turn, mirroring how `reasoning_callback` deltas arrive before `stream_delta_callback` deltas in `events.py::make_thinking_cb` / `make_message_cb`. Behavior on non-reasoning histories is unchanged; the replay loop's existing text / tool_call / tool_call_update / plan emission is preserved bit-for-bit. Closes #12285. Credit: - @Yukipukii1 (#14691) — original thought-replay design via `acp.update_agent_thought_text`; the tool-call portion of that PR has since landed via #19139, but the reasoning replay is theirs. - @HenkDz (#17652 / #18578) — established the `_replay_session_history` and `_history_*` helper conventions this builds on. - @D1zzyDwarf (#16531) — also closed by this work. --- acp_adapter/server.py | 93 +++++++++++++----- tests/acp/test_server.py | 206 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 277 insertions(+), 22 deletions(-) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 71fce1890d1..9c8bb416999 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -18,6 +18,7 @@ import acp from acp.schema import ( AgentCapabilities, AgentMessageChunk, + AgentThoughtChunk, AuthenticateResponse, AvailableCommand, AvailableCommandsUpdate, @@ -788,14 +789,20 @@ class HermesACPAgent(acp.Agent): # ---- Session management ------------------------------------------------- @staticmethod - def _history_message_text(message: dict[str, Any]) -> str: - """Extract displayable text from a persisted OpenAI-style message.""" - content = message.get("content") - if isinstance(content, str): - return content.strip() - if isinstance(content, list): + def _flatten_history_text(value: Any) -> str: + """Normalize a persisted text-or-text-parts value into a single string. + + OpenAI-style assistant content (and provider reasoning fields) can arrive + as either a scalar string or a list of ``{"text": ...}`` / + ``{"type": "text", "content": ...}`` parts. Whitespace-only inputs + collapse to an empty string so callers can treat ``""`` as "nothing to + emit". + """ + if isinstance(value, str): + return value.strip() + if isinstance(value, list): parts: list[str] = [] - for item in content: + for item in value: if isinstance(item, dict): text = item.get("text") if isinstance(text, str): @@ -807,6 +814,29 @@ class HermesACPAgent(acp.Agent): return "\n".join(part.strip() for part in parts if part and part.strip()).strip() return "" + @classmethod + def _history_message_text(cls, message: dict[str, Any]) -> str: + """Extract displayable text from a persisted OpenAI-style message.""" + return cls._flatten_history_text(message.get("content")) + + @classmethod + def _history_reasoning_text(cls, message: dict[str, Any]) -> str: + """Extract displayable reasoning/thought text from a persisted assistant message. + + Returns the first non-empty value among ``reasoning_content`` (the + canonical field used by DeepSeek / Moonshot and the post-#16892 + chat-completions normalizer) and ``reasoning`` (used by the codex + event projector and several other transports). Both keys are + actively written by live code paths, so neither branch is + deprecated — they cover different transports rather than old vs. + new sessions. + """ + for key in ("reasoning_content", "reasoning"): + text = cls._flatten_history_text(message.get(key)) + if text: + return text + return "" + @staticmethod def _history_message_update( *, @@ -827,6 +857,11 @@ class HermesACPAgent(acp.Agent): ) return None + @staticmethod + def _history_thought_update(text: str) -> AgentThoughtChunk: + """Build an ACP history replay update for an assistant thought.""" + return acp.update_agent_thought_text(text) + @staticmethod def _history_tool_call_name_args(tool_call: dict[str, Any]) -> tuple[str, dict[str, Any]]: """Extract function name/arguments from an OpenAI-style tool_call.""" @@ -858,9 +893,10 @@ class HermesACPAgent(acp.Agent): Zed's ACP history UI calls ``session/load`` after the user picks an item from the Agents sidebar. The agent must then replay the full conversation - as user/assistant chunks plus reconstructed tool-call start/completion - notifications; merely restoring server-side state makes Hermes remember - context, but leaves the editor looking like a clean thread. + as user/assistant chunks, thinking-mode thought chunks, plus reconstructed + tool-call start/completion notifications; merely restoring server-side + state makes Hermes remember context, but leaves the editor looking like a + clean thread. """ if not self._conn or not state.history: return @@ -882,24 +918,37 @@ class HermesACPAgent(acp.Agent): for message in state.history: role = str(message.get("role") or "") - if role in {"user", "assistant"}: + if role == "user": + text = self._history_message_text(message) + if text: + update = self._history_message_update(role=role, text=text) + if update is not None and not await _send(update): + return + continue + + if role == "assistant": + thought = self._history_reasoning_text(message) + if thought and not await _send(self._history_thought_update(thought)): + return + text = self._history_message_text(message) if text: update = self._history_message_update(role=role, text=text) if update is not None and not await _send(update): return - if role == "assistant" and isinstance(message.get("tool_calls"), list): - for tool_call in message["tool_calls"]: - if not isinstance(tool_call, dict): - continue - tool_call_id = self._history_tool_call_id(tool_call) - if not tool_call_id: - continue - tool_name, args = self._history_tool_call_name_args(tool_call) - active_tool_calls[tool_call_id] = (tool_name, args) - if not await _send(build_tool_start(tool_call_id, tool_name, args)): - return + tool_calls = message.get("tool_calls") + if isinstance(tool_calls, list): + for tool_call in tool_calls: + if not isinstance(tool_call, dict): + continue + tool_call_id = self._history_tool_call_id(tool_call) + if not tool_call_id: + continue + tool_name, args = self._history_tool_call_name_args(tool_call) + active_tool_calls[tool_call_id] = (tool_name, args) + if not await _send(build_tool_start(tool_call_id, tool_name, args)): + return continue if role == "tool": diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index 511d6e00934..a7bd0367b67 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -13,6 +13,7 @@ from acp.schema import ( AgentCapabilities, AgentMessageChunk, AgentPlanUpdate, + AgentThoughtChunk, AuthenticateResponse, AvailableCommandsUpdate, Implementation, @@ -466,6 +467,211 @@ class TestSessionOps: for update in updates ) + @pytest.mark.asyncio + async def test_load_session_replays_reasoning_thought_before_message(self, agent): + """Thinking-model thoughts must be replayed via ``agent_thought_chunk``. + + Regression for #12285 — when a session is loaded, persisted assistant + ``reasoning_content`` / ``reasoning`` fields must surface as ACP + ``AgentThoughtChunk`` notifications in the same relative position they + had live (thought streams before the assistant message text), so Zed's + collapsed Thinking pane rebuilds instead of vanishing on reconnect. + """ + mock_conn = MagicMock(spec=acp.Client) + mock_conn.session_update = AsyncMock() + agent._conn = mock_conn + + new_resp = await agent.new_session(cwd="/tmp") + state = agent.session_manager.get_session(new_resp.session_id) + state.history = [ + {"role": "user", "content": "Walk me through it."}, + { + "role": "assistant", + "reasoning_content": "Let me think step by step about the request.", + "content": "Here is the plan.", + }, + {"role": "user", "content": "And the legacy case?"}, + { + "role": "assistant", + # No reasoning_content — exercise the legacy "reasoning" fallback + # path so sessions persisted before #16892 still replay thoughts. + "reasoning": "Older sessions stored the trace under the internal key.", + "content": "Same idea, older field name.", + }, + ] + + mock_conn.session_update.reset_mock() + resp = await agent.load_session(cwd="/tmp", session_id=new_resp.session_id) + await asyncio.sleep(0) + await asyncio.sleep(0) + + assert isinstance(resp, LoadSessionResponse) + + replay_kinds = [ + getattr(call.kwargs.get("update"), "session_update", None) + for call in mock_conn.session_update.await_args_list + if getattr(call.kwargs.get("update"), "session_update", None) + in {"user_message_chunk", "agent_message_chunk", "agent_thought_chunk"} + ] + assert replay_kinds == [ + "user_message_chunk", + "agent_thought_chunk", + "agent_message_chunk", + "user_message_chunk", + "agent_thought_chunk", + "agent_message_chunk", + ] + + thought_updates = [ + call.kwargs["update"] + for call in mock_conn.session_update.await_args_list + if isinstance(call.kwargs.get("update"), AgentThoughtChunk) + ] + assert len(thought_updates) == 2 + assert thought_updates[0].content.text == "Let me think step by step about the request." + assert thought_updates[1].content.text == "Older sessions stored the trace under the internal key." + + @pytest.mark.asyncio + async def test_load_session_replays_reasoning_only_turn(self, agent): + """Assistant turns with reasoning but no content should still emit a thought. + + Pure reasoning-only assistant entries (e.g. a thinking step before a + tool-call turn) commonly carry ``reasoning_content`` with empty + ``content``. The replay must still surface the thought so the editor's + Thinking pane rebuilds, even when there is no message text to follow. + """ + mock_conn = MagicMock(spec=acp.Client) + mock_conn.session_update = AsyncMock() + agent._conn = mock_conn + + new_resp = await agent.new_session(cwd="/tmp") + state = agent.session_manager.get_session(new_resp.session_id) + state.history = [ + { + "role": "assistant", + "reasoning_content": "I should call the search tool next.", + "content": "", + }, + ] + + mock_conn.session_update.reset_mock() + await agent.load_session(cwd="/tmp", session_id=new_resp.session_id) + await asyncio.sleep(0) + await asyncio.sleep(0) + + thought_updates = [ + call.kwargs["update"] + for call in mock_conn.session_update.await_args_list + if isinstance(call.kwargs.get("update"), AgentThoughtChunk) + ] + message_updates = [ + call.kwargs["update"] + for call in mock_conn.session_update.await_args_list + if isinstance(call.kwargs.get("update"), AgentMessageChunk) + ] + assert len(thought_updates) == 1 + assert thought_updates[0].content.text == "I should call the search tool next." + assert message_updates == [] + + @pytest.mark.asyncio + async def test_load_session_skips_empty_reasoning_fields(self, agent): + """Empty/whitespace reasoning fields must not produce notifications.""" + mock_conn = MagicMock(spec=acp.Client) + mock_conn.session_update = AsyncMock() + agent._conn = mock_conn + + new_resp = await agent.new_session(cwd="/tmp") + state = agent.session_manager.get_session(new_resp.session_id) + state.history = [ + { + "role": "assistant", + "reasoning_content": "", + "reasoning": " \n\t", + "content": "Just a regular answer.", + }, + ] + + mock_conn.session_update.reset_mock() + await agent.load_session(cwd="/tmp", session_id=new_resp.session_id) + await asyncio.sleep(0) + await asyncio.sleep(0) + + thought_updates = [ + call.kwargs["update"] + for call in mock_conn.session_update.await_args_list + if isinstance(call.kwargs.get("update"), AgentThoughtChunk) + ] + assert thought_updates == [] + + @pytest.mark.asyncio + async def test_load_session_replays_thought_then_tool_call_without_message(self, agent): + """Canonical thinking-model shape: reasoning + tool_call + no body text. + + Thinking models commonly emit a pre-tool thought followed by a + tool_calls turn with empty ``content``. Replay must emit: + ``agent_thought_chunk`` then ``tool_call`` then ``tool_call_update`` + for the matching tool result — and crucially, NO ``agent_message_chunk`` + for the empty-text assistant body. Regression for the canonical + thinking-then-tool flow on #12285. + """ + mock_conn = MagicMock(spec=acp.Client) + mock_conn.session_update = AsyncMock() + agent._conn = mock_conn + + new_resp = await agent.new_session(cwd="/tmp") + state = agent.session_manager.get_session(new_resp.session_id) + state.history = [ + {"role": "user", "content": "Find the bug."}, + { + "role": "assistant", + "reasoning_content": "I should grep for the function name first.", + "content": "", + "tool_calls": [ + { + "id": "call_grep_1", + "type": "function", + "function": { + "name": "search_files", + "arguments": '{"pattern":"foo","path":"."}', + }, + } + ], + }, + { + "role": "tool", + "tool_call_id": "call_grep_1", + "content": '{"total_count":1,"matches":[{"path":"x.py","line":1,"content":"foo"}]}', + }, + ] + + mock_conn.session_update.reset_mock() + await agent.load_session(cwd="/tmp", session_id=new_resp.session_id) + await asyncio.sleep(0) + await asyncio.sleep(0) + + kinds = [ + getattr(call.kwargs.get("update"), "session_update", None) + for call in mock_conn.session_update.await_args_list + if getattr(call.kwargs.get("update"), "session_update", None) + in { + "user_message_chunk", + "agent_thought_chunk", + "agent_message_chunk", + "tool_call", + "tool_call_update", + } + ] + # No agent_message_chunk for the empty-content assistant turn. + assert "agent_message_chunk" not in kinds + # Thought must precede the tool_call_start within the assistant turn, + # and the tool result follows. + assert kinds == [ + "user_message_chunk", + "agent_thought_chunk", + "tool_call", + "tool_call_update", + ] + @pytest.mark.asyncio async def test_load_session_schedules_history_replay_after_response(self, agent): """Zed only attaches replayed updates after session/load has completed."""