diff --git a/gateway/run.py b/gateway/run.py index f68e71c9a..db99ad087 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7216,6 +7216,7 @@ class GatewayRunner: tool_calls=msg.get("tool_calls"), tool_call_id=msg.get("tool_call_id"), reasoning=msg.get("reasoning"), + reasoning_content=msg.get("reasoning_content"), ) except Exception: pass # Best-effort copy diff --git a/gateway/session.py b/gateway/session.py index 7fc83b081..ea3f17490 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -1147,6 +1147,10 @@ class SessionStore: tool_name=message.get("tool_name"), tool_calls=message.get("tool_calls"), tool_call_id=message.get("tool_call_id"), + reasoning=message.get("reasoning") if message.get("role") == "assistant" else None, + reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None, + reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None, + codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None, ) except Exception as e: logger.debug("Session DB operation failed: %s", e) @@ -1176,6 +1180,7 @@ class SessionStore: tool_calls=msg.get("tool_calls"), tool_call_id=msg.get("tool_call_id"), reasoning=msg.get("reasoning") if role == "assistant" else None, + reasoning_content=msg.get("reasoning_content") if role == "assistant" else None, reasoning_details=msg.get("reasoning_details") if role == "assistant" else None, codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, ) diff --git a/hermes_state.py b/hermes_state.py index 2d8a0fd4a..46f3de6fd 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -31,7 +31,7 @@ T = TypeVar("T") DEFAULT_DB_PATH = get_hermes_home() / "state.db" -SCHEMA_VERSION = 6 +SCHEMA_VERSION = 7 SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -80,6 +80,7 @@ CREATE TABLE IF NOT EXISTS messages ( token_count INTEGER, finish_reason TEXT, reasoning TEXT, + 
reasoning_content TEXT, reasoning_details TEXT, codex_reasoning_items TEXT ); @@ -329,6 +330,15 @@ class SessionDB: except sqlite3.OperationalError: pass # Column already exists cursor.execute("UPDATE schema_version SET version = 6") + if current_version < 7: + # v7: preserve provider-native reasoning_content separately from + # normalized reasoning text. Kimi/Moonshot replay can require + # this field on assistant tool-call messages when thinking is on. + try: + cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT') + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 7") # Unique title index — always ensure it exists (safe to run after migrations # since the title column is guaranteed to exist at this point) @@ -922,6 +932,7 @@ class SessionDB: token_count: int = None, finish_reason: str = None, reasoning: str = None, + reasoning_content: str = None, reasoning_details: Any = None, codex_reasoning_items: Any = None, ) -> int: @@ -951,8 +962,8 @@ class SessionDB: cursor = conn.execute( """INSERT INTO messages (session_id, role, content, tool_call_id, tool_calls, tool_name, timestamp, token_count, finish_reason, - reasoning, reasoning_details, codex_reasoning_items) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + reasoning, reasoning_content, reasoning_details, codex_reasoning_items) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", ( session_id, role, @@ -964,6 +975,7 @@ class SessionDB: token_count, finish_reason, reasoning, + reasoning_content, reasoning_details_json, codex_items_json, ), @@ -1014,7 +1026,7 @@ class SessionDB: with self._lock: cursor = self._conn.execute( "SELECT role, content, tool_call_id, tool_calls, tool_name, " - "reasoning, reasoning_details, codex_reasoning_items " + "reasoning, reasoning_content, reasoning_details, codex_reasoning_items " "FROM messages WHERE session_id = ? 
ORDER BY timestamp, id", (session_id,), ) @@ -1038,6 +1050,8 @@ class SessionDB: if row["role"] == "assistant": if row["reasoning"]: msg["reasoning"] = row["reasoning"] + if row["reasoning_content"] is not None: + msg["reasoning_content"] = row["reasoning_content"] if row["reasoning_details"]: try: msg["reasoning_details"] = json.loads(row["reasoning_details"]) diff --git a/run_agent.py b/run_agent.py index b88baf2fa..f21e5e147 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2966,6 +2966,7 @@ class AIAgent: tool_call_id=msg.get("tool_call_id"), finish_reason=msg.get("finish_reason"), reasoning=msg.get("reasoning") if role == "assistant" else None, + reasoning_content=msg.get("reasoning_content") if role == "assistant" else None, reasoning_details=msg.get("reasoning_details") if role == "assistant" else None, codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None, ) @@ -7003,6 +7004,11 @@ class AIAgent: "finish_reason": finish_reason, } + if hasattr(assistant_message, "reasoning_content"): + raw_reasoning_content = getattr(assistant_message, "reasoning_content", None) + if raw_reasoning_content is not None: + msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content) + if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: # Pass reasoning_details back unmodified so providers (OpenRouter, # Anthropic, OpenAI) can maintain reasoning continuity across turns. 
def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None:
    """Set ``reasoning_content`` on an outgoing API replay message.

    Only assistant messages are touched; every other role returns
    immediately with ``api_msg`` unchanged.  For assistant messages the
    value is chosen in this order:

    1. A string ``reasoning_content`` on ``source_msg`` (the empty string
       included) is copied verbatim.
    2. Otherwise a non-empty string ``reasoning`` on ``source_msg`` is used.
    3. Otherwise, for Kimi/Moonshot endpoints, an assistant message that
       carries ``tool_calls`` gets an empty-string ``reasoning_content``.

    When none of these apply, a null ``reasoning_content`` already present
    on ``api_msg`` is removed so it is not serialized as ``null``.

    Args:
        source_msg: Stored conversation message to read reasoning from.
        api_msg: Outgoing message dict; mutated in place.
    """
    if source_msg.get("role") != "assistant":
        return

    explicit_reasoning = source_msg.get("reasoning_content")
    if isinstance(explicit_reasoning, str):
        # An explicit value wins even when it is "" -- the provider sent it.
        api_msg["reasoning_content"] = explicit_reasoning
        return

    # Callers may hand in ``api_msg`` as a shallow copy of ``source_msg``;
    # in that case a null placeholder is already on it and would otherwise
    # be forwarded to the provider as ``"reasoning_content": null``.
    if "reasoning_content" in api_msg and api_msg["reasoning_content"] is None:
        del api_msg["reasoning_content"]

    normalized_reasoning = source_msg.get("reasoning")
    if isinstance(normalized_reasoning, str) and normalized_reasoning:
        api_msg["reasoning_content"] = normalized_reasoning
        return

    # The provider-name check comes first so the host lookups are skipped
    # whenever the provider name alone settles the question.
    kimi_requires_reasoning = (
        self.provider in {"kimi-coding", "kimi-coding-cn"}
        or base_url_host_matches(self.base_url, "api.kimi.com")
        or base_url_host_matches(self.base_url, "moonshot.ai")
        or base_url_host_matches(self.base_url, "moonshot.cn")
    )
    if kimi_requires_reasoning and source_msg.get("tool_calls"):
        # The field must be present on tool-call turns even with no text.
        api_msg["reasoning_content"] = ""
@@ -7160,10 +7190,7 @@ class AIAgent: api_messages = [] for msg in messages: api_msg = msg.copy() - if msg.get("role") == "assistant": - reasoning = msg.get("reasoning") - if reasoning: - api_msg["reasoning_content"] = reasoning + self._copy_reasoning_content_for_api(msg, api_msg) api_msg.pop("reasoning", None) api_msg.pop("finish_reason", None) api_msg.pop("_flush_sentinel", None) @@ -8923,11 +8950,7 @@ class AIAgent: # For ALL assistant messages, pass reasoning back to the API # This ensures multi-turn reasoning context is preserved - if msg.get("role") == "assistant": - reasoning_text = msg.get("reasoning") - if reasoning_text: - # Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter) - api_msg["reasoning_content"] = reasoning_text + self._copy_reasoning_content_for_api(msg, api_msg) # Remove 'reasoning' field - it's for trajectory storage only # We've copied it to 'reasoning_content' for the API above diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index bf1eba51d..539b12a5e 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -1059,6 +1059,7 @@ class TestRewriteTranscriptPreservesReasoning: role="assistant", content="The answer is 42.", reasoning="I need to think step by step.", + reasoning_content="provider scratchpad", reasoning_details=[{"type": "summary", "text": "step by step"}], codex_reasoning_items=[{"id": "r1", "type": "reasoning"}], ) @@ -1066,6 +1067,7 @@ class TestRewriteTranscriptPreservesReasoning: # Verify all three were stored before = db.get_messages_as_conversation(session_id) assert before[0].get("reasoning") == "I need to think step by step." 
class TestReasoningReplayForStrictProviders:
    """Replayed assistant turns must keep their provider-native reasoning fields."""

    def _setup_agent(self, agent):
        # Switch off the optional features these replay tests do not exercise.
        for attr, value in (
            ("_cached_system_prompt", "You are helpful."),
            ("_use_prompt_caching", False),
            ("tool_delay", 0),
            ("compression_enabled", False),
            ("save_trajectories", False),
        ):
            setattr(agent, attr, value)

    @staticmethod
    def _tool_round_trip(tool_name, arguments, tool_output, **assistant_extras):
        """Build a prior assistant tool call and its tool result as history."""
        assistant_turn = {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "c1",
                    "type": "function",
                    "function": {"name": tool_name, "arguments": arguments},
                }
            ],
        }
        assistant_turn.update(assistant_extras)
        tool_turn = {"role": "tool", "tool_call_id": "c1", "content": tool_output}
        return [assistant_turn, tool_turn]

    @staticmethod
    def _run_next_step(agent, history):
        """Run one follow-up turn on top of *history* with persistence patched out."""
        agent.client.chat.completions.create.return_value = _mock_response(
            content="done", finish_reason="stop"
        )
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            return agent.run_conversation("next step", conversation_history=history)

    @staticmethod
    def _first_assistant_sent(agent):
        """Return the first assistant message handed to the chat-completions client."""
        outgoing = agent.client.chat.completions.create.call_args.kwargs["messages"]
        return next(entry for entry in outgoing if entry.get("role") == "assistant")

    def test_kimi_tool_replay_includes_empty_reasoning_content(self, agent):
        self._setup_agent(agent)
        agent.base_url = "https://api.kimi.com/coding/v1"
        agent._base_url_lower = agent.base_url.lower()
        agent.provider = "kimi-coding"

        history = self._tool_round_trip("terminal", '{"command":"date"}', "Tue Apr 21")
        outcome = self._run_next_step(agent, history)

        assert outcome["completed"] is True
        replayed = self._first_assistant_sent(agent)
        assert replayed["role"] == "assistant"
        assert replayed["tool_calls"][0]["function"]["name"] == "terminal"
        assert "reasoning_content" in replayed
        assert replayed["reasoning_content"] == ""

    def test_explicit_reasoning_content_beats_normalized_reasoning_on_replay(self, agent):
        self._setup_agent(agent)

        history = self._tool_round_trip(
            "web_search",
            '{"q":"test"}',
            "ok",
            reasoning="summary reasoning",
            reasoning_content="provider-native scratchpad",
        )
        outcome = self._run_next_step(agent, history)

        assert outcome["completed"] is True
        replayed = self._first_assistant_sent(agent)
        assert replayed["reasoning_content"] == "provider-native scratchpad"
role="assistant", + content="", + tool_calls=[{"id": "c1", "type": "function", "function": {"name": "date", "arguments": "{}"}}], + reasoning_content="", + ) + + conv = db.get_messages_as_conversation("s1") + assert len(conv) == 1 + assert "reasoning_content" in conv[0] + assert conv[0]["reasoning_content"] == "" + def test_reasoning_not_set_for_non_assistant(self, db): """reasoning is never leaked onto user or tool messages.""" db.create_session(session_id="s1", source="telegram") @@ -1120,7 +1152,7 @@ class TestSchemaInit: def test_schema_version(self, db): cursor = db._conn.execute("SELECT version FROM schema_version") version = cursor.fetchone()[0] - assert version == 6 + assert version == 7 def test_title_column_exists(self, db): """Verify the title column was created in the sessions table.""" @@ -1176,12 +1208,12 @@ class TestSchemaInit: conn.commit() conn.close() - # Open with SessionDB — should migrate to v6 + # Open with SessionDB — should migrate to v7 migrated_db = SessionDB(db_path=db_path) # Verify migration cursor = migrated_db._conn.execute("SELECT version FROM schema_version") - assert cursor.fetchone()[0] == 6 + assert cursor.fetchone()[0] == 7 # Verify title column exists and is NULL for existing sessions session = migrated_db.get_session("existing")