mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-24 05:41:40 +00:00
fix(gateway): preserve reasoning_content, codex_message_items, finish_reason on transcript replay (#22839)
PR #2974 whitelisted three reasoning fields (reasoning, reasoning_details, codex_reasoning_items) for the gateway's simple-text replay branch. Three more fields were added to the DB later but the whitelist was never updated: - reasoning_content: provider-facing thinking text. _copy_reasoning_content_for_api promotes 'reasoning' -> 'reasoning_content' at send time only when the strings happen to match. Carrying the original verbatim avoids loss for providers that return them as distinct fields (DeepSeek/Kimi/ Moonshot thinking modes), and preserves the empty-string sentinel that DeepSeek V4 Pro requires for thinking-mode replay. - codex_message_items: exact assistant message items with 'phase'. OpenAI docs: 'preserve and resend phase on all assistant messages — dropping it can degrade performance.' Required for prefix cache hits. No recovery path exists — once dropped, gone. - finish_reason: informational; cheap to keep so transcripts replay identically across CLI and gateway. The CLI is unaffected because cli.py keeps the live in-memory message list across turns (cli.py:10046 'self.conversation_history = result["messages"]'). The gateway rebuilds agent_history from the SQLite transcript on every turn, so any field stripped during replay is silently lost. Refactors the inline whitelist into a module-level _build_replay_entry() helper so the contract can be unit-tested. 16 new tests pin the field set and falsy-value handling. Verified end-to-end: DB stores all 8 fields, replay now preserves all 8 (was preserving only 5 for assistant text turns).
This commit is contained in:
parent
c7f0aab949
commit
70bfd429e5
2 changed files with 332 additions and 11 deletions
|
|
@ -203,6 +203,78 @@ def _is_fresh_gateway_interruption(
|
|||
return current - timestamp <= window
|
||||
|
||||
|
||||
# Assistant-message fields that must survive transcript replay so multi-turn
|
||||
# reasoning context, prefix-cache hits, and provider-specific echo
|
||||
# requirements all behave the same on the gateway as they do in the CLI.
|
||||
#
|
||||
# ``reasoning`` and ``reasoning_details`` were the original three preserved
|
||||
# by PR #2974 (schema v6). ``reasoning_content``, ``codex_reasoning_items``,
|
||||
# ``codex_message_items``, and ``finish_reason`` were added to the DB later
|
||||
# but the gateway's replay whitelist was never expanded to match — so any
|
||||
# pure-text assistant turn (no ``tool_calls``) silently dropped them on
|
||||
# replay, regressing the CLI-vs-gateway behavioural parity.
|
||||
#
|
||||
# Why each field matters on replay:
|
||||
# * ``reasoning`` / ``reasoning_content``: provider-facing thinking text.
|
||||
# ``_copy_reasoning_content_for_api`` promotes ``reasoning`` →
|
||||
# ``reasoning_content`` at send time, but only when the strings happen to
|
||||
# match. Carrying the original ``reasoning_content`` verbatim avoids
|
||||
# reconstruction loss for providers that return them as distinct fields
|
||||
# (DeepSeek/Kimi/Moonshot thinking modes).
|
||||
# * ``reasoning_details``: opaque structured array (signature,
|
||||
# encrypted_content) used by OpenRouter/Anthropic to maintain reasoning
|
||||
# continuity across turns.
|
||||
# * ``codex_reasoning_items``: encrypted reasoning blobs for the OpenAI
|
||||
# Codex Responses API.
|
||||
# * ``codex_message_items``: exact assistant message items with ``phase``.
|
||||
# OpenAI docs: "preserve and resend phase on all assistant messages —
|
||||
# dropping it can degrade performance." Required for prefix cache hits.
|
||||
# * ``finish_reason``: informational; cheap to keep so transcripts replay
|
||||
# identically across CLI and gateway.
|
||||
_ASSISTANT_REPLAY_FIELDS: tuple[str, ...] = (
|
||||
"reasoning",
|
||||
"reasoning_content",
|
||||
"reasoning_details",
|
||||
"codex_reasoning_items",
|
||||
"codex_message_items",
|
||||
"finish_reason",
|
||||
)
|
||||
|
||||
|
||||
def _build_replay_entry(role: str, content: Any, msg: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Build a replay entry for a non-tool-calling message, preserving the
|
||||
assistant fields the agent's API builders rely on for multi-turn fidelity.
|
||||
|
||||
Lifted out of the inline ``run_sync`` closure so the field whitelist can
|
||||
be unit-tested in isolation. Mirrors the ``_ASSISTANT_REPLAY_FIELDS``
|
||||
contract above.
|
||||
|
||||
Empty values: most fields are dropped when falsy (matching the original
|
||||
PR #2974 behaviour) since an empty list/string for those carries no
|
||||
information. The exception is ``reasoning_content``: DeepSeek/Kimi
|
||||
thinking-mode replay treats an empty string as a meaningful sentinel
|
||||
that ``_copy_reasoning_content_for_api`` upgrades to a single space.
|
||||
Dropping it here would make the gateway send no ``reasoning_content`` at
|
||||
all on the next turn, which can cause HTTP 400 from strict thinking
|
||||
providers.
|
||||
"""
|
||||
entry: Dict[str, Any] = {"role": role, "content": content}
|
||||
if role == "assistant":
|
||||
for _rkey in _ASSISTANT_REPLAY_FIELDS:
|
||||
if _rkey not in msg:
|
||||
continue
|
||||
_rval = msg.get(_rkey)
|
||||
if _rkey == "reasoning_content":
|
||||
# Preserve empty-string sentinel for thinking-mode replay.
|
||||
if _rval is None:
|
||||
continue
|
||||
else:
|
||||
if not _rval:
|
||||
continue
|
||||
entry[_rkey] = _rval
|
||||
return entry
|
||||
|
||||
|
||||
def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
|
||||
"""Return the ``timestamp`` of the last usable transcript row, if any.
|
||||
|
||||
|
|
@ -14412,17 +14484,12 @@ class GatewayRunner:
|
|||
if msg.get("mirror"):
|
||||
mirror_src = msg.get("mirror_source", "another session")
|
||||
content = f"[Delivered from {mirror_src}] {content}"
|
||||
entry = {"role": role, "content": content}
|
||||
# Preserve reasoning fields on assistant messages so
|
||||
# multi-turn reasoning context survives session reload.
|
||||
# The agent's _build_api_kwargs converts these to the
|
||||
# provider-specific format (reasoning_content, etc.).
|
||||
if role == "assistant":
|
||||
for _rkey in ("reasoning", "reasoning_details",
|
||||
"codex_reasoning_items"):
|
||||
_rval = msg.get(_rkey)
|
||||
if _rval:
|
||||
entry[_rkey] = _rval
|
||||
# Preserve assistant reasoning + Codex replay fields so
|
||||
# multi-turn reasoning context, prefix-cache hits, and
|
||||
# provider-specific echo requirements survive session
|
||||
# reload. See ``_ASSISTANT_REPLAY_FIELDS`` for the full
|
||||
# whitelist and rationale.
|
||||
entry = _build_replay_entry(role, content, msg)
|
||||
agent_history.append(entry)
|
||||
|
||||
# Collect MEDIA paths already in history so we can exclude them
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue