# hermes-agent/tests/gateway/test_replay_entry_fields.py

"""Tests for ``gateway.run._build_replay_entry``.

The gateway rebuilds ``agent_history`` from the persisted transcript on every
turn (unlike the CLI, which keeps the live in-memory message list).  When a
pure-text assistant turn (no ``tool_calls``) is replayed, the simple-text
branch in ``run_sync`` used to whitelist only three reasoning fields:
``reasoning``, ``reasoning_details``, ``codex_reasoning_items``.

That whitelist predated three fields the DB now persists:
``reasoning_content``, ``codex_message_items``, and ``finish_reason``.
Dropping ``codex_message_items`` is the most damaging of the three: it kills
prefix-cache hits for OpenAI Codex Responses API users, because OpenAI's docs
require that the ``phase`` field be replayed on every assistant message.

These tests pin the expanded whitelist so it doesn't regress.
"""
from __future__ import annotations

import pytest

from gateway.run import _ASSISTANT_REPLAY_FIELDS, _build_replay_entry


class TestBuildReplayEntry:
    """Pin which persisted message fields ``_build_replay_entry`` replays."""

    @staticmethod
    def _replay_assistant(content, **extra_fields):
        """Replay an assistant message carrying *extra_fields*.

        The stored message is ``role``, ``content`` and then the extra
        fields in the order given; the helper is invoked with the same
        role and content.
        """
        stored = {"role": "assistant", "content": content, **extra_fields}
        return _build_replay_entry("assistant", content, stored)

    def test_user_message_has_only_role_and_content(self):
        """User turns replay as bare role/content; extras are dropped."""
        stored = {
            "role": "user",
            "content": "hello",
            "reasoning": "leak",
            "extra": "drop",
        }
        replayed = _build_replay_entry("user", "hello", stored)
        assert replayed == {"role": "user", "content": "hello"}

    def test_tool_message_has_only_role_and_content(self):
        """Tool turns never pick up reasoning fields from this helper."""
        # Production routes tool messages through the rich-passthrough
        # branch rather than this helper.  Even so, a mistaken call with a
        # non-assistant role must not copy reasoning fields across.
        stored = {"role": "tool", "content": "result", "reasoning": "leak"}
        replayed = _build_replay_entry("tool", "result", stored)
        assert replayed == {"role": "tool", "content": "result"}

    def test_assistant_minimal_has_only_role_and_content(self):
        """An assistant turn with no optional fields replays unchanged."""
        replayed = self._replay_assistant("ok")
        assert replayed == {"role": "assistant", "content": "ok"}

    def test_assistant_preserves_reasoning(self):
        """``reasoning`` survives the replay."""
        thought = "I think therefore I am."
        replayed = self._replay_assistant("answer", reasoning=thought)
        assert replayed["reasoning"] == thought

    def test_assistant_preserves_reasoning_content(self):
        """``reasoning_content`` survives; it used to be silently dropped.

        DeepSeek/Kimi/Moonshot thinking-mode echo needs it so the provider
        gets back what it sent.
        """
        replayed = self._replay_assistant(
            "answer", reasoning_content="structured CoT"
        )
        assert replayed["reasoning_content"] == "structured CoT"

    def test_assistant_preserves_reasoning_details(self):
        """``reasoning_details`` survives the replay."""
        summary_part = {
            "type": "reasoning.summary",
            "format": "text",
            "summary": "thought hard",
        }
        encrypted_part = {
            "type": "reasoning.encrypted",
            "data": "opaque_blob",
            "signature": "sig123",
        }
        details = [summary_part, encrypted_part]
        replayed = self._replay_assistant("answer", reasoning_details=details)
        assert replayed["reasoning_details"] == details

    def test_assistant_preserves_codex_reasoning_items(self):
        """``codex_reasoning_items`` survives the replay."""
        reasoning_items = [{"type": "reasoning", "encrypted_content": "blob"}]
        replayed = self._replay_assistant(
            "answer", codex_reasoning_items=reasoning_items
        )
        assert replayed["codex_reasoning_items"] == reasoning_items

    def test_assistant_preserves_codex_message_items(self):
        """``codex_message_items`` survives; it used to be silently dropped.

        OpenAI docs: 'preserve and resend phase on all assistant messages
        — dropping it can degrade performance.'  Prefix cache hits on the
        Codex Responses API depend on it.
        """
        message_items = [
            {
                "type": "message",
                "role": "assistant",
                "id": "msg_123",
                "phase": "final_answer",
                "content": [{"type": "output_text", "text": "Done"}],
            }
        ]
        replayed = self._replay_assistant(
            "Done", codex_message_items=message_items
        )
        assert replayed["codex_message_items"] == message_items

    def test_assistant_preserves_finish_reason(self):
        """``finish_reason`` survives; it used to be silently dropped.

        Keeping it is cheap and lets transcripts replay byte-identically
        across CLI and gateway.
        """
        replayed = self._replay_assistant("answer", finish_reason="stop")
        assert replayed["finish_reason"] == "stop"

    def test_assistant_drops_falsy_reasoning(self):
        """Empty values for these fields are dropped (as in PR #2974).

        An empty string or list in any of them carries no information.
        """
        replayed = self._replay_assistant(
            "answer",
            reasoning="",
            reasoning_details=[],
            codex_reasoning_items=[],
            codex_message_items=[],
            finish_reason="",
        )
        assert replayed == {"role": "assistant", "content": "answer"}

    def test_assistant_preserves_empty_reasoning_content(self):
        """An empty ``reasoning_content`` is a sentinel and must be kept.

        DeepSeek V4 Pro thinking mode answers HTTP 400 when
        reasoning_content is missing outright.
        ``_copy_reasoning_content_for_api`` turns the empty string into a
        single space at API-send time, but only when the empty string
        actually reaches it, so dropping it here would 400 the next turn
        for affected providers.
        """
        replayed = self._replay_assistant("answer", reasoning_content="")
        assert "reasoning_content" in replayed
        assert replayed["reasoning_content"] == ""

    def test_assistant_drops_none_reasoning_content(self):
        """``None`` means the field is simply absent, so it is dropped."""
        replayed = self._replay_assistant("answer", reasoning_content=None)
        assert "reasoning_content" not in replayed

    def test_assistant_preserves_all_six_fields_together(self):
        """All six replayable fields survive when present at once."""
        expected_fields = {
            "reasoning": "thinking",
            "reasoning_content": "structured",
            "reasoning_details": [
                {"type": "reasoning.summary", "summary": "s"}
            ],
            "codex_reasoning_items": [
                {"type": "reasoning", "encrypted_content": "b"}
            ],
            "codex_message_items": [
                {
                    "type": "message",
                    "role": "assistant",
                    "phase": "final_answer",
                    "content": [{"type": "output_text", "text": "x"}],
                }
            ],
            "finish_reason": "stop",
        }
        replayed = self._replay_assistant("answer", **expected_fields)
        for field_name, expected_value in expected_fields.items():
            assert replayed[field_name] == expected_value

    def test_assistant_does_not_invent_keys(self):
        """Only fields present on the message are copied over."""
        replayed = self._replay_assistant("answer", reasoning="r")
        # None of these were on the message, so none may appear in the entry.
        never_supplied = (
            "reasoning_content",
            "reasoning_details",
            "codex_reasoning_items",
            "codex_message_items",
            "finish_reason",
        )
        invented = [key for key in never_supplied if key in replayed]
        assert invented == []

    def test_replay_fields_constant_is_stable(self):
        """Spell the whitelist out so accidental renames are caught."""
        pinned_whitelist = (
            "reasoning",
            "reasoning_content",
            "reasoning_details",
            "codex_reasoning_items",
            "codex_message_items",
            "finish_reason",
        )
        assert _ASSISTANT_REPLAY_FIELDS == pinned_whitelist

    def test_unrelated_keys_are_ignored(self):
        """Arbitrary keys on the message must not leak into the entry."""
        replayed = self._replay_assistant(
            "answer",
            timestamp=12345.6,
            internal_marker="should not flow",
            tool_call_id="should not be set on simple-text branch",
        )
        for stray_key in ("timestamp", "internal_marker", "tool_call_id"):
            assert stray_key not in replayed