hermes-agent/tests/agent/test_anthropic_normalize_v2.py

"""Regression tests: normalize_anthropic_response_v2 vs v1.

Constructs mock Anthropic responses and asserts that the v2 function
(returning NormalizedResponse) produces identical field values to the
original v1 function (returning SimpleNamespace + finish_reason).
"""

import json
import pytest
from types import SimpleNamespace

from agent.anthropic_adapter import (
    normalize_anthropic_response,
    normalize_anthropic_response_v2,
)
from agent.transports.types import NormalizedResponse, ToolCall


# ---------------------------------------------------------------------------
# Helpers to build mock Anthropic SDK responses
# ---------------------------------------------------------------------------

def _text_block(text: str):
    """Build a mock ``text`` content block carrying *text*."""
    block = SimpleNamespace()
    block.type = "text"
    block.text = text
    return block


def _thinking_block(thinking: str, signature: str = "sig_abc"):
    """Build a mock ``thinking`` content block.

    *signature* defaults to a fixed placeholder so callers only need to
    supply the reasoning text.
    """
    attrs = {"type": "thinking", "thinking": thinking, "signature": signature}
    return SimpleNamespace(**attrs)


def _tool_use_block(id: str, name: str, input: dict):
    """Build a mock ``tool_use`` content block.

    The parameter names mirror the attribute names set on the returned
    namespace (``id``, ``name``, ``input``).
    """
    attrs = {"type": "tool_use", "id": id, "name": name, "input": input}
    return SimpleNamespace(**attrs)


def _response(content_blocks, stop_reason="end_turn"):
    """Wrap *content_blocks* in a mock response object.

    The result exposes ``content``, ``stop_reason`` and a ``usage``
    namespace with fixed token counts (10 input, 5 output).
    """
    token_usage = SimpleNamespace(input_tokens=10, output_tokens=5)
    mock = SimpleNamespace(content=content_blocks, stop_reason=stop_reason)
    mock.usage = token_usage
    return mock


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------

class TestTextOnly:
    """Plain text reply: a single text block, no tool use, no thinking."""

    def setup_method(self):
        # Run the same mock response through both normalizers before each test.
        self.resp = _response([_text_block("Hello world")])
        v1_result = normalize_anthropic_response(self.resp)
        self.v1_msg, self.v1_finish = v1_result
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_type(self):
        """v2 returns a ``NormalizedResponse`` instance."""
        result = self.v2
        assert isinstance(result, NormalizedResponse)

    def test_content_matches(self):
        """v2 content equals the content of the v1 message."""
        new_content = self.v2.content
        old_content = self.v1_msg.content
        assert new_content == old_content

    def test_finish_reason_matches(self):
        """v2 finish_reason equals the finish reason returned by v1."""
        new_reason = self.v2.finish_reason
        assert new_reason == self.v1_finish

    def test_no_tool_calls(self):
        """Neither normalizer reports tool calls for a text-only reply."""
        new_calls = self.v2.tool_calls
        assert new_calls is None
        old_calls = self.v1_msg.tool_calls
        assert old_calls is None

    def test_no_reasoning(self):
        """Neither normalizer reports reasoning for a text-only reply."""
        new_reasoning = self.v2.reasoning
        assert new_reasoning is None
        old_reasoning = self.v1_msg.reasoning
        assert old_reasoning is None


class TestWithToolCalls:
    """Reply that pairs a text block with two tool_use blocks."""

    def setup_method(self):
        # One text block followed by two tool invocations, stopped on tool_use.
        blocks = [
            _text_block("I'll check that"),
            _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
            _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
        ]
        self.resp = _response(blocks, stop_reason="tool_use")
        v1_result = normalize_anthropic_response(self.resp)
        self.v1_msg, self.v1_finish = v1_result
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_finish_reason(self):
        """Both normalizers report ``tool_calls`` as the finish reason."""
        new_reason = self.v2.finish_reason
        assert new_reason == "tool_calls"
        old_reason = self.v1_finish
        assert old_reason == "tool_calls"

    def test_tool_call_count(self):
        """Both normalizers expose exactly two tool calls."""
        new_total = len(self.v2.tool_calls)
        assert new_total == 2
        old_total = len(self.v1_msg.tool_calls)
        assert old_total == 2

    def test_tool_call_ids_match(self):
        """Tool call ids agree position by position."""
        for idx in (0, 1):
            new_id = self.v2.tool_calls[idx].id
            old_id = self.v1_msg.tool_calls[idx].id
            assert new_id == old_id

    def test_tool_call_names_match(self):
        """v2 names are the expected ones and equal v1's ``function.name``."""
        first_name = self.v2.tool_calls[0].name
        assert first_name == "terminal"
        second_name = self.v2.tool_calls[1].name
        assert second_name == "read_file"
        for idx in (0, 1):
            new_name = self.v2.tool_calls[idx].name
            old_name = self.v1_msg.tool_calls[idx].function.name
            assert new_name == old_name

    def test_tool_call_arguments_match(self):
        """v2 ``arguments`` equal v1's ``function.arguments`` per position."""
        for idx in (0, 1):
            new_args = self.v2.tool_calls[idx].arguments
            old_args = self.v1_msg.tool_calls[idx].function.arguments
            assert new_args == old_args

    def test_content_preserved(self):
        """Text content survives alongside the tool calls in both versions."""
        new_content = self.v2.content
        assert new_content == self.v1_msg.content
        assert "check that" in self.v2.content


class TestWithThinking:
    """Reply that carries a thinking block ahead of its text block."""

    def setup_method(self):
        # Thinking block first, then the visible answer.
        blocks = [
            _thinking_block("Let me think about this carefully..."),
            _text_block("The answer is 42."),
        ]
        self.resp = _response(blocks)
        v1_result = normalize_anthropic_response(self.resp)
        self.v1_msg, self.v1_finish = v1_result
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_reasoning_matches(self):
        """v2 reasoning equals v1 reasoning and contains the thinking text."""
        new_reasoning = self.v2.reasoning
        assert new_reasoning == self.v1_msg.reasoning
        assert "think about this" in self.v2.reasoning

    def test_reasoning_details_in_provider_data(self):
        """v2 keeps reasoning details under ``provider_data``.

        v1 exposes them as ``reasoning_details`` on the message; both must
        be present and have the same length.
        """
        v1_details = self.v1_msg.reasoning_details
        # Falsy provider_data (None or empty) means there are no details.
        if self.v2.provider_data:
            v2_details = self.v2.provider_data.get("reasoning_details")
        else:
            v2_details = None
        assert v1_details is not None
        assert v2_details is not None
        assert len(v2_details) == len(v1_details)

    def test_content_excludes_thinking(self):
        """Only the text block ends up in v2 content."""
        visible_text = self.v2.content
        assert visible_text == "The answer is 42."


class TestMixed:
    """Reply combining a thinking block, a text block and one tool call."""

    def setup_method(self):
        # All three block kinds in one response, stopped on tool_use.
        blocks = [
            _thinking_block("Planning my approach..."),
            _text_block("I'll run the command"),
            _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
        ]
        self.resp = _response(blocks, stop_reason="tool_use")
        v1_result = normalize_anthropic_response(self.resp)
        self.v1_msg, self.v1_finish = v1_result
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_all_fields_present(self):
        """v2 populates content, tool_calls, reasoning and finish_reason."""
        result = self.v2
        assert result.content is not None
        assert result.tool_calls is not None
        assert result.reasoning is not None
        assert result.finish_reason == "tool_calls"

    def test_content_matches(self):
        """v2 content equals v1 content."""
        new_content = self.v2.content
        old_content = self.v1_msg.content
        assert new_content == old_content

    def test_reasoning_matches(self):
        """v2 reasoning equals v1 reasoning."""
        new_reasoning = self.v2.reasoning
        old_reasoning = self.v1_msg.reasoning
        assert new_reasoning == old_reasoning

    def test_tool_call_matches(self):
        """The single tool call has the same id and name in both versions."""
        new_id = self.v2.tool_calls[0].id
        old_id = self.v1_msg.tool_calls[0].id
        assert new_id == old_id
        new_name = self.v2.tool_calls[0].name
        old_name = self.v1_msg.tool_calls[0].function.name
        assert new_name == old_name


class TestStopReasons:
    """Check that v1 and v2 map each stop_reason to the same finish_reason."""

    @pytest.mark.parametrize("stop_reason,expected", [
        ("end_turn", "stop"),
        ("tool_use", "tool_calls"),
        ("max_tokens", "length"),
        ("stop_sequence", "stop"),
        ("refusal", "content_filter"),
        ("model_context_window_exceeded", "length"),
        ("unknown_future_reason", "stop"),
    ])
    def test_stop_reason_mapping(self, stop_reason, expected):
        """Both normalizers yield *expected* for the given *stop_reason*."""
        mock = _response([_text_block("x")], stop_reason=stop_reason)
        _, legacy_finish = normalize_anthropic_response(mock)
        normalized = normalize_anthropic_response_v2(mock)
        # Equivalent to the chained form: v2 == v1 and v1 == expected.
        assert normalized.finish_reason == legacy_finish
        assert legacy_finish == expected


class TestStripToolPrefix:
    """The ``strip_tool_prefix`` flag must affect v1 and v2 tool names alike."""

    def test_prefix_stripped(self):
        """With the flag on, ``mcp_terminal`` is reported as ``terminal``."""
        tool_block = _tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})
        mock = _response([tool_block], stop_reason="tool_use")

        legacy_msg, _ = normalize_anthropic_response(mock, strip_tool_prefix=True)
        normalized = normalize_anthropic_response_v2(mock, strip_tool_prefix=True)

        legacy_name = legacy_msg.tool_calls[0].function.name
        assert legacy_name == "terminal"
        new_name = normalized.tool_calls[0].name
        assert new_name == "terminal"

    def test_prefix_kept(self):
        """With the flag off, the ``mcp_terminal`` name is left unchanged."""
        tool_block = _tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})
        mock = _response([tool_block], stop_reason="tool_use")

        legacy_msg, _ = normalize_anthropic_response(mock, strip_tool_prefix=False)
        normalized = normalize_anthropic_response_v2(mock, strip_tool_prefix=False)

        legacy_name = legacy_msg.tool_calls[0].function.name
        assert legacy_name == "mcp_terminal"
        new_name = normalized.tool_calls[0].name
        assert new_name == "mcp_terminal"


class TestEdgeCases:
    """Boundary inputs: empty block list, absent provider data, return type."""

    def test_empty_content_blocks(self):
        """An empty block list gives ``None`` content, matching v1."""
        mock = _response([])
        legacy_msg, _ = normalize_anthropic_response(mock)
        normalized = normalize_anthropic_response_v2(mock)
        new_content = normalized.content
        assert new_content == legacy_msg.content
        assert normalized.content is None

    def test_no_reasoning_details_means_none_provider_data(self):
        """A text-only reply leaves ``provider_data`` as ``None``."""
        mock = _response([_text_block("hi")])
        normalized = normalize_anthropic_response_v2(mock)
        provider_data = normalized.provider_data
        assert provider_data is None

    def test_v2_returns_dataclass_not_namespace(self):
        """v2 returns ``NormalizedResponse``, never a ``SimpleNamespace``."""
        mock = _response([_text_block("hi")])
        normalized = normalize_anthropic_response_v2(mock)
        assert isinstance(normalized, NormalizedResponse)
        is_namespace = isinstance(normalized, SimpleNamespace)
        assert not is_namespace