mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
Add agent/transports/types.py with three shared dataclasses: - NormalizedResponse: content, tool_calls, finish_reason, reasoning, usage, provider_data - ToolCall: id, name, arguments, provider_data (per-tool-call protocol metadata) - Usage: prompt_tokens, completion_tokens, total_tokens, cached_tokens Add normalize_anthropic_response_v2() to anthropic_adapter.py — wraps the existing v1 function and maps its output to NormalizedResponse. One call site in run_agent.py (the main normalize branch) uses v2 with a back-compat shim to SimpleNamespace for downstream code. No ABC, no registry, no streaming, no client lifecycle. Those land in PR 3 with the first concrete transport (AnthropicTransport). 46 new tests: - test_types.py: dataclass construction, build_tool_call, map_finish_reason - test_anthropic_normalize_v2.py: v1-vs-v2 regression tests (text, tools, thinking, mixed, stop reasons, mcp prefix stripping, edge cases) Part of the provider transport refactor (PR 2 of 9).
238 lines
8.3 KiB
Python
238 lines
8.3 KiB
Python
"""Regression tests: normalize_anthropic_response_v2 vs v1.
|
|
|
|
Constructs mock Anthropic responses and asserts that the v2 function
|
|
(returning NormalizedResponse) produces identical field values to the
|
|
original v1 function (returning SimpleNamespace + finish_reason).
|
|
"""
|
|
|
|
import json
|
|
import pytest
|
|
from types import SimpleNamespace
|
|
|
|
from agent.anthropic_adapter import (
|
|
normalize_anthropic_response,
|
|
normalize_anthropic_response_v2,
|
|
)
|
|
from agent.transports.types import NormalizedResponse, ToolCall
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers to build mock Anthropic SDK responses
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _text_block(text: str):
|
|
return SimpleNamespace(type="text", text=text)
|
|
|
|
|
|
def _thinking_block(thinking: str, signature: str = "sig_abc"):
|
|
return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
|
|
|
|
|
|
def _tool_use_block(id: str, name: str, input: dict):
|
|
return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
|
|
|
|
|
|
def _response(content_blocks, stop_reason="end_turn"):
|
|
return SimpleNamespace(
|
|
content=content_blocks,
|
|
stop_reason=stop_reason,
|
|
usage=SimpleNamespace(
|
|
input_tokens=10,
|
|
output_tokens=5,
|
|
),
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestTextOnly:
|
|
"""Text-only response — no tools, no thinking."""
|
|
|
|
def setup_method(self):
|
|
self.resp = _response([_text_block("Hello world")])
|
|
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
|
|
self.v2 = normalize_anthropic_response_v2(self.resp)
|
|
|
|
def test_type(self):
|
|
assert isinstance(self.v2, NormalizedResponse)
|
|
|
|
def test_content_matches(self):
|
|
assert self.v2.content == self.v1_msg.content
|
|
|
|
def test_finish_reason_matches(self):
|
|
assert self.v2.finish_reason == self.v1_finish
|
|
|
|
def test_no_tool_calls(self):
|
|
assert self.v2.tool_calls is None
|
|
assert self.v1_msg.tool_calls is None
|
|
|
|
def test_no_reasoning(self):
|
|
assert self.v2.reasoning is None
|
|
assert self.v1_msg.reasoning is None
|
|
|
|
|
|
class TestWithToolCalls:
|
|
"""Response with tool calls."""
|
|
|
|
def setup_method(self):
|
|
self.resp = _response(
|
|
[
|
|
_text_block("I'll check that"),
|
|
_tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
|
|
_tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
|
|
],
|
|
stop_reason="tool_use",
|
|
)
|
|
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
|
|
self.v2 = normalize_anthropic_response_v2(self.resp)
|
|
|
|
def test_finish_reason(self):
|
|
assert self.v2.finish_reason == "tool_calls"
|
|
assert self.v1_finish == "tool_calls"
|
|
|
|
def test_tool_call_count(self):
|
|
assert len(self.v2.tool_calls) == 2
|
|
assert len(self.v1_msg.tool_calls) == 2
|
|
|
|
def test_tool_call_ids_match(self):
|
|
for i in range(2):
|
|
assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id
|
|
|
|
def test_tool_call_names_match(self):
|
|
assert self.v2.tool_calls[0].name == "terminal"
|
|
assert self.v2.tool_calls[1].name == "read_file"
|
|
for i in range(2):
|
|
assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name
|
|
|
|
def test_tool_call_arguments_match(self):
|
|
for i in range(2):
|
|
assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments
|
|
|
|
def test_content_preserved(self):
|
|
assert self.v2.content == self.v1_msg.content
|
|
assert "check that" in self.v2.content
|
|
|
|
|
|
class TestWithThinking:
|
|
"""Response with thinking blocks (Claude 3.5+ extended thinking)."""
|
|
|
|
def setup_method(self):
|
|
self.resp = _response([
|
|
_thinking_block("Let me think about this carefully..."),
|
|
_text_block("The answer is 42."),
|
|
])
|
|
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
|
|
self.v2 = normalize_anthropic_response_v2(self.resp)
|
|
|
|
def test_reasoning_matches(self):
|
|
assert self.v2.reasoning == self.v1_msg.reasoning
|
|
assert "think about this" in self.v2.reasoning
|
|
|
|
def test_reasoning_details_in_provider_data(self):
|
|
v1_details = self.v1_msg.reasoning_details
|
|
v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None
|
|
assert v1_details is not None
|
|
assert v2_details is not None
|
|
assert len(v2_details) == len(v1_details)
|
|
|
|
def test_content_excludes_thinking(self):
|
|
assert self.v2.content == "The answer is 42."
|
|
|
|
|
|
class TestMixed:
|
|
"""Response with thinking + text + tool calls."""
|
|
|
|
def setup_method(self):
|
|
self.resp = _response(
|
|
[
|
|
_thinking_block("Planning my approach..."),
|
|
_text_block("I'll run the command"),
|
|
_tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
|
|
],
|
|
stop_reason="tool_use",
|
|
)
|
|
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
|
|
self.v2 = normalize_anthropic_response_v2(self.resp)
|
|
|
|
def test_all_fields_present(self):
|
|
assert self.v2.content is not None
|
|
assert self.v2.tool_calls is not None
|
|
assert self.v2.reasoning is not None
|
|
assert self.v2.finish_reason == "tool_calls"
|
|
|
|
def test_content_matches(self):
|
|
assert self.v2.content == self.v1_msg.content
|
|
|
|
def test_reasoning_matches(self):
|
|
assert self.v2.reasoning == self.v1_msg.reasoning
|
|
|
|
def test_tool_call_matches(self):
|
|
assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
|
|
assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
|
|
|
|
|
|
class TestStopReasons:
|
|
"""Verify finish_reason mapping matches between v1 and v2."""
|
|
|
|
@pytest.mark.parametrize("stop_reason,expected", [
|
|
("end_turn", "stop"),
|
|
("tool_use", "tool_calls"),
|
|
("max_tokens", "length"),
|
|
("stop_sequence", "stop"),
|
|
("refusal", "content_filter"),
|
|
("model_context_window_exceeded", "length"),
|
|
("unknown_future_reason", "stop"),
|
|
])
|
|
def test_stop_reason_mapping(self, stop_reason, expected):
|
|
resp = _response([_text_block("x")], stop_reason=stop_reason)
|
|
v1_msg, v1_finish = normalize_anthropic_response(resp)
|
|
v2 = normalize_anthropic_response_v2(resp)
|
|
assert v2.finish_reason == v1_finish == expected
|
|
|
|
|
|
class TestStripToolPrefix:
|
|
"""Verify mcp_ prefix stripping works identically."""
|
|
|
|
def test_prefix_stripped(self):
|
|
resp = _response(
|
|
[_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
|
|
stop_reason="tool_use",
|
|
)
|
|
v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
|
|
v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
|
|
assert v1_msg.tool_calls[0].function.name == "terminal"
|
|
assert v2.tool_calls[0].name == "terminal"
|
|
|
|
def test_prefix_kept(self):
|
|
resp = _response(
|
|
[_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
|
|
stop_reason="tool_use",
|
|
)
|
|
v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
|
|
v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
|
|
assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
|
|
assert v2.tool_calls[0].name == "mcp_terminal"
|
|
|
|
|
|
class TestEdgeCases:
|
|
"""Edge cases: empty content, no blocks, etc."""
|
|
|
|
def test_empty_content_blocks(self):
|
|
resp = _response([])
|
|
v1_msg, v1_finish = normalize_anthropic_response(resp)
|
|
v2 = normalize_anthropic_response_v2(resp)
|
|
assert v2.content == v1_msg.content
|
|
assert v2.content is None
|
|
|
|
def test_no_reasoning_details_means_none_provider_data(self):
|
|
resp = _response([_text_block("hi")])
|
|
v2 = normalize_anthropic_response_v2(resp)
|
|
assert v2.provider_data is None
|
|
|
|
def test_v2_returns_dataclass_not_namespace(self):
|
|
resp = _response([_text_block("hi")])
|
|
v2 = normalize_anthropic_response_v2(resp)
|
|
assert isinstance(v2, NormalizedResponse)
|
|
assert not isinstance(v2, SimpleNamespace)
|