hermes-agent/tests/agent/test_anthropic_kwargs_sanitize.py
Teknium 399b8ee5f0
fix(anthropic): strip Responses-only kwargs before Messages SDK call (#31673) (#42155)
A Responses-API-shaped payload carrying instructions=/input=/store=/
parallel_tool_calls= can reach the native Anthropic messages.stream() /
messages.create() call under a rare api_mode-flip race (e.g. a concurrent
auxiliary vision call mutating a shared agent between the kwargs build and
the stream dispatch). The Anthropic SDK rejects these with a non-retryable
TypeError that kills the whole turn and propagates through the entire fallback chain.

Add sanitize_anthropic_kwargs() at both Anthropic dispatch sites: it drops
the Responses-only keys in place and logs a WARNING (with #31673 breadcrumb)
when one is present, so the underlying race stays visible in the wild
instead of being silently papered over.
2026-06-08 09:36:38 -07:00

94 lines
3.1 KiB
Python

"""Tests for sanitize_anthropic_kwargs (#31673).
Guards the Anthropic Messages dispatch boundary against Responses-API-only
kwargs (``instructions``, ``input``, ``store``, ``parallel_tool_calls``)
leaking in under an api_mode-flip race. The Anthropic SDK raises a
non-retryable ``TypeError`` on any of them, killing the whole turn.
"""
import logging
import pytest
from agent.anthropic_adapter import (
_RESPONSES_ONLY_KWARGS,
sanitize_anthropic_kwargs,
)
def _fake_anthropic_call(**kwargs):
    """Stand-in for the Anthropic SDK call that rejects unknown keywords.

    Returns ``"OK"`` when every keyword is in the accepted set. Otherwise
    raises ``TypeError`` naming the alphabetically first unexpected keyword.
    """
    accepted = frozenset((
        "model", "messages", "max_tokens", "system", "tools", "tool_choice",
        "extra_body", "extra_headers", "temperature", "top_p", "top_k",
        "thinking", "timeout",
    ))
    # Sorting keeps the reported keyword deterministic when several leak at once.
    unexpected = sorted(name for name in kwargs if name not in accepted)
    if not unexpected:
        return "OK"
    raise TypeError(
        f"Messages.stream() got an unexpected keyword argument {unexpected[0]!r}"
    )
def test_bare_leaked_payload_reproduces_the_typeerror():
    """An unsanitized Responses-shaped payload triggers the reported TypeError."""
    leaked = {"model": "claude-sonnet-4-6", "instructions": "sys"}
    with pytest.raises(TypeError, match="unexpected keyword argument"):
        _fake_anthropic_call(**leaked)
def test_strips_all_responses_only_keys():
    """All four Responses-only keys are removed, leaving only ``model``."""
    kwargs = dict(
        model="claude-sonnet-4-6",
        instructions="You are Hermes.",
        input=[{"role": "user", "content": "hi"}],
        store=False,
        parallel_tool_calls=True,
    )

    returned = sanitize_anthropic_kwargs(kwargs)

    # The sanitizer hands back the very dict it was given, mutated in place.
    assert returned is kwargs
    assert kwargs == {"model": "claude-sonnet-4-6"}
    # What remains must be acceptable to the strict fake SDK call.
    assert _fake_anthropic_call(**kwargs) == "OK"
def test_clean_anthropic_payload_is_untouched():
    """A payload holding only Messages-API keys passes through unchanged.

    The snapshot is a deep copy. A shallow ``dict(payload)`` would share the
    nested ``messages`` and ``tools`` objects with ``payload``, so an in-place
    change to either would alter both sides of the comparison and go unnoticed.
    """
    import copy  # local import keeps this test self-contained

    payload = {
        "model": "claude-sonnet-4-6",
        "messages": [{"role": "user", "content": "hi"}],
        "max_tokens": 1024,
        "system": "sys",
        "tools": [{"name": "x"}],
    }
    snapshot = copy.deepcopy(payload)
    sanitize_anthropic_kwargs(payload)
    assert payload == snapshot
    # The untouched payload is still accepted by the strict fake SDK call.
    assert _fake_anthropic_call(**payload) == "OK"
def test_warns_when_keys_are_stripped(caplog):
    """Stripping a key logs a record with the issue number and caller prefix."""
    leaked = {"model": "m", "instructions": "sys"}
    with caplog.at_level(logging.WARNING, logger="agent.anthropic_adapter"):
        sanitize_anthropic_kwargs(leaked, log_prefix="[pfx] ")

    # One captured record must contain both the breadcrumb and the prefix.
    matching = [
        rec
        for rec in caplog.records
        if "31673" in rec.message and "[pfx] " in rec.message
    ]
    assert matching, caplog.records
def test_no_warning_on_clean_payload(caplog):
    """No record is captured when the payload has nothing to strip."""
    clean = {"model": "m", "messages": []}
    with caplog.at_level(logging.WARNING, logger="agent.anthropic_adapter"):
        sanitize_anthropic_kwargs(clean)
    assert caplog.records == []
def test_non_dict_input_is_noop():
    """Non-dict arguments (``None``, a string) come back as they were given."""
    text = "not a dict"
    assert sanitize_anthropic_kwargs(None) is None
    assert sanitize_anthropic_kwargs(text) == text
def test_responses_only_kwargs_membership():
    """The strip-list contains ``instructions`` and its three sibling keys."""
    # Contract: the reported symptom (instructions) and the other
    # Responses-shape keys must all be present in the strip-list.
    required = {"instructions", "input", "store", "parallel_tool_calls"}
    assert required <= _RESPONSES_ONLY_KWARGS