# Patch summary (reviewer notes; this text sits before the first "diff --git"
# header and is not part of any hunk).
#
# Purpose: keep Responses-API-shaped keyword arguments from reaching the
# Anthropic Messages SDK calls.
#
# * agent/anthropic_adapter.py: adds _RESPONSES_ONLY_KWARGS (instructions,
#   input, store, parallel_tool_calls) and sanitize_anthropic_kwargs(), which
#   pops those keys from a dict in place, logs one WARNING listing the removed
#   keys, and returns the same object. Non-dict arguments are returned as-is.
# * agent/chat_completion_helpers.py: calls the sanitizer immediately before
#   agent._anthropic_client.messages.stream(**api_kwargs).
# * run_agent.py: calls the sanitizer immediately before
#   self._anthropic_client.messages.create(**api_kwargs).
# * tests/agent/test_anthropic_kwargs_sanitize.py: new unit tests for the
#   sanitizer, using a local stand-in with a strict keyword whitelist.
#
# NOTE(review): line structure was rebuilt from a newline-collapsed copy; the
# leading whitespace of context and added lines is inferred -- confirm against
# the target tree before applying.
# NOTE(review): only four keys are stripped. A payload built for the Responses
# API presumably also lacks ``messages``/``max_tokens``; confirm that the
# Anthropic call can actually succeed after stripping, as the warning text
# ("The call will proceed") suggests.
diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index a4a211843ee..bf3f4aef859 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -2301,3 +2301,43 @@ def build_anthropic_kwargs(
         kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
 
     return kwargs
+
+
+# Keys that belong exclusively to the OpenAI Responses / Codex API shape.
+# The Anthropic Messages SDK (``messages.create()`` / ``messages.stream()``)
+# raises ``TypeError: ... got an unexpected keyword argument`` on any of them.
+_RESPONSES_ONLY_KWARGS = frozenset(
+    {"instructions", "input", "store", "parallel_tool_calls"}
+)
+
+
+def sanitize_anthropic_kwargs(api_kwargs: Any, *, log_prefix: str = "") -> Any:
+    """Drop Responses-API-only keys before an Anthropic Messages SDK call.
+
+    Defensive boundary guard for #31673: under rare api_mode-flip races
+    (e.g. a concurrent auxiliary call mutating a shared agent between the
+    kwargs build and the stream dispatch), a Responses-shaped payload
+    carrying ``instructions=`` can reach ``messages.stream()`` /
+    ``messages.create()``. The Anthropic SDK rejects it with a
+    non-retryable ``TypeError`` that nukes the whole turn and propagates
+    through the entire fallback chain.
+
+    Mutates ``api_kwargs`` in place and returns it. When a foreign key is
+    present we log a WARNING so the underlying race stays visible in the
+    wild instead of being silently papered over.
+    """
+    if not isinstance(api_kwargs, dict):
+        return api_kwargs
+    leaked = _RESPONSES_ONLY_KWARGS.intersection(api_kwargs)
+    if leaked:
+        for _key in leaked:
+            api_kwargs.pop(_key, None)
+        logger.warning(
+            "%sStripped Responses-only kwarg(s) %s from an Anthropic Messages "
+            "call (api_mode flip race — see #31673). The call will proceed; "
+            "this breadcrumb means a kwargs build ran under a Responses "
+            "api_mode while dispatch ran under anthropic_messages.",
+            log_prefix,
+            sorted(leaked),
+        )
+    return api_kwargs
diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py
index 4a220a0e356..3f483789ede 100644
--- a/agent/chat_completion_helpers.py
+++ b/agent/chat_completion_helpers.py
@@ -2024,6 +2024,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
         # Per-attempt diagnostic dict for the retry block to consume.
         _diag = agent._stream_diag_init()
         request_client_holder["diag"] = _diag
+        # Defensive: strip Responses-only kwargs (instructions, input, ...)
+        # that can leak in under an api_mode-flip race. The Anthropic SDK
+        # raises a non-retryable TypeError on them, killing the turn. See
+        # #31673 / sanitize_anthropic_kwargs().
+        from agent.anthropic_adapter import sanitize_anthropic_kwargs
+        sanitize_anthropic_kwargs(
+            api_kwargs, log_prefix=getattr(agent, "log_prefix", "")
+        )
         # Use the Anthropic SDK's streaming context manager
         with agent._anthropic_client.messages.stream(**api_kwargs) as stream:
             # The Anthropic SDK exposes the raw httpx response on
diff --git a/run_agent.py b/run_agent.py
index 015cf7d2b23..6a1304f42f9 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3915,6 +3915,13 @@ class AIAgent:
     def _anthropic_messages_create(self, api_kwargs: dict):
         if self.api_mode == "anthropic_messages":
             self._try_refresh_anthropic_client_credentials()
+        # Defensive: strip Responses-only kwargs that can leak in under an
+        # api_mode-flip race (the Anthropic SDK raises a non-retryable
+        # TypeError on them). See #31673.
+        from agent.anthropic_adapter import sanitize_anthropic_kwargs
+        sanitize_anthropic_kwargs(
+            api_kwargs, log_prefix=getattr(self, "log_prefix", "")
+        )
         return self._anthropic_client.messages.create(**api_kwargs)
 
     def _rebuild_anthropic_client(self) -> None:
diff --git a/tests/agent/test_anthropic_kwargs_sanitize.py b/tests/agent/test_anthropic_kwargs_sanitize.py
new file mode 100644
index 00000000000..d0466ff7f31
--- /dev/null
+++ b/tests/agent/test_anthropic_kwargs_sanitize.py
@@ -0,0 +1,94 @@
+"""Tests for sanitize_anthropic_kwargs (#31673).
+
+Guards the Anthropic Messages dispatch boundary against Responses-API-only
+kwargs (``instructions``, ``input``, ``store``, ``parallel_tool_calls``)
+leaking in under an api_mode-flip race. The Anthropic SDK raises a
+non-retryable ``TypeError`` on any of them, killing the whole turn.
+"""
+
+import logging
+
+import pytest
+
+from agent.anthropic_adapter import (
+    _RESPONSES_ONLY_KWARGS,
+    sanitize_anthropic_kwargs,
+)
+
+
+def _fake_anthropic_call(**kwargs):
+    """Mimic the Anthropic SDK's strict kwarg signature."""
+    allowed = {
+        "model", "messages", "max_tokens", "system", "tools", "tool_choice",
+        "extra_body", "extra_headers", "temperature", "top_p", "top_k",
+        "thinking", "timeout",
+    }
+    bad = set(kwargs) - allowed
+    if bad:
+        raise TypeError(
+            "Messages.stream() got an unexpected keyword argument "
+            f"{sorted(bad)[0]!r}"
+        )
+    return "OK"
+
+
+def test_bare_leaked_payload_reproduces_the_typeerror():
+    """Without the guard, a Responses-shaped payload raises the issue's error."""
+    with pytest.raises(TypeError, match="unexpected keyword argument"):
+        _fake_anthropic_call(model="claude-sonnet-4-6", instructions="sys")
+
+
+def test_strips_all_responses_only_keys():
+    payload = {
+        "model": "claude-sonnet-4-6",
+        "instructions": "You are Hermes.",
+        "input": [{"role": "user", "content": "hi"}],
+        "store": False,
+        "parallel_tool_calls": True,
+    }
+    out = sanitize_anthropic_kwargs(payload)
+    assert out is payload  # mutates in place and returns same dict
+    assert payload == {"model": "claude-sonnet-4-6"}
+    assert _fake_anthropic_call(**payload) == "OK"
+
+
+def test_clean_anthropic_payload_is_untouched():
+    payload = {
+        "model": "claude-sonnet-4-6",
+        "messages": [{"role": "user", "content": "hi"}],
+        "max_tokens": 1024,
+        "system": "sys",
+        "tools": [{"name": "x"}],
+    }
+    snapshot = dict(payload)
+    sanitize_anthropic_kwargs(payload)
+    assert payload == snapshot
+    assert _fake_anthropic_call(**payload) == "OK"
+
+
+def test_warns_when_keys_are_stripped(caplog):
+    with caplog.at_level(logging.WARNING, logger="agent.anthropic_adapter"):
+        sanitize_anthropic_kwargs(
+            {"model": "m", "instructions": "sys"}, log_prefix="[pfx] "
+        )
+    assert any(
+        "31673" in r.message and "[pfx] " in r.message
+        for r in caplog.records
+    ), caplog.records
+
+
+def test_no_warning_on_clean_payload(caplog):
+    with caplog.at_level(logging.WARNING, logger="agent.anthropic_adapter"):
+        sanitize_anthropic_kwargs({"model": "m", "messages": []})
+    assert not caplog.records
+
+
+def test_non_dict_input_is_noop():
+    assert sanitize_anthropic_kwargs(None) is None
+    assert sanitize_anthropic_kwargs("not a dict") == "not a dict"
+
+
+def test_responses_only_kwargs_membership():
+    # Contract: instructions (the reported symptom) plus the sibling
+    # Responses-shape keys are all covered.
+    assert {"instructions", "input", "store", "parallel_tool_calls"} <= _RESPONSES_ONLY_KWARGS