From bd49bce2781629474d895f3de0350ecf3de38bb4 Mon Sep 17 00:00:00 2001 From: teyrebaz33 Date: Sun, 22 Mar 2026 01:05:26 +0300 Subject: [PATCH] fix(prompt-caching): skip top-level cache_control on role:tool for OpenRouter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the native Anthropic Messages API path, convert_messages_to_anthropic() moves top-level cache_control on role:tool messages inside the tool_result block. On OpenRouter (chat_completions), no such conversion happens — the unexpected top-level field causes a silent hang on the second tool call. Add native_anthropic parameter to _apply_cache_marker() and apply_anthropic_cache_control(). When False (OpenRouter), role:tool messages are skipped entirely. When True (native Anthropic), existing behaviour is preserved. Fixes #2362 --- agent/prompt_caching.py | 10 ++++++---- run_agent.py | 2 +- tests/agent/test_prompt_caching.py | 11 +++++++++-- tests/test_anthropic_adapter.py | 2 +- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/agent/prompt_caching.py b/agent/prompt_caching.py index 661a28b6a..d80f58ea4 100644 --- a/agent/prompt_caching.py +++ b/agent/prompt_caching.py @@ -12,13 +12,14 @@ import copy from typing import Any, Dict, List -def _apply_cache_marker(msg: dict, cache_marker: dict) -> None: +def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None: """Add cache_control to a single message, handling all format variations.""" role = msg.get("role", "") content = msg.get("content") if role == "tool": - msg["cache_control"] = cache_marker + if native_anthropic: + msg["cache_control"] = cache_marker return if content is None or content == "": @@ -40,6 +41,7 @@ def _apply_cache_marker(msg: dict, cache_marker: dict) -> None: def apply_anthropic_cache_control( api_messages: List[Dict[str, Any]], cache_ttl: str = "5m", + native_anthropic: bool = False, ) -> List[Dict[str, Any]]: """Apply system_and_3 caching strategy to messages for Anthropic models. @@ -59,12 +61,12 @@ def apply_anthropic_cache_control( breakpoints_used = 0 if messages[0].get("role") == "system": - _apply_cache_marker(messages[0], marker) + _apply_cache_marker(messages[0], marker, native_anthropic=native_anthropic) breakpoints_used += 1 remaining = 4 - breakpoints_used non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"] for idx in non_sys[-remaining:]: - _apply_cache_marker(messages[idx], marker) + _apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic) return messages diff --git a/run_agent.py b/run_agent.py index ef5a92b83..9c1b8e727 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5627,7 +5627,7 @@ class AIAgent: # inject cache_control breakpoints (system + last 3 messages) to reduce # input token costs by ~75% on multi-turn conversations. if self._use_prompt_caching: - api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl) + api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages')) # Safety net: strip orphaned tool results / add stubs for missing # results before sending to the API. Runs unconditionally — not diff --git a/tests/agent/test_prompt_caching.py b/tests/agent/test_prompt_caching.py index fd87a80e3..f6f3e9f0a 100644 --- a/tests/agent/test_prompt_caching.py +++ b/tests/agent/test_prompt_caching.py @@ -13,11 +13,18 @@ MARKER = {"type": "ephemeral"} class TestApplyCacheMarker: - def test_tool_message_gets_top_level_marker(self): + def test_tool_message_gets_top_level_marker_on_native_anthropic(self): + """Native Anthropic path: cache_control injected top-level (adapter moves it inside tool_result).""" msg = {"role": "tool", "content": "result"} - _apply_cache_marker(msg, MARKER) + _apply_cache_marker(msg, MARKER, native_anthropic=True) assert msg["cache_control"] == MARKER + def test_tool_message_skips_marker_on_openrouter(self): + """OpenRouter path: top-level cache_control on role:tool is invalid and causes silent hang.""" + msg = {"role": "tool", "content": "result"} + _apply_cache_marker(msg, MARKER, native_anthropic=False) + assert "cache_control" not in msg + def test_none_content_gets_top_level_marker(self): msg = {"role": "assistant", "content": None} _apply_cache_marker(msg, MARKER) diff --git a/tests/test_anthropic_adapter.py b/tests/test_anthropic_adapter.py index abbad79f3..71638f0d3 100644 --- a/tests/test_anthropic_adapter.py +++ b/tests/test_anthropic_adapter.py @@ -718,7 +718,7 @@ class TestConvertMessages: ], }, {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, - ]) + ], native_anthropic=True) _, result = convert_messages_to_anthropic(messages) user_msg = [m for m in result if m["role"] == "user"][0]