mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(prompt-caching): skip top-level cache_control on role:tool for OpenRouter
On the native Anthropic Messages API path, convert_messages_to_anthropic() moves a top-level cache_control on role:tool messages inside the tool_result block. On OpenRouter (chat_completions) no such conversion happens, so the unexpected top-level field causes a silent hang on the second tool call. Add a native_anthropic parameter (default False) to _apply_cache_marker() and apply_anthropic_cache_control(). When False (OpenRouter), role:tool messages are left unmarked: no cache_control is added to them, and the messages themselves are still sent. When True (native Anthropic), the existing behaviour is preserved. Fixes #2362
This commit is contained in:
parent
52dd479214
commit
bd49bce278
4 changed files with 17 additions and 8 deletions
|
|
@ -12,13 +12,14 @@ import copy
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
|
||||||
def _apply_cache_marker(msg: dict, cache_marker: dict) -> None:
|
def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None:
|
||||||
"""Add cache_control to a single message, handling all format variations."""
|
"""Add cache_control to a single message, handling all format variations."""
|
||||||
role = msg.get("role", "")
|
role = msg.get("role", "")
|
||||||
content = msg.get("content")
|
content = msg.get("content")
|
||||||
|
|
||||||
if role == "tool":
|
if role == "tool":
|
||||||
msg["cache_control"] = cache_marker
|
if native_anthropic:
|
||||||
|
msg["cache_control"] = cache_marker
|
||||||
return
|
return
|
||||||
|
|
||||||
if content is None or content == "":
|
if content is None or content == "":
|
||||||
|
|
@ -40,6 +41,7 @@ def _apply_cache_marker(msg: dict, cache_marker: dict) -> None:
|
||||||
def apply_anthropic_cache_control(
|
def apply_anthropic_cache_control(
|
||||||
api_messages: List[Dict[str, Any]],
|
api_messages: List[Dict[str, Any]],
|
||||||
cache_ttl: str = "5m",
|
cache_ttl: str = "5m",
|
||||||
|
native_anthropic: bool = False,
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
"""Apply system_and_3 caching strategy to messages for Anthropic models.
|
"""Apply system_and_3 caching strategy to messages for Anthropic models.
|
||||||
|
|
||||||
|
|
@ -59,12 +61,12 @@ def apply_anthropic_cache_control(
|
||||||
breakpoints_used = 0
|
breakpoints_used = 0
|
||||||
|
|
||||||
if messages[0].get("role") == "system":
|
if messages[0].get("role") == "system":
|
||||||
_apply_cache_marker(messages[0], marker)
|
_apply_cache_marker(messages[0], marker, native_anthropic=native_anthropic)
|
||||||
breakpoints_used += 1
|
breakpoints_used += 1
|
||||||
|
|
||||||
remaining = 4 - breakpoints_used
|
remaining = 4 - breakpoints_used
|
||||||
non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
|
non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"]
|
||||||
for idx in non_sys[-remaining:]:
|
for idx in non_sys[-remaining:]:
|
||||||
_apply_cache_marker(messages[idx], marker)
|
_apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic)
|
||||||
|
|
||||||
return messages
|
return messages
|
||||||
|
|
|
||||||
|
|
@ -5627,7 +5627,7 @@ class AIAgent:
|
||||||
# inject cache_control breakpoints (system + last 3 messages) to reduce
|
# inject cache_control breakpoints (system + last 3 messages) to reduce
|
||||||
# input token costs by ~75% on multi-turn conversations.
|
# input token costs by ~75% on multi-turn conversations.
|
||||||
if self._use_prompt_caching:
|
if self._use_prompt_caching:
|
||||||
api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl)
|
api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl, native_anthropic=(self.api_mode == 'anthropic_messages'))
|
||||||
|
|
||||||
# Safety net: strip orphaned tool results / add stubs for missing
|
# Safety net: strip orphaned tool results / add stubs for missing
|
||||||
# results before sending to the API. Runs unconditionally — not
|
# results before sending to the API. Runs unconditionally — not
|
||||||
|
|
|
||||||
|
|
@ -13,11 +13,18 @@ MARKER = {"type": "ephemeral"}
|
||||||
|
|
||||||
|
|
||||||
class TestApplyCacheMarker:
|
class TestApplyCacheMarker:
|
||||||
def test_tool_message_gets_top_level_marker(self):
|
def test_tool_message_gets_top_level_marker_on_native_anthropic(self):
|
||||||
|
"""Native Anthropic path: cache_control injected top-level (adapter moves it inside tool_result)."""
|
||||||
msg = {"role": "tool", "content": "result"}
|
msg = {"role": "tool", "content": "result"}
|
||||||
_apply_cache_marker(msg, MARKER)
|
_apply_cache_marker(msg, MARKER, native_anthropic=True)
|
||||||
assert msg["cache_control"] == MARKER
|
assert msg["cache_control"] == MARKER
|
||||||
|
|
||||||
|
def test_tool_message_skips_marker_on_openrouter(self):
|
||||||
|
"""OpenRouter path: top-level cache_control on role:tool is invalid and causes silent hang."""
|
||||||
|
msg = {"role": "tool", "content": "result"}
|
||||||
|
_apply_cache_marker(msg, MARKER, native_anthropic=False)
|
||||||
|
assert "cache_control" not in msg
|
||||||
|
|
||||||
def test_none_content_gets_top_level_marker(self):
|
def test_none_content_gets_top_level_marker(self):
|
||||||
msg = {"role": "assistant", "content": None}
|
msg = {"role": "assistant", "content": None}
|
||||||
_apply_cache_marker(msg, MARKER)
|
_apply_cache_marker(msg, MARKER)
|
||||||
|
|
|
||||||
|
|
@ -718,7 +718,7 @@ class TestConvertMessages:
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{"role": "tool", "tool_call_id": "tc_1", "content": "result"},
|
{"role": "tool", "tool_call_id": "tc_1", "content": "result"},
|
||||||
])
|
], native_anthropic=True)
|
||||||
|
|
||||||
_, result = convert_messages_to_anthropic(messages)
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
user_msg = [m for m in result if m["role"] == "user"][0]
|
user_msg = [m for m in result if m["role"] == "user"][0]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue