hermes-agent/tests/agent/test_prompt_caching.py
Teknium b06e999302
fix(cache): kill long-lived prefix layout — system prompt is now byte-static within a session (#24778)
The long-lived prefix-cache layout split the system prompt into stable/
context/volatile blocks and re-derived them on every API call. The
volatile tier (timestamp + memory snapshot + USER profile) ticks per
turn, so the system message bytes mutated mid-conversation and broke
upstream prompt caches (OpenRouter, Nous Portal, Anthropic).

Diagnosed via live wire-format diffing: in an 8-turn conversation the
old layout flipped the sha of system block[1] mid-session at the minute
boundary, dropping cached_tokens to 0 on that turn (cumulative
66.6% vs 83.3% for the single-block layout). Hermes invariant:
history (system + all but the last 1-2 messages) must be static.

Fix: drop the long-lived layout entirely. Single layout everywhere —
system_and_3 with one cached system string built once on first turn,
replayed verbatim on every subsequent turn. This loses cross-session 1h
prefix caching for Claude (the feature that motivated the split), but
within-session caching now actually works on every provider.

Removed:
- run_agent.py: _use_long_lived_prefix_cache flag, _long_lived_cache_ttl,
  _supports_long_lived_anthropic_cache method, the long-lived branch in
  run_conversation, mark_tools_for_long_lived_cache call site
- agent/prompt_caching.py: apply_anthropic_cache_control_long_lived,
  mark_tools_for_long_lived_cache, _mark_system_stable_block helper
- hermes_cli/config.py: prompt_caching.long_lived_prefix and
  prompt_caching.long_lived_ttl config keys
- tests/agent/test_prompt_caching_live.py (entire file)
- tests/agent/test_prompt_caching.py: TestMarkToolsForLongLivedCache,
  TestApplyAnthropicCacheControlLongLived
- tests/run_agent/test_anthropic_prompt_cache_policy.py:
  TestSupportsLongLivedAnthropicCache

Targeted tests: 62/62 pass.
2026-05-12 20:46:04 -07:00

143 lines
5.4 KiB
Python

"""Tests for agent/prompt_caching.py — Anthropic cache control injection."""
import copy
import pytest
from agent.prompt_caching import (
_apply_cache_marker,
apply_anthropic_cache_control,
)
# Cache-control marker dict passed to `_apply_cache_marker` by the tests below.
MARKER = {"type": "ephemeral"}
class TestApplyCacheMarker:
    """Checks ``_apply_cache_marker`` against each message shape covered here."""

    def test_tool_message_gets_top_level_marker_on_native_anthropic(self):
        """On the native Anthropic path a tool message is marked at top level.

        Per the original test note, the adapter later moves the marker inside
        the tool_result.
        """
        tool_msg = dict(role="tool", content="result")
        _apply_cache_marker(tool_msg, MARKER, native_anthropic=True)
        assert tool_msg["cache_control"] == MARKER

    def test_tool_message_skips_marker_on_openrouter(self):
        """Off the native path a tool message must be left without a marker.

        Per the original test note, a top-level cache_control on role:tool is
        invalid on OpenRouter and causes a silent hang.
        """
        tool_msg = dict(role="tool", content="result")
        _apply_cache_marker(tool_msg, MARKER, native_anthropic=False)
        assert "cache_control" not in tool_msg

    def test_none_content_gets_top_level_marker(self):
        """A message whose content is None is marked on the message itself."""
        message = dict(role="assistant", content=None)
        _apply_cache_marker(message, MARKER)
        assert message["cache_control"] == MARKER

    def test_empty_string_content_gets_top_level_marker(self):
        """Empty-string content is marked at top level and left as ``""``.

        Empty text blocks cannot carry cache_control (Anthropic rejects them).
        """
        message = dict(role="assistant", content="")
        _apply_cache_marker(message, MARKER)
        assert message["cache_control"] == MARKER
        # The content must NOT have been wrapped into
        # [{"type": "text", "text": "", "cache_control": ...}].
        assert message["content"] == ""

    def test_string_content_wrapped_in_list(self):
        """Non-empty string content becomes a single marked text block."""
        message = dict(role="user", content="Hello")
        _apply_cache_marker(message, MARKER)
        wrapped = message["content"]
        assert isinstance(wrapped, list)
        assert len(wrapped) == 1
        only_block = wrapped[0]
        assert only_block["type"] == "text"
        assert only_block["text"] == "Hello"
        assert only_block["cache_control"] == MARKER

    def test_list_content_last_item_gets_marker(self):
        """With list content only the final item receives the marker."""
        blocks = [
            {"type": "text", "text": "First"},
            {"type": "text", "text": "Second"},
        ]
        message = {"role": "user", "content": blocks}
        _apply_cache_marker(message, MARKER)
        assert "cache_control" not in message["content"][0]
        assert message["content"][1]["cache_control"] == MARKER

    def test_empty_list_content_no_crash(self):
        """An empty content list must be accepted without raising."""
        message = dict(role="user", content=[])
        # Smoke test only: the call completing is the whole assertion.
        _apply_cache_marker(message, MARKER)
class TestApplyAnthropicCacheControl:
    """Tests for where ``apply_anthropic_cache_control`` places cache markers."""

    @staticmethod
    def _count_markers(msg):
        """Return how many ``cache_control`` markers *msg* carries.

        Counts a top-level ``cache_control`` key on the message plus one for
        every dict item of a list-valued ``content`` that has its own
        ``cache_control`` key.
        """
        count = 1 if "cache_control" in msg else 0
        content = msg.get("content")
        if isinstance(content, list):
            count += sum(
                1
                for item in content
                if isinstance(item, dict) and "cache_control" in item
            )
        return count

    def test_empty_messages(self):
        """An empty message list comes back as an empty list."""
        result = apply_anthropic_cache_control([])
        assert result == []

    def test_returns_deep_copy(self):
        """The result is a fresh structure and the input is left untouched."""
        msgs = [{"role": "user", "content": "Hello"}]
        result = apply_anthropic_cache_control(msgs)
        assert result is not msgs
        assert result[0] is not msgs[0]
        # Original should be unmodified. Compare against the pristine value:
        # a `"cache_control" not in content` check can never fail for either a
        # plain string or a list of dicts.
        assert msgs == [{"role": "user", "content": "Hello"}]

    def test_system_message_gets_marker(self):
        """A leading system message is wrapped and marked as ephemeral."""
        msgs = [
            {"role": "system", "content": "You are helpful"},
            {"role": "user", "content": "Hi"},
        ]
        result = apply_anthropic_cache_control(msgs)
        # System message should have cache_control
        sys_content = result[0]["content"]
        assert isinstance(sys_content, list)
        assert sys_content[0]["cache_control"]["type"] == "ephemeral"

    def test_last_3_non_system_get_markers(self):
        """System plus the last three non-system messages are marked."""
        msgs = [
            {"role": "system", "content": "System"},
            {"role": "user", "content": "msg1"},
            {"role": "assistant", "content": "msg2"},
            {"role": "user", "content": "msg3"},
            {"role": "assistant", "content": "msg4"},
        ]
        result = apply_anthropic_cache_control(msgs)
        marked = [self._count_markers(msg) > 0 for msg in result]
        # System (index 0) + last 3 non-system (indices 2, 3, 4) = 4 breakpoints
        # Index 1 (msg1) should NOT have marker
        assert marked == [True, False, True, True, True]

    def test_no_system_message(self):
        """Without a system message every message in a short list is marked."""
        msgs = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi"},
        ]
        result = apply_anthropic_cache_control(msgs)
        # Both should get markers (4 slots available, only 2 messages)
        assert len(result) == 2
        assert all(self._count_markers(msg) > 0 for msg in result)

    def test_1h_ttl(self):
        """Passing ``cache_ttl="1h"`` puts ``ttl: "1h"`` on the marker."""
        msgs = [{"role": "system", "content": "System prompt"}]
        result = apply_anthropic_cache_control(msgs, cache_ttl="1h")
        sys_content = result[0]["content"]
        assert isinstance(sys_content, list)
        assert sys_content[0]["cache_control"]["ttl"] == "1h"

    def test_max_4_breakpoints(self):
        """No more than four markers are placed across a long conversation."""
        msgs = [
            {"role": "system", "content": "System"},
        ] + [
            {"role": "user" if i % 2 == 0 else "assistant", "content": f"msg{i}"}
            for i in range(10)
        ]
        result = apply_anthropic_cache_control(msgs)
        # Count every marker, whether on the message itself or on a content item
        total = sum(self._count_markers(msg) for msg in result)
        assert total <= 4