"""Anthropic prompt caching strategies. Two layouts: * ``system_and_3`` (default, used everywhere except the long-lived path): 4 cache_control breakpoints — system prompt + last 3 non-system messages. All at the same TTL (5m or 1h). Reduces input token costs by ~75% on multi-turn conversations within a single session. * ``prefix_and_2`` (Claude on Anthropic / OpenRouter / Nous Portal): 4 breakpoints split across two TTL tiers — tools[-1] (1h) + stable system prefix (1h) + last 2 non-system messages (5m). The long-lived prefix is byte-stable across sessions for a given user config, so every fresh session reads the cached system+tools instead of re-paying for them. Within-session rolling window shrinks from 3 messages to 2 to free the breakpoint budget. Pure functions -- no class state, no AIAgent dependency. """ import copy from typing import Any, Dict, List, Optional def _apply_cache_marker(msg: dict, cache_marker: dict, native_anthropic: bool = False) -> None: """Add cache_control to a single message, handling all format variations.""" role = msg.get("role", "") content = msg.get("content") if role == "tool": if native_anthropic: msg["cache_control"] = cache_marker return if content is None or content == "": msg["cache_control"] = cache_marker return if isinstance(content, str): msg["content"] = [ {"type": "text", "text": content, "cache_control": cache_marker} ] return if isinstance(content, list) and content: last = content[-1] if isinstance(last, dict): last["cache_control"] = cache_marker def _build_marker(ttl: str) -> Dict[str, str]: """Build a cache_control marker dict for the given TTL ('5m' or '1h').""" marker: Dict[str, str] = {"type": "ephemeral"} if ttl == "1h": marker["ttl"] = "1h" return marker def apply_anthropic_cache_control( api_messages: List[Dict[str, Any]], cache_ttl: str = "5m", native_anthropic: bool = False, ) -> List[Dict[str, Any]]: """Apply system_and_3 caching strategy to messages for Anthropic models. Places up to 4 cache_control breakpoints: system prompt + last 3 non-system messages, all at the same TTL. Returns: Deep copy of messages with cache_control breakpoints injected. """ messages = copy.deepcopy(api_messages) if not messages: return messages marker = _build_marker(cache_ttl) breakpoints_used = 0 if messages[0].get("role") == "system": _apply_cache_marker(messages[0], marker, native_anthropic=native_anthropic) breakpoints_used += 1 remaining = 4 - breakpoints_used non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"] for idx in non_sys[-remaining:]: _apply_cache_marker(messages[idx], marker, native_anthropic=native_anthropic) return messages def _mark_system_stable_block( messages: List[Dict[str, Any]], long_lived_marker: Dict[str, str], ) -> bool: """Mark the *first* content block of the system message with the 1h marker. The system message is expected to have been split into multiple content blocks beforehand by the caller — block[0] is the cross-session-stable prefix, subsequent blocks carry context files + volatile suffix. Falls back to marking the whole system message as a single block when the message hasn't been split (preserves correctness on the fallback path). Returns True when a marker was placed. """ if not messages or messages[0].get("role") != "system": return False sys_msg = messages[0] content = sys_msg.get("content") # Already a list of blocks → mark the first block. if isinstance(content, list) and content: first = content[0] if isinstance(first, dict): first["cache_control"] = long_lived_marker return True return False # String content (no split) → cannot place a stable-prefix breakpoint # without changing the byte content. Caller is responsible for # splitting; if they didn't, fall through to envelope marker so we still # cache *something* for this turn. if isinstance(content, str) and content: sys_msg["content"] = [ {"type": "text", "text": content, "cache_control": long_lived_marker} ] return True return False def apply_anthropic_cache_control_long_lived( api_messages: List[Dict[str, Any]], long_lived_ttl: str = "1h", rolling_ttl: str = "5m", native_anthropic: bool = False, ) -> List[Dict[str, Any]]: """Apply prefix_and_2 caching: long-lived stable prefix + rolling window. Layout (4 breakpoints total): * Stable system prefix (block[0]) → ``long_lived_ttl`` TTL * Last 2 non-system messages → ``rolling_ttl`` TTL each NOTE: this function does NOT mark the tools array. Tools cache_control is attached separately (see ``mark_tools_for_long_lived_cache``) because tools live outside the messages list in the API payload. The caller MUST have split the system message into ordered content blocks where block[0] is the cross-session-stable portion. If the system message is still a single string, it is wrapped into a single block and marked — this is correct, just less effective (the volatile suffix is not isolated, so the prefix invalidates per-session). Returns: Deep copy of messages with cache_control breakpoints injected. """ messages = copy.deepcopy(api_messages) if not messages: return messages long_marker = _build_marker(long_lived_ttl) rolling_marker = _build_marker(rolling_ttl) placed_prefix = _mark_system_stable_block(messages, long_marker) # Reserve 1 breakpoint for the system prefix (when placed); spend the # remaining 3 on the rolling tail. Anthropic max is 4 total — # tools[-1] (when marked) consumes the 4th, so we cap rolling at 2 here. rolling_budget = 2 if placed_prefix else 3 non_sys = [i for i in range(len(messages)) if messages[i].get("role") != "system"] for idx in non_sys[-rolling_budget:]: _apply_cache_marker(messages[idx], rolling_marker, native_anthropic=native_anthropic) return messages def mark_tools_for_long_lived_cache( tools: Optional[List[Dict[str, Any]]], long_lived_ttl: str = "1h", ) -> Optional[List[Dict[str, Any]]]: """Attach cache_control to the last tool in the OpenAI-format tools list. Anthropic prefix-cache order is ``tools → system → messages``. Marking the last tool dict caches the entire tools array (Anthropic's docs: "the marker is placed on the last block you want included in the cached prefix"). Marker is preserved across the OpenAI-wire boundary on OpenRouter and Nous Portal (which proxies to OpenRouter); on native Anthropic the marker is forwarded by ``convert_tools_to_anthropic``. Returns a deep copy of the tools list with the marker attached, or the input unchanged when tools is empty/None. Pure function — does not mutate the input. """ if not tools: return tools out = copy.deepcopy(tools) last = out[-1] if isinstance(last, dict): last["cache_control"] = _build_marker(long_lived_ttl) return out