mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: read prompt caching cache_ttl from config
- Load prompt_caching.cache_ttl in AIAgent (5m default, 1h opt-in)
- Document DEFAULT_CONFIG and developer guide example
- Add unit tests for default, 1h, and invalid TTL fallback

Made-with: Cursor
This commit is contained in:
parent
9de555f3e3
commit
7626f3702e
4 changed files with 84 additions and 5 deletions
|
|
@@ -521,6 +521,12 @@ DEFAULT_CONFIG = {
|
||||||
|
|
||||||
},
|
},
|
||||||
|
|
||||||
|
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
||||||
|
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
|
||||||
|
"prompt_caching": {
|
||||||
|
"cache_ttl": "5m",
|
||||||
|
},
|
||||||
|
|
||||||
# AWS Bedrock provider configuration.
|
# AWS Bedrock provider configuration.
|
||||||
# Only used when model.provider is "bedrock".
|
# Only used when model.provider is "bedrock".
|
||||||
"bedrock": {
|
"bedrock": {
|
||||||
|
|
|
||||||
17
run_agent.py
17
run_agent.py
|
|
@@ -1036,8 +1036,21 @@ class AIAgent:
|
||||||
self._use_prompt_caching, self._use_native_cache_layout = (
|
self._use_prompt_caching, self._use_native_cache_layout = (
|
||||||
self._anthropic_prompt_cache_policy()
|
self._anthropic_prompt_cache_policy()
|
||||||
)
|
)
|
||||||
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
# Anthropic supports "5m" (default) and "1h" cache TTL tiers. Read from
|
||||||
|
# config.yaml under prompt_caching.cache_ttl; unknown values keep "5m".
|
||||||
|
# 1h tier costs 2x on write vs 1.25x for 5m, but amortizes across long
|
||||||
|
# sessions with >5-minute pauses between turns (#14971).
|
||||||
|
self._cache_ttl = "5m"
|
||||||
|
try:
|
||||||
|
from hermes_cli.config import load_config as _load_pc_cfg
|
||||||
|
|
||||||
|
_pc_cfg = _load_pc_cfg().get("prompt_caching", {}) or {}
|
||||||
|
_ttl = _pc_cfg.get("cache_ttl", "5m")
|
||||||
|
if _ttl in ("5m", "1h"):
|
||||||
|
self._cache_ttl = _ttl
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Iteration budget: the LLM is only notified when it actually exhausts
|
# Iteration budget: the LLM is only notified when it actually exhausts
|
||||||
# the iteration budget (api_call_count >= max_iterations). At that
|
# the iteration budget (api_call_count >= max_iterations). At that
|
||||||
# point we inject ONE message, allow one final API call, and if the
|
# point we inject ONE message, allow one final API call, and if the
|
||||||
|
|
|
||||||
|
|
@@ -685,6 +685,66 @@ class TestInit:
|
||||||
assert a.api_mode == "anthropic_messages"
|
assert a.api_mode == "anthropic_messages"
|
||||||
assert a._use_prompt_caching is True
|
assert a._use_prompt_caching is True
|
||||||
|
|
||||||
|
def test_prompt_caching_cache_ttl_defaults_without_config(self):
|
||||||
|
"""cache_ttl stays 5m when prompt_caching is absent from config."""
|
||||||
|
with (
|
||||||
|
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||||
|
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||||
|
patch("run_agent.OpenAI"),
|
||||||
|
patch("hermes_cli.config.load_config", return_value={}),
|
||||||
|
):
|
||||||
|
a = AIAgent(
|
||||||
|
api_key="test-k...7890",
|
||||||
|
model="anthropic/claude-sonnet-4-20250514",
|
||||||
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
quiet_mode=True,
|
||||||
|
skip_context_files=True,
|
||||||
|
skip_memory=True,
|
||||||
|
)
|
||||||
|
assert a._cache_ttl == "5m"
|
||||||
|
|
||||||
|
def test_prompt_caching_cache_ttl_custom_1h(self):
|
||||||
|
"""prompt_caching.cache_ttl 1h is applied when present in config."""
|
||||||
|
with (
|
||||||
|
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||||
|
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||||
|
patch("run_agent.OpenAI"),
|
||||||
|
patch(
|
||||||
|
"hermes_cli.config.load_config",
|
||||||
|
return_value={"prompt_caching": {"cache_ttl": "1h"}},
|
||||||
|
),
|
||||||
|
):
|
||||||
|
a = AIAgent(
|
||||||
|
api_key="test-k...7890",
|
||||||
|
model="anthropic/claude-sonnet-4-20250514",
|
||||||
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
quiet_mode=True,
|
||||||
|
skip_context_files=True,
|
||||||
|
skip_memory=True,
|
||||||
|
)
|
||||||
|
assert a._cache_ttl == "1h"
|
||||||
|
|
||||||
|
def test_prompt_caching_cache_ttl_invalid_falls_back(self):
|
||||||
|
"""Non-Anthropic TTL values keep default 5m without raising."""
|
||||||
|
with (
|
||||||
|
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||||
|
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||||
|
patch("run_agent.OpenAI"),
|
||||||
|
patch(
|
||||||
|
"hermes_cli.config.load_config",
|
||||||
|
return_value={"prompt_caching": {"cache_ttl": "30m"}},
|
||||||
|
),
|
||||||
|
):
|
||||||
|
a = AIAgent(
|
||||||
|
api_key="test-k...7890",
|
||||||
|
model="anthropic/claude-sonnet-4-20250514",
|
||||||
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
quiet_mode=True,
|
||||||
|
skip_context_files=True,
|
||||||
|
skip_memory=True,
|
||||||
|
)
|
||||||
|
assert a._cache_ttl == "5m"
|
||||||
|
|
||||||
def test_valid_tool_names_populated(self):
|
def test_valid_tool_names_populated(self):
|
||||||
"""valid_tool_names should contain names from loaded tools."""
|
"""valid_tool_names should contain names from loaded tools."""
|
||||||
tools = _make_tool_defs("web_search", "terminal")
|
tools = _make_tool_defs("web_search", "terminal")
|
||||||
|
|
|
||||||
|
|
@@ -332,9 +332,9 @@ Prompt caching is automatically enabled when:
|
||||||
- The provider supports `cache_control` (native Anthropic API or OpenRouter)
|
- The provider supports `cache_control` (native Anthropic API or OpenRouter)
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# config.yaml — TTL is configurable
|
# config.yaml — TTL is configurable (must be "5m" or "1h")
|
||||||
model:
|
prompt_caching:
|
||||||
cache_ttl: "5m" # "5m" or "1h"
|
cache_ttl: "5m"
|
||||||
```
|
```
|
||||||
|
|
||||||
The CLI shows caching status at startup:
|
The CLI shows caching status at startup:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue