mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:42:00 +00:00
fix(bedrock): price Claude prompt-cache tokens in /usage (#50307)
Bedrock Claude routes through the AnthropicBedrock SDK and injects cache_control, so cached tokens are always reported — but the pricing table had no cache cost fields for any Bedrock model, so /usage showed "cost unknown" on every cached session. Also, cross-region inference profiles (us./global./eu. prefixes) never matched the bare pricing keys.

- Add cache_read/cache_write rates to the four Bedrock Claude rows (read 0.1x input, write 1.25x input per the Bedrock pricing page).
- Normalize the cross-region prefix in the Bedrock pricing lookup, mirroring is_anthropic_bedrock_model's prefix list.

Closes #50295.
This commit is contained in:
parent
7bc6f18062
commit
e0498bd305
2 changed files with 108 additions and 0 deletions
|
|
@ -451,6 +451,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
|||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("15.00"),
|
||||
output_cost_per_million=Decimal("75.00"),
|
||||
cache_read_cost_per_million=Decimal("1.50"),
|
||||
cache_write_cost_per_million=Decimal("18.75"),
|
||||
source="official_docs_snapshot",
|
||||
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||
pricing_version="bedrock-pricing-2026-04",
|
||||
|
|
@ -461,6 +463,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
|||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("3.00"),
|
||||
output_cost_per_million=Decimal("15.00"),
|
||||
cache_read_cost_per_million=Decimal("0.30"),
|
||||
cache_write_cost_per_million=Decimal("3.75"),
|
||||
source="official_docs_snapshot",
|
||||
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||
pricing_version="bedrock-pricing-2026-04",
|
||||
|
|
@ -471,6 +475,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
|||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("3.00"),
|
||||
output_cost_per_million=Decimal("15.00"),
|
||||
cache_read_cost_per_million=Decimal("0.30"),
|
||||
cache_write_cost_per_million=Decimal("3.75"),
|
||||
source="official_docs_snapshot",
|
||||
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||
pricing_version="bedrock-pricing-2026-04",
|
||||
|
|
@ -481,6 +487,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
|||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("0.80"),
|
||||
output_cost_per_million=Decimal("4.00"),
|
||||
cache_read_cost_per_million=Decimal("0.08"),
|
||||
cache_write_cost_per_million=Decimal("1.00"),
|
||||
source="official_docs_snapshot",
|
||||
source_url="https://aws.amazon.com/bedrock/pricing/",
|
||||
pricing_version="bedrock-pricing-2026-04",
|
||||
|
|
@ -584,6 +592,26 @@ def resolve_billing_route(
|
|||
return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
|
||||
|
||||
|
||||
def _normalize_bedrock_model_name(model: str) -> str:
|
||||
"""Normalize a Bedrock model id to its bare foundation-model form.
|
||||
|
||||
Bedrock cross-region inference profiles prefix the foundation model id
|
||||
with a region scope (``us.`` / ``global.`` / ``eu.`` / ``ap.`` / ``jp.``),
|
||||
e.g. ``us.anthropic.claude-opus-4-7``. The pricing table is keyed on the
|
||||
bare ``anthropic.claude-*`` id, so the prefix must be stripped before the
|
||||
lookup or every cross-region session prices as unknown. Mirrors the
|
||||
prefix list in ``bedrock_adapter.is_anthropic_bedrock_model``. Also
|
||||
normalizes dot-notation version numbers (``4.7`` → ``4-7``).
|
||||
"""
|
||||
name = model.lower().strip()
|
||||
for prefix in ("us.", "global.", "eu.", "ap.", "jp."):
|
||||
if name.startswith(prefix):
|
||||
name = name[len(prefix):]
|
||||
break
|
||||
name = re.sub(r"(\d+)\.(\d+)", r"\1-\2", name)
|
||||
return name
|
||||
|
||||
|
||||
def _normalize_anthropic_model_name(model: str) -> str:
|
||||
"""Normalize Anthropic model name variants to canonical form.
|
||||
|
||||
|
|
@ -614,6 +642,14 @@ def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]
|
|||
entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized))
|
||||
if entry:
|
||||
return entry
|
||||
# Bedrock cross-region inference profiles carry a region prefix
|
||||
# (us./global./eu./...) that the bare pricing keys don't have.
|
||||
if route.provider == "bedrock":
|
||||
normalized = _normalize_bedrock_model_name(model)
|
||||
if normalized != model:
|
||||
entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized))
|
||||
if entry:
|
||||
return entry
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -250,3 +250,75 @@ def test_deepseek_v4_pro_estimate_usage_cost():
|
|||
assert result.amount_usd is not None
|
||||
# 1M input × $1.74/M + 500K output × $3.48/M = $1.74 + $1.74 = $3.48
|
||||
assert float(result.amount_usd) == 3.48
|
||||
|
||||
|
||||
def test_bedrock_claude_rows_all_carry_cache_pricing():
    """Every Bedrock Claude pricing row defines cache-read and cache-write rates.

    Selects all ``_OFFICIAL_DOCS_PRICING`` rows whose provider is ``bedrock``
    and whose model id contains ``claude``, then checks that input,
    cache-read and cache-write rates are all set, that cache reads are
    priced below fresh input, and that cache writes are priced above it.
    A row missing either cache rate would make a cached session price as
    ``unknown``. See #50295.
    """
    from agent.usage_pricing import _OFFICIAL_DOCS_PRICING

    bedrock_claude = {
        key: entry
        for key, entry in _OFFICIAL_DOCS_PRICING.items()
        if key[0] == "bedrock" and "claude" in key[1]
    }
    assert bedrock_claude, "expected at least one bedrock Claude pricing row"

    for key, entry in bedrock_claude.items():
        fresh_rate = entry.input_cost_per_million
        read_rate = entry.cache_read_cost_per_million
        write_rate = entry.cache_write_cost_per_million

        # All three rates must be present before they can be compared.
        assert fresh_rate is not None, key
        assert read_rate is not None, key
        assert write_rate is not None, key

        # Reading from cache is discounted; writing to it carries a premium.
        assert read_rate < fresh_rate, key
        assert write_rate > fresh_rate, key
|
||||
|
||||
|
||||
def test_bedrock_cross_region_profile_prefix_resolves_to_pricing():
    """Region-scoped profile ids price the same as the bare model id.

    Looks up ``anthropic.claude-sonnet-4-5`` on the ``bedrock`` provider,
    then repeats the lookup with each of the ``us.``, ``global.`` and
    ``eu.`` prefixes and requires the same input and cache-read rates.
    Without prefix normalization the scoped ids would price as unknown.
    """
    base_model = "anthropic.claude-sonnet-4-5"
    lookup_kwargs = {
        "provider": "bedrock",
        "base_url": "https://bedrock-runtime.us-east-1.amazonaws.com",
    }

    unscoped = get_pricing_entry(base_model, **lookup_kwargs)
    assert unscoped is not None

    for prefix in ("us.", "global.", "eu."):
        profile = get_pricing_entry(prefix + base_model, **lookup_kwargs)
        assert profile is not None, prefix
        assert profile.input_cost_per_million == unscoped.input_cost_per_million
        assert (
            profile.cache_read_cost_per_million
            == unscoped.cache_read_cost_per_million
        )
|
||||
|
||||
|
||||
def test_bedrock_claude_cached_session_estimates_cost_not_unknown():
    """A cached Bedrock Claude session yields a dollar estimate.

    Builds a usage record dominated by cache-read and cache-creation
    tokens, checks that ``normalize_usage`` carries both counts through,
    and then requires ``estimate_usage_cost`` for the cross-region id
    ``us.anthropic.claude-opus-4-6`` to report status ``estimated`` with a
    non-null amount, rather than ``unknown`` (the symptom in #50295).
    """
    cache_read_count = 1369379
    cache_write_count = 42135
    raw_usage = SimpleNamespace(
        **{
            "input_tokens": 55,
            "output_tokens": 7113,
            "cache_read_input_tokens": cache_read_count,
            "cache_creation_input_tokens": cache_write_count,
        }
    )

    normalized = normalize_usage(
        raw_usage, provider="bedrock", api_mode="anthropic_messages"
    )
    assert normalized.cache_read_tokens == cache_read_count
    assert normalized.cache_write_tokens == cache_write_count

    estimate = estimate_usage_cost(
        "us.anthropic.claude-opus-4-6",
        normalized,
        provider="bedrock",
        base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
    )
    assert estimate.status == "estimated"
    assert estimate.amount_usd is not None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue