fix(bedrock): price Claude prompt-cache tokens in /usage (#50307)

Bedrock Claude routes through the AnthropicBedrock SDK and injects
cache_control, so cached tokens are always reported — but the pricing
table had no cache cost fields for any Bedrock model, so /usage showed
"cost unknown" on every cached session. Also, cross-region inference
profiles (us./global./eu. prefixes) never matched the bare pricing keys.

- Add cache_read/cache_write rates to the four Bedrock Claude rows
  (read 0.1x input, write 1.25x input per the Bedrock pricing page).
- Normalize the cross-region prefix in the Bedrock pricing lookup,
  mirroring is_anthropic_bedrock_model's prefix list.

Closes #50295.
This commit is contained in:
Teknium 2026-06-21 11:48:43 -07:00 committed by GitHub
parent 7bc6f18062
commit e0498bd305
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 108 additions and 0 deletions

View file

@ -451,6 +451,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
): PricingEntry(
input_cost_per_million=Decimal("15.00"),
output_cost_per_million=Decimal("75.00"),
cache_read_cost_per_million=Decimal("1.50"),
cache_write_cost_per_million=Decimal("18.75"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
@ -461,6 +463,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
cache_read_cost_per_million=Decimal("0.30"),
cache_write_cost_per_million=Decimal("3.75"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
@ -471,6 +475,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
cache_read_cost_per_million=Decimal("0.30"),
cache_write_cost_per_million=Decimal("3.75"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
@ -481,6 +487,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
): PricingEntry(
input_cost_per_million=Decimal("0.80"),
output_cost_per_million=Decimal("4.00"),
cache_read_cost_per_million=Decimal("0.08"),
cache_write_cost_per_million=Decimal("1.00"),
source="official_docs_snapshot",
source_url="https://aws.amazon.com/bedrock/pricing/",
pricing_version="bedrock-pricing-2026-04",
@ -584,6 +592,26 @@ def resolve_billing_route(
return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
def _normalize_bedrock_model_name(model: str) -> str:
"""Normalize a Bedrock model id to its bare foundation-model form.
Bedrock cross-region inference profiles prefix the foundation model id
with a region scope (``us.`` / ``global.`` / ``eu.`` / ``ap.`` / ``jp.``),
e.g. ``us.anthropic.claude-opus-4-7``. The pricing table is keyed on the
bare ``anthropic.claude-*`` id, so the prefix must be stripped before the
lookup or every cross-region session prices as unknown. Mirrors the
prefix list in ``bedrock_adapter.is_anthropic_bedrock_model``. Also
normalizes dot-notation version numbers (``4.7`` ``4-7``).
"""
name = model.lower().strip()
for prefix in ("us.", "global.", "eu.", "ap.", "jp."):
if name.startswith(prefix):
name = name[len(prefix):]
break
name = re.sub(r"(\d+)\.(\d+)", r"\1-\2", name)
return name
def _normalize_anthropic_model_name(model: str) -> str:
"""Normalize Anthropic model name variants to canonical form.
@ -614,6 +642,14 @@ def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]
entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized))
if entry:
return entry
# Bedrock cross-region inference profiles carry a region prefix
# (us./global./eu./...) that the bare pricing keys don't have.
if route.provider == "bedrock":
normalized = _normalize_bedrock_model_name(model)
if normalized != model:
entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized))
if entry:
return entry
return None

View file

@ -250,3 +250,75 @@ def test_deepseek_v4_pro_estimate_usage_cost():
assert result.amount_usd is not None
# 1M input × $1.74/M + 500K output × $3.48/M = $1.74 + $1.74 = $3.48
assert float(result.amount_usd) == 3.48
def test_bedrock_claude_rows_all_carry_cache_pricing():
    """Every Bedrock Claude pricing row must define both cache rates.

    Bedrock Claude routes through the AnthropicBedrock SDK and injects
    cache_control, so cached tokens are always reported -- the pricing layer
    must be able to value them. A row missing either the cache-read or the
    cache-write rate makes a cached session price as ``unknown``.
    See #50295.
    """
    from agent.usage_pricing import _OFFICIAL_DOCS_PRICING

    bedrock_claude = {
        key: row
        for key, row in _OFFICIAL_DOCS_PRICING.items()
        if key[0] == "bedrock" and "claude" in key[1]
    }
    assert bedrock_claude, "expected at least one bedrock Claude pricing row"

    for key, row in bedrock_claude.items():
        fresh_input = row.input_cost_per_million
        cached_read = row.cache_read_cost_per_million
        cached_write = row.cache_write_cost_per_million

        assert fresh_input is not None, key
        assert cached_read is not None, key
        assert cached_write is not None, key
        # A cache read is discounted relative to fresh input, while a cache
        # write carries a premium over it.
        assert cached_read < fresh_input, key
        assert cached_write > fresh_input, key
def test_bedrock_cross_region_profile_prefix_resolves_to_pricing():
    """Region-scoped profile ids must price the same as the bare model id.

    A cross-region inference profile (``us.`` / ``global.`` / ``eu.`` prefix)
    has to resolve to the pricing entry of its underlying foundation model.
    If the prefix is not normalized away, ``us.anthropic.claude-*`` sessions
    price as unknown.
    """
    endpoint = "https://bedrock-runtime.us-east-1.amazonaws.com"
    foundation_id = "anthropic.claude-sonnet-4-5"

    reference = get_pricing_entry(
        foundation_id, provider="bedrock", base_url=endpoint
    )
    assert reference is not None

    for prefix in ("us.", "global.", "eu."):
        profile_id = prefix + foundation_id
        resolved = get_pricing_entry(
            profile_id, provider="bedrock", base_url=endpoint
        )
        assert resolved is not None, prefix
        # Compare individual rates against the unscoped reference entry.
        assert resolved.input_cost_per_million == reference.input_cost_per_million
        assert (
            resolved.cache_read_cost_per_million
            == reference.cache_read_cost_per_million
        )
def test_bedrock_claude_cached_session_estimates_cost_not_unknown():
    """A cached Bedrock Claude session must yield a dollar estimate.

    Reporting ``unknown`` for a session with cache hits was the
    user-visible symptom in #50295.
    """
    endpoint = "https://bedrock-runtime.us-east-1.amazonaws.com"
    cache_hits = 1369379
    cache_writes = 42135

    raw_usage = SimpleNamespace(
        input_tokens=55,
        output_tokens=7113,
        cache_read_input_tokens=cache_hits,
        cache_creation_input_tokens=cache_writes,
    )
    normalized = normalize_usage(
        raw_usage, provider="bedrock", api_mode="anthropic_messages"
    )
    # The cache counters must carry over into the canonical usage record.
    assert normalized.cache_read_tokens == cache_hits
    assert normalized.cache_write_tokens == cache_writes

    # A region-scoped profile id is used on purpose, so the lookup has to
    # succeed for a prefixed model as well.
    estimate = estimate_usage_cost(
        "us.anthropic.claude-opus-4-6",
        normalized,
        provider="bedrock",
        base_url=endpoint,
    )
    assert estimate.status == "estimated"
    assert estimate.amount_usd is not None