From e0498bd3051e29d21e442f2abfbd5eb3bf7ffabd Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 21 Jun 2026 11:48:43 -0700 Subject: [PATCH] fix(bedrock): price Claude prompt-cache tokens in /usage (#50307) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bedrock Claude routes through the AnthropicBedrock SDK and injects cache_control, so cached tokens are always reported — but the pricing table had no cache cost fields for any Bedrock model, so /usage showed "cost unknown" on every cached session. Also, cross-region inference profiles (us./global./eu. prefixes) never matched the bare pricing keys. - Add cache_read/cache_write rates to the four Bedrock Claude rows (read 0.1x input, write 1.25x input per the Bedrock pricing page). - Normalize the cross-region prefix in the Bedrock pricing lookup, mirroring is_anthropic_bedrock_model's prefix list. Closes #50295. --- agent/usage_pricing.py | 36 ++++++++++++++++ tests/agent/test_usage_pricing.py | 72 +++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py index 95bb11df521..7c4416e5fb2 100644 --- a/agent/usage_pricing.py +++ b/agent/usage_pricing.py @@ -451,6 +451,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { ): PricingEntry( input_cost_per_million=Decimal("15.00"), output_cost_per_million=Decimal("75.00"), + cache_read_cost_per_million=Decimal("1.50"), + cache_write_cost_per_million=Decimal("18.75"), source="official_docs_snapshot", source_url="https://aws.amazon.com/bedrock/pricing/", pricing_version="bedrock-pricing-2026-04", @@ -461,6 +463,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { ): PricingEntry( input_cost_per_million=Decimal("3.00"), output_cost_per_million=Decimal("15.00"), + cache_read_cost_per_million=Decimal("0.30"), + cache_write_cost_per_million=Decimal("3.75"), source="official_docs_snapshot", source_url="https://aws.amazon.com/bedrock/pricing/", pricing_version="bedrock-pricing-2026-04", @@ -471,6 +475,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { ): PricingEntry( input_cost_per_million=Decimal("3.00"), output_cost_per_million=Decimal("15.00"), + cache_read_cost_per_million=Decimal("0.30"), + cache_write_cost_per_million=Decimal("3.75"), source="official_docs_snapshot", source_url="https://aws.amazon.com/bedrock/pricing/", pricing_version="bedrock-pricing-2026-04", @@ -481,6 +487,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { ): PricingEntry( input_cost_per_million=Decimal("0.80"), output_cost_per_million=Decimal("4.00"), + cache_read_cost_per_million=Decimal("0.08"), + cache_write_cost_per_million=Decimal("1.00"), source="official_docs_snapshot", source_url="https://aws.amazon.com/bedrock/pricing/", pricing_version="bedrock-pricing-2026-04", @@ -584,6 +592,26 @@ def resolve_billing_route( return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown") +def _normalize_bedrock_model_name(model: str) -> str: + """Normalize a Bedrock model id to its bare foundation-model form. + + Bedrock cross-region inference profiles prefix the foundation model id + with a region scope (``us.`` / ``global.`` / ``eu.`` / ``ap.`` / ``jp.``), + e.g. ``us.anthropic.claude-opus-4-7``. The pricing table is keyed on the + bare ``anthropic.claude-*`` id, so the prefix must be stripped before the + lookup or every cross-region session prices as unknown. Mirrors the + prefix list in ``bedrock_adapter.is_anthropic_bedrock_model``. Also + normalizes dot-notation version numbers (``4.7`` → ``4-7``). + """ + name = model.lower().strip() + for prefix in ("us.", "global.", "eu.", "ap.", "jp."): + if name.startswith(prefix): + name = name[len(prefix):] + break + name = re.sub(r"(\d+)\.(\d+)", r"\1-\2", name) + return name + + def _normalize_anthropic_model_name(model: str) -> str: """Normalize Anthropic model name variants to canonical form. @@ -614,6 +642,14 @@ def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry] entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized)) if entry: return entry + # Bedrock cross-region inference profiles carry a region prefix + # (us./global./eu./...) that the bare pricing keys don't have. + if route.provider == "bedrock": + normalized = _normalize_bedrock_model_name(model) + if normalized != model: + entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized)) + if entry: + return entry return None diff --git a/tests/agent/test_usage_pricing.py b/tests/agent/test_usage_pricing.py index 319a8028b3e..3bd68ae2344 100644 --- a/tests/agent/test_usage_pricing.py +++ b/tests/agent/test_usage_pricing.py @@ -250,3 +250,75 @@ def test_deepseek_v4_pro_estimate_usage_cost(): assert result.amount_usd is not None # 1M input × $1.74/M + 500K output × $3.48/M = $1.74 + $1.74 = $3.48 assert float(result.amount_usd) == 3.48 + + +def test_bedrock_claude_rows_all_carry_cache_pricing(): + """Invariant: every Bedrock Claude pricing row must carry cache-read AND + cache-write rates, otherwise a cached session prices as ``unknown``. + + Bedrock Claude routes through the AnthropicBedrock SDK and injects + cache_control, so cached tokens are always reported — the pricing layer + must be able to value them. See #50295. + """ + from agent.usage_pricing import _OFFICIAL_DOCS_PRICING + + claude_rows = [ + (prov, model) + for (prov, model) in _OFFICIAL_DOCS_PRICING + if prov == "bedrock" and "claude" in model + ] + assert claude_rows, "expected at least one bedrock Claude pricing row" + for key in claude_rows: + entry = _OFFICIAL_DOCS_PRICING[key] + assert entry.input_cost_per_million is not None, key + assert entry.cache_read_cost_per_million is not None, key + assert entry.cache_write_cost_per_million is not None, key + # Cache reads are cheaper than fresh input; cache writes cost more. + assert entry.cache_read_cost_per_million < entry.input_cost_per_million, key + assert entry.cache_write_cost_per_million > entry.input_cost_per_million, key + + +def test_bedrock_cross_region_profile_prefix_resolves_to_pricing(): + """Cross-region inference profiles (us./global./eu. prefixes) must resolve + to the same pricing entry as the bare foundation-model id. Without prefix + normalization, ``us.anthropic.claude-*`` sessions price as unknown. + """ + bedrock_url = "https://bedrock-runtime.us-east-1.amazonaws.com" + bare = get_pricing_entry( + "anthropic.claude-sonnet-4-5", provider="bedrock", base_url=bedrock_url + ) + assert bare is not None + for prefix in ("us.", "global.", "eu."): + scoped = get_pricing_entry( + f"{prefix}anthropic.claude-sonnet-4-5", + provider="bedrock", + base_url=bedrock_url, + ) + assert scoped is not None, prefix + assert scoped.input_cost_per_million == bare.input_cost_per_million + assert scoped.cache_read_cost_per_million == bare.cache_read_cost_per_million + + +def test_bedrock_claude_cached_session_estimates_cost_not_unknown(): + """A Bedrock Claude session with cache hits must produce a dollar estimate, + not ``unknown`` — the user-visible symptom in #50295. + """ + bedrock_url = "https://bedrock-runtime.us-east-1.amazonaws.com" + usage = SimpleNamespace( + input_tokens=55, + output_tokens=7113, + cache_read_input_tokens=1369379, + cache_creation_input_tokens=42135, + ) + canonical = normalize_usage(usage, provider="bedrock", api_mode="anthropic_messages") + assert canonical.cache_read_tokens == 1369379 + assert canonical.cache_write_tokens == 42135 + + result = estimate_usage_cost( + "us.anthropic.claude-opus-4-6", + canonical, + provider="bedrock", + base_url=bedrock_url, + ) + assert result.status == "estimated" + assert result.amount_usd is not None