"""Tests for ``agent.usage_pricing``: usage normalization, pricing lookup, cost estimation."""

from types import SimpleNamespace

from agent.usage_pricing import (
    CanonicalUsage,
    estimate_usage_cost,
    get_pricing_entry,
    normalize_usage,
)


def test_normalize_usage_anthropic_keeps_cache_buckets_separate():
    """Anthropic usage keeps input, cache-read and cache-write counts in separate fields."""
    raw = SimpleNamespace(
        input_tokens=1000,
        output_tokens=500,
        cache_read_input_tokens=2000,
        cache_creation_input_tokens=400,
    )

    canonical = normalize_usage(
        raw,
        provider="anthropic",
        api_mode="anthropic_messages",
    )

    expected = {
        "input_tokens": 1000,
        "output_tokens": 500,
        "cache_read_tokens": 2000,
        "cache_write_tokens": 400,
        "prompt_tokens": 3400,
    }
    for field, want in expected.items():
        assert getattr(canonical, field) == want


def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
    """For chat completions, cached tokens are split out of the prompt total."""
    details = SimpleNamespace(cached_tokens=1800)
    raw = SimpleNamespace(
        prompt_tokens=3000,
        completion_tokens=700,
        prompt_tokens_details=details,
    )

    canonical = normalize_usage(raw, provider="openai", api_mode="chat_completions")

    expected = {
        "input_tokens": 1200,
        "cache_read_tokens": 1800,
        "output_tokens": 700,
    }
    for field, want in expected.items():
        assert getattr(canonical, field) == want


def test_normalize_usage_openai_reads_top_level_anthropic_cache_fields():
    """Top-level Anthropic-style cache fields are honored on the chat-completions path.

    OpenAI-compatible proxies such as OpenRouter and Cline can report
    Anthropic-style cache token counts directly on the usage object, rather
    than inside ``prompt_tokens_details``, when routing Claude models.

    Regression guard for cline/cline#10266: before that fix the
    chat-completions branch of normalize_usage() only looked at
    ``prompt_tokens_details.cache_write_tokens`` and missed
    ``cache_creation_input_tokens`` entirely, so cache writes were reported
    as 0 and input tokens were overstated by the cache-write amount.
    """
    raw = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=SimpleNamespace(cached_tokens=500),
        cache_creation_input_tokens=300,
    )

    canonical = normalize_usage(
        raw,
        provider="openrouter",
        api_mode="chat_completions",
    )

    # The read count comes from the nested (preferred) cached_tokens field,
    # while the write count falls back to the top-level
    # cache_creation_input_tokens field.
    assert canonical.cache_read_tokens == 500
    assert canonical.cache_write_tokens == 300
    # Fresh input is what is left of the prompt total: 1000 - 500 - 300 = 200.
    assert canonical.input_tokens == 200
    assert canonical.output_tokens == 200


def test_normalize_usage_openai_reads_top_level_cache_read_when_details_missing():
    """Top-level cache fields are used when ``prompt_tokens_details`` is absent.

    Some proxies expose only the Anthropic-style top-level fields.
    Regression guard for cline/cline#10266.
    """
    raw = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        cache_read_input_tokens=500,
        cache_creation_input_tokens=300,
    )

    canonical = normalize_usage(
        raw,
        provider="openrouter",
        api_mode="chat_completions",
    )

    expected = {
        "cache_read_tokens": 500,
        "cache_write_tokens": 300,
        "input_tokens": 200,
    }
    for field, want in expected.items():
        assert getattr(canonical, field) == want


def test_normalize_usage_openai_prefers_prompt_tokens_details_over_top_level():
    """Nested OpenAI-standard fields win over top-level Anthropic-style ones.

    The top-level fields act only as a fallback for when the nested values
    are absent or zero.
    """
    nested = SimpleNamespace(cached_tokens=600, cache_write_tokens=150)
    raw = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=nested,
        # Deliberately conflicting values: they must be ignored because the
        # nested details are present.
        cache_read_input_tokens=999,
        cache_creation_input_tokens=999,
    )

    canonical = normalize_usage(
        raw,
        provider="openrouter",
        api_mode="chat_completions",
    )

    assert canonical.cache_read_tokens == 600
    assert canonical.cache_write_tokens == 150


def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
    """Per-token prices from the models API surface as per-million rates."""

    def _stub_model_metadata():
        return {
            "anthropic/claude-opus-4.6": {
                "pricing": {
                    "prompt": "0.000005",
                    "completion": "0.000025",
                    "input_cache_read": "0.0000005",
                    "input_cache_write": "0.00000625",
                }
            }
        }

    monkeypatch.setattr("agent.usage_pricing.fetch_model_metadata", _stub_model_metadata)

    pricing = get_pricing_entry(
        "anthropic/claude-opus-4.6",
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    expected_rates = (
        ("input_cost_per_million", 5.0),
        ("output_cost_per_million", 25.0),
        ("cache_read_cost_per_million", 0.5),
        ("cache_write_cost_per_million", 6.25),
    )
    for field, want in expected_rates:
        assert float(getattr(pricing, field)) == want


def test_estimate_usage_cost_marks_subscription_routes_included():
    """The openai-codex subscription route reports status ``included`` at zero cost."""
    tokens = CanonicalUsage(input_tokens=1000, output_tokens=500)

    estimate = estimate_usage_cost(
        "gpt-5.3-codex",
        tokens,
        provider="openai-codex",
        base_url="https://chatgpt.com/backend-api/codex",
    )

    assert estimate.status == "included"
    assert float(estimate.amount_usd) == 0.0


def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(monkeypatch):
    """Cached tokens with no cache rate in the pricing data yield status ``unknown``."""

    def _stub_model_metadata():
        # Only prompt/completion prices are supplied: no cache rates.
        return {
            "google/gemini-2.5-pro": {
                "pricing": {
                    "prompt": "0.00000125",
                    "completion": "0.00001",
                }
            }
        }

    monkeypatch.setattr("agent.usage_pricing.fetch_model_metadata", _stub_model_metadata)
    tokens = CanonicalUsage(input_tokens=1000, output_tokens=500, cache_read_tokens=100)

    estimate = estimate_usage_cost(
        "google/gemini-2.5-pro",
        tokens,
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    assert estimate.status == "unknown"


def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
    """Pricing advertised by a custom endpoint's models API is picked up."""

    def _stub_endpoint_metadata(base_url, api_key=None):
        return {
            "zai-org/GLM-5-TEE": {
                "pricing": {
                    "prompt": "0.0000005",
                    "completion": "0.000002",
                }
            }
        }

    monkeypatch.setattr(
        "agent.usage_pricing.fetch_endpoint_model_metadata",
        _stub_endpoint_metadata,
    )

    pricing = get_pricing_entry(
        "zai-org/GLM-5-TEE",
        provider="custom",
        base_url="https://llm.chutes.ai/v1",
        api_key="test-key",
    )

    assert float(pricing.input_cost_per_million) == 0.5
    assert float(pricing.output_cost_per_million) == 2.0


def test_nous_portal_pricing_preserves_vendor_prefixed_model_ids(monkeypatch):
    """A vendor-prefixed model id resolves for ``nous`` via its inference endpoint."""
    recorded = {}

    def _fake_fetch_endpoint_model_metadata(base_url, api_key=None):
        # Remember which endpoint was queried so the test can check it.
        recorded["base_url"] = base_url
        return {
            "openai/gpt-5.5-pro": {
                "pricing": {
                    "prompt": "0.000025",
                    "completion": "0.000125",
                }
            }
        }

    monkeypatch.setattr(
        "agent.usage_pricing.fetch_endpoint_model_metadata",
        _fake_fetch_endpoint_model_metadata,
    )

    pricing = get_pricing_entry("openai/gpt-5.5-pro", provider="nous")

    assert recorded["base_url"] == "https://inference-api.nousresearch.com/v1"
    assert float(pricing.input_cost_per_million) == 25.0
    assert float(pricing.output_cost_per_million) == 125.0


def test_deepseek_v4_pro_pricing_entry_exists():
    """Regression test: ``deepseek-v4-pro`` must resolve to a pricing entry.

    Before the fix, deepseek-v4-pro sessions showed as unknown cost in hermes
    insights because the _OFFICIAL_DOCS_PRICING table had no entry for the
    model. See #24218.
    """
    pricing = get_pricing_entry("deepseek-v4-pro", provider="deepseek")

    assert pricing is not None
    assert pricing.input_cost_per_million is not None
    assert pricing.output_cost_per_million is not None

    expected_rates = (
        ("input_cost_per_million", 1.74),
        ("output_cost_per_million", 3.48),
        ("cache_read_cost_per_million", 0.0145),
    )
    for field, want in expected_rates:
        assert float(getattr(pricing, field)) == want


def test_deepseek_v4_pro_estimate_usage_cost():
    """deepseek-v4-pro sessions get a dollar estimate rather than ``unknown``."""
    tokens = CanonicalUsage(input_tokens=1_000_000, output_tokens=500_000)

    estimate = estimate_usage_cost("deepseek-v4-pro", tokens, provider="deepseek")

    assert estimate.status == "estimated"
    assert estimate.amount_usd is not None
    # 1M input at $1.74/M plus 500K output at $3.48/M: $1.74 + $1.74 = $3.48.
    assert float(estimate.amount_usd) == 3.48


def test_bedrock_claude_rows_all_carry_cache_pricing():
    """Invariant: each Bedrock Claude row has both cache-read and cache-write rates.

    Without them a cached session prices as ``unknown``. Bedrock Claude
    routes through the AnthropicBedrock SDK and injects cache_control, so
    cached tokens are always reported and the pricing layer has to be able to
    value them. See #50295.
    """
    from agent.usage_pricing import _OFFICIAL_DOCS_PRICING

    bedrock_claude_keys = []
    for provider_name, model_name in _OFFICIAL_DOCS_PRICING:
        if provider_name == "bedrock" and "claude" in model_name:
            bedrock_claude_keys.append((provider_name, model_name))

    assert bedrock_claude_keys, "expected at least one bedrock Claude pricing row"

    for key in bedrock_claude_keys:
        row = _OFFICIAL_DOCS_PRICING[key]
        assert row.input_cost_per_million is not None, key
        assert row.cache_read_cost_per_million is not None, key
        assert row.cache_write_cost_per_million is not None, key
        # A cache read must be cheaper than fresh input, a cache write dearer.
        assert row.cache_read_cost_per_million < row.input_cost_per_million, key
        assert row.cache_write_cost_per_million > row.input_cost_per_million, key


def test_bedrock_cross_region_profile_prefix_resolves_to_pricing():
    """Cross-region inference profile ids price the same as the bare model id.

    The ``us.``/``global.``/``eu.`` prefixes must resolve to the same pricing
    entry as the bare foundation-model id. Without prefix normalization,
    ``us.anthropic.claude-*`` sessions price as unknown.
    """
    endpoint = "https://bedrock-runtime.us-east-1.amazonaws.com"
    model_id = "anthropic.claude-sonnet-4-5"

    baseline = get_pricing_entry(model_id, provider="bedrock", base_url=endpoint)
    assert baseline is not None

    for prefix in ("us.", "global.", "eu."):
        profiled = get_pricing_entry(
            f"{prefix}{model_id}",
            provider="bedrock",
            base_url=endpoint,
        )
        assert profiled is not None, prefix
        assert profiled.input_cost_per_million == baseline.input_cost_per_million
        assert profiled.cache_read_cost_per_million == baseline.cache_read_cost_per_million


def test_bedrock_claude_cached_session_estimates_cost_not_unknown():
    """A Bedrock Claude session with cache hits gets a dollar estimate.

    ``unknown`` here was the user-visible symptom reported in #50295.
    """
    endpoint = "https://bedrock-runtime.us-east-1.amazonaws.com"
    raw = SimpleNamespace(
        input_tokens=55,
        output_tokens=7113,
        cache_read_input_tokens=1369379,
        cache_creation_input_tokens=42135,
    )

    tokens = normalize_usage(raw, provider="bedrock", api_mode="anthropic_messages")
    assert tokens.cache_read_tokens == 1369379
    assert tokens.cache_write_tokens == 42135

    estimate = estimate_usage_cost(
        "us.anthropic.claude-opus-4-6",
        tokens,
        provider="bedrock",
        base_url=endpoint,
    )

    assert estimate.status == "estimated"
    assert estimate.amount_usd is not None