hermes-agent/tests/agent/test_usage_pricing.py
zccyman 88ede807c4 fix(pricing): add deepseek-v4-pro to official docs pricing table
deepseek-v4-pro has been routable since v0.12 but was missing from
the _OFFICIAL_DOCS_PRICING table. Sessions using this model showed
as "unknown cost" in hermes insights instead of a dollar estimate.

Add pricing entry using published list prices:
- input: $1.74/M tokens
- output: $3.48/M tokens
- cache_read: $0.0145/M tokens

Uses standard list rates (not the 75% promo) so estimates remain
accurate after promo expires 2026-05-31.

Closes #24218
2026-05-12 16:32:57 -07:00

226 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from types import SimpleNamespace
from agent.usage_pricing import (
CanonicalUsage,
estimate_usage_cost,
get_pricing_entry,
normalize_usage,
)
def test_normalize_usage_anthropic_keeps_cache_buckets_separate():
    """Anthropic usage: cache read and cache write counts stay in separate buckets.

    The asserted prompt total (3400) equals input + cache read + cache write
    (1000 + 2000 + 400).
    """
    raw_usage = SimpleNamespace(
        input_tokens=1000,
        output_tokens=500,
        cache_read_input_tokens=2000,
        cache_creation_input_tokens=400,
    )

    result = normalize_usage(
        raw_usage,
        provider="anthropic",
        api_mode="anthropic_messages",
    )

    # Checked in the same order as a field-by-field assert sequence.
    expected_fields = {
        "input_tokens": 1000,
        "output_tokens": 500,
        "cache_read_tokens": 2000,
        "cache_write_tokens": 400,
        "prompt_tokens": 3400,
    }
    for field_name, expected in expected_fields.items():
        assert getattr(result, field_name) == expected, field_name
def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
    """OpenAI chat-completions usage: cached tokens are split out of the prompt total."""
    cached_details = SimpleNamespace(cached_tokens=1800)
    raw_usage = SimpleNamespace(
        prompt_tokens=3000,
        completion_tokens=700,
        prompt_tokens_details=cached_details,
    )

    result = normalize_usage(
        raw_usage,
        provider="openai",
        api_mode="chat_completions",
    )

    # 3000 prompt tokens with 1800 of them cached leaves 1200 as plain input.
    assert result.input_tokens == 1200
    assert result.cache_read_tokens == 1800
    assert result.output_tokens == 700
def test_normalize_usage_openai_reads_top_level_anthropic_cache_fields():
    """Top-level Anthropic-style cache-write counts are honoured in chat-completions mode.

    Some OpenAI-compatible proxies (OpenRouter, Vercel AI Gateway, Cline) put
    Anthropic-style cache token counts at the top level of the usage object
    when routing Claude models, rather than nesting them inside
    prompt_tokens_details.

    Regression guard for the bug fixed in cline/cline#10266: previously the
    chat-completions branch of normalize_usage() only looked at
    prompt_tokens_details.cache_write_tokens and missed
    cache_creation_input_tokens entirely, so cache writes were reported as 0
    and the reflected inputTokens were overstated by the cache-write amount.
    """
    raw_usage = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=SimpleNamespace(cached_tokens=500),
        cache_creation_input_tokens=300,
    )

    result = normalize_usage(
        raw_usage,
        provider="openrouter",
        api_mode="chat_completions",
    )

    # Cache read comes from prompt_tokens_details.cached_tokens (preferred);
    # cache write falls back to the top-level cache_creation_input_tokens.
    assert result.cache_read_tokens == 500
    assert result.cache_write_tokens == 300

    # input_tokens = prompt total - cache read - cache write
    #              = 1000 - 500 - 300 = 200
    assert result.input_tokens == 200
    assert result.output_tokens == 200
def test_normalize_usage_openai_reads_top_level_cache_read_when_details_missing():
    """Top-level cache fields are used when prompt_tokens_details is absent.

    Some proxies expose only the top-level Anthropic-style fields and no
    prompt_tokens_details object at all. Regression guard for
    cline/cline#10266.
    """
    usage_fields = {
        "prompt_tokens": 1000,
        "completion_tokens": 200,
        "cache_read_input_tokens": 500,
        "cache_creation_input_tokens": 300,
    }
    raw_usage = SimpleNamespace(**usage_fields)

    result = normalize_usage(
        raw_usage,
        provider="openrouter",
        api_mode="chat_completions",
    )

    assert result.cache_read_tokens == 500
    assert result.cache_write_tokens == 300
    # 1000 prompt tokens minus 500 read and 300 written leaves 200.
    assert result.input_tokens == 200
def test_normalize_usage_openai_prefers_prompt_tokens_details_over_top_level():
    """Nested OpenAI-standard fields win over top-level Anthropic-style ones.

    When both prompt_tokens_details and the top-level Anthropic fields are
    present, the nested values are preferred; the top-level fields act only
    as a fallback when the nested ones are absent or zero.
    """
    nested_details = SimpleNamespace(cached_tokens=600, cache_write_tokens=150)
    raw_usage = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=nested_details,
        # Deliberately conflicting values: they must be ignored because the
        # nested details are present.
        cache_read_input_tokens=999,
        cache_creation_input_tokens=999,
    )

    result = normalize_usage(
        raw_usage,
        provider="openrouter",
        api_mode="chat_completions",
    )

    assert result.cache_read_tokens == 600
    assert result.cache_write_tokens == 150
def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
    """Per-token prices from the stubbed model metadata come back as per-million rates."""
    model_id = "anthropic/claude-opus-4.6"

    def fake_fetch_model_metadata():
        # Prices are per-token strings, as supplied by the stubbed metadata.
        return {
            "anthropic/claude-opus-4.6": {
                "pricing": {
                    "prompt": "0.000005",
                    "completion": "0.000025",
                    "input_cache_read": "0.0000005",
                    "input_cache_write": "0.00000625",
                }
            }
        }

    monkeypatch.setattr(
        "agent.usage_pricing.fetch_model_metadata",
        fake_fetch_model_metadata,
    )

    pricing = get_pricing_entry(
        model_id,
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    # Each expected value is the per-token price multiplied by one million.
    assert float(pricing.input_cost_per_million) == 5.0
    assert float(pricing.output_cost_per_million) == 25.0
    assert float(pricing.cache_read_cost_per_million) == 0.5
    assert float(pricing.cache_write_cost_per_million) == 6.25
def test_estimate_usage_cost_marks_subscription_routes_included():
    """The openai-codex route on the ChatGPT codex endpoint is reported as included at $0."""
    usage = CanonicalUsage(input_tokens=1000, output_tokens=500)

    estimate = estimate_usage_cost(
        "gpt-5.3-codex",
        usage,
        provider="openai-codex",
        base_url="https://chatgpt.com/backend-api/codex",
    )

    assert estimate.status == "included"
    assert float(estimate.amount_usd) == 0.0
def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(monkeypatch):
    """Usage with cache reads is reported as unknown when no cache rate is published.

    The stubbed metadata carries only prompt and completion prices, while the
    usage includes cache_read_tokens.
    """

    def fake_fetch_model_metadata():
        # No "input_cache_read" entry on purpose.
        return {
            "google/gemini-2.5-pro": {
                "pricing": {
                    "prompt": "0.00000125",
                    "completion": "0.00001",
                }
            }
        }

    monkeypatch.setattr(
        "agent.usage_pricing.fetch_model_metadata",
        fake_fetch_model_metadata,
    )

    usage = CanonicalUsage(input_tokens=1000, output_tokens=500, cache_read_tokens=100)
    estimate = estimate_usage_cost(
        "google/gemini-2.5-pro",
        usage,
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    assert estimate.status == "unknown"
def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
    """Pricing from a custom endpoint's stubbed model metadata yields per-million rates."""

    def fake_fetch_endpoint_model_metadata(base_url, api_key=None):
        # Same call signature as the stub it replaces; both arguments are ignored.
        return {
            "zai-org/GLM-5-TEE": {
                "pricing": {
                    "prompt": "0.0000005",
                    "completion": "0.000002",
                }
            }
        }

    monkeypatch.setattr(
        "agent.usage_pricing.fetch_endpoint_model_metadata",
        fake_fetch_endpoint_model_metadata,
    )

    pricing = get_pricing_entry(
        "zai-org/GLM-5-TEE",
        provider="custom",
        base_url="https://llm.chutes.ai/v1",
        api_key="test-key",
    )

    # Each expected value is the per-token price multiplied by one million.
    assert float(pricing.input_cost_per_million) == 0.5
    assert float(pricing.output_cost_per_million) == 2.0
def test_deepseek_v4_pro_pricing_entry_exists():
    """Regression test: deepseek-v4-pro must resolve to a pricing entry.

    Before the fix, deepseek-v4-pro sessions were shown as unknown cost in
    hermes insights because the _OFFICIAL_DOCS_PRICING table had no entry
    for the model. See #24218.
    """
    pricing = get_pricing_entry("deepseek-v4-pro", provider="deepseek")

    # Existence checks come first, before any float conversion.
    assert pricing is not None
    assert pricing.input_cost_per_million is not None
    assert pricing.output_cost_per_million is not None

    # Expected per-million rates for input, output and cache read.
    assert float(pricing.input_cost_per_million) == 1.74
    assert float(pricing.output_cost_per_million) == 3.48
    assert float(pricing.cache_read_cost_per_million) == 0.0145
def test_deepseek_v4_pro_estimate_usage_cost():
    """Ensure deepseek-v4-pro sessions get a dollar estimate, not unknown.

    The expected amount is derived from two multiplied terms, so it is
    compared with a tight relative tolerance instead of exact float
    equality; this keeps the test independent of how the estimator
    represents or rounds the intermediate products.
    """
    # Function-scope import so the block does not depend on the module's
    # top-level import list.
    import math

    result = estimate_usage_cost(
        "deepseek-v4-pro",
        CanonicalUsage(input_tokens=1000000, output_tokens=500000),
        provider="deepseek",
    )

    assert result.status == "estimated"
    assert result.amount_usd is not None
    # 1M input x $1.74/M + 500K output x $3.48/M = $1.74 + $1.74 = $3.48
    assert math.isclose(float(result.amount_usd), 3.48, rel_tol=1e-9)