mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat: add route-aware pricing estimates (#1695)
Salvaged from PR #1563 by @kshitijk4poor. Cherry-picked with authorship preserved. - Route-aware pricing architecture replacing static MODEL_PRICING + heuristics - Canonical usage normalization (Anthropic/OpenAI/Codex API shapes) - Cache-aware billing (separate cache_read/cache_write rates) - Cost status tracking (estimated/included/unknown/actual) - OpenRouter live pricing via models API - Schema migration v4→v5 with billing metadata columns - Removed speculative forward-looking entries - Removed cost display from CLI status bar - Threaded OpenRouter metadata pre-warm Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
This commit is contained in:
parent
d9b9987ad3
commit
d417ba2a48
14 changed files with 1694 additions and 254 deletions
101
tests/agent/test_usage_pricing.py
Normal file
101
tests/agent/test_usage_pricing.py
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
from types import SimpleNamespace
|
||||
|
||||
from agent.usage_pricing import (
|
||||
CanonicalUsage,
|
||||
estimate_usage_cost,
|
||||
get_pricing_entry,
|
||||
normalize_usage,
|
||||
)
|
||||
|
||||
|
||||
def test_normalize_usage_anthropic_keeps_cache_buckets_separate():
|
||||
usage = SimpleNamespace(
|
||||
input_tokens=1000,
|
||||
output_tokens=500,
|
||||
cache_read_input_tokens=2000,
|
||||
cache_creation_input_tokens=400,
|
||||
)
|
||||
|
||||
normalized = normalize_usage(usage, provider="anthropic", api_mode="anthropic_messages")
|
||||
|
||||
assert normalized.input_tokens == 1000
|
||||
assert normalized.output_tokens == 500
|
||||
assert normalized.cache_read_tokens == 2000
|
||||
assert normalized.cache_write_tokens == 400
|
||||
assert normalized.prompt_tokens == 3400
|
||||
|
||||
|
||||
def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=3000,
|
||||
completion_tokens=700,
|
||||
prompt_tokens_details=SimpleNamespace(cached_tokens=1800),
|
||||
)
|
||||
|
||||
normalized = normalize_usage(usage, provider="openai", api_mode="chat_completions")
|
||||
|
||||
assert normalized.input_tokens == 1200
|
||||
assert normalized.cache_read_tokens == 1800
|
||||
assert normalized.output_tokens == 700
|
||||
|
||||
|
||||
def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"agent.usage_pricing.fetch_model_metadata",
|
||||
lambda: {
|
||||
"anthropic/claude-opus-4.6": {
|
||||
"pricing": {
|
||||
"prompt": "0.000005",
|
||||
"completion": "0.000025",
|
||||
"input_cache_read": "0.0000005",
|
||||
"input_cache_write": "0.00000625",
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
entry = get_pricing_entry(
|
||||
"anthropic/claude-opus-4.6",
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
)
|
||||
|
||||
assert float(entry.input_cost_per_million) == 5.0
|
||||
assert float(entry.output_cost_per_million) == 25.0
|
||||
assert float(entry.cache_read_cost_per_million) == 0.5
|
||||
assert float(entry.cache_write_cost_per_million) == 6.25
|
||||
|
||||
|
||||
def test_estimate_usage_cost_marks_subscription_routes_included():
|
||||
result = estimate_usage_cost(
|
||||
"gpt-5.3-codex",
|
||||
CanonicalUsage(input_tokens=1000, output_tokens=500),
|
||||
provider="openai-codex",
|
||||
base_url="https://chatgpt.com/backend-api/codex",
|
||||
)
|
||||
|
||||
assert result.status == "included"
|
||||
assert float(result.amount_usd) == 0.0
|
||||
|
||||
|
||||
def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"agent.usage_pricing.fetch_model_metadata",
|
||||
lambda: {
|
||||
"google/gemini-2.5-pro": {
|
||||
"pricing": {
|
||||
"prompt": "0.00000125",
|
||||
"completion": "0.00001",
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
result = estimate_usage_cost(
|
||||
"google/gemini-2.5-pro",
|
||||
CanonicalUsage(input_tokens=1000, output_tokens=500, cache_read_tokens=100),
|
||||
provider="openrouter",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
)
|
||||
|
||||
assert result.status == "unknown"
|
||||
|
|
@ -703,5 +703,15 @@ class TestLastPromptTokens:
|
|||
store.update_session("k1", model="openai/gpt-5.4")
|
||||
|
||||
store._db.update_token_counts.assert_called_once_with(
|
||||
"s1", 0, 0, model="openai/gpt-5.4"
|
||||
"s1",
|
||||
input_tokens=0,
|
||||
output_tokens=0,
|
||||
cache_read_tokens=0,
|
||||
cache_write_tokens=0,
|
||||
estimated_cost_usd=None,
|
||||
cost_status=None,
|
||||
cost_source=None,
|
||||
billing_provider=None,
|
||||
billing_base_url=None,
|
||||
model="openai/gpt-5.4",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -128,6 +128,13 @@ async def test_handle_message_persists_agent_token_counts(monkeypatch):
|
|||
session_entry.session_key,
|
||||
input_tokens=120,
|
||||
output_tokens=45,
|
||||
cache_read_tokens=0,
|
||||
cache_write_tokens=0,
|
||||
last_prompt_tokens=80,
|
||||
model="openai/test-model",
|
||||
estimated_cost_usd=None,
|
||||
cost_status=None,
|
||||
cost_source=None,
|
||||
provider=None,
|
||||
base_url=None,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -16,6 +16,10 @@ def _make_cli(model: str = "anthropic/claude-sonnet-4-20250514"):
|
|||
def _attach_agent(
|
||||
cli_obj,
|
||||
*,
|
||||
input_tokens: int | None = None,
|
||||
output_tokens: int | None = None,
|
||||
cache_read_tokens: int = 0,
|
||||
cache_write_tokens: int = 0,
|
||||
prompt_tokens: int,
|
||||
completion_tokens: int,
|
||||
total_tokens: int,
|
||||
|
|
@ -26,6 +30,12 @@ def _attach_agent(
|
|||
):
|
||||
cli_obj.agent = SimpleNamespace(
|
||||
model=cli_obj.model,
|
||||
provider="anthropic" if cli_obj.model.startswith("anthropic/") else None,
|
||||
base_url="",
|
||||
session_input_tokens=input_tokens if input_tokens is not None else prompt_tokens,
|
||||
session_output_tokens=output_tokens if output_tokens is not None else completion_tokens,
|
||||
session_cache_read_tokens=cache_read_tokens,
|
||||
session_cache_write_tokens=cache_write_tokens,
|
||||
session_prompt_tokens=prompt_tokens,
|
||||
session_completion_tokens=completion_tokens,
|
||||
session_total_tokens=total_tokens,
|
||||
|
|
@ -68,20 +78,19 @@ class TestCLIStatusBar:
|
|||
assert "$0.06" not in text # cost hidden by default
|
||||
assert "15m" in text
|
||||
|
||||
def test_build_status_bar_text_shows_cost_when_enabled(self):
|
||||
def test_build_status_bar_text_no_cost_in_status_bar(self):
|
||||
cli_obj = _attach_agent(
|
||||
_make_cli(),
|
||||
prompt_tokens=10000,
|
||||
completion_tokens=2400,
|
||||
total_tokens=12400,
|
||||
completion_tokens=5000,
|
||||
total_tokens=15000,
|
||||
api_calls=7,
|
||||
context_tokens=12400,
|
||||
context_tokens=50000,
|
||||
context_length=200_000,
|
||||
)
|
||||
cli_obj.show_cost = True
|
||||
|
||||
text = cli_obj._build_status_bar_text(width=120)
|
||||
assert "$" in text # cost is shown when enabled
|
||||
assert "$" not in text # cost is never shown in status bar
|
||||
|
||||
def test_build_status_bar_text_collapses_for_narrow_terminal(self):
|
||||
cli_obj = _attach_agent(
|
||||
|
|
@ -128,8 +137,8 @@ class TestCLIUsageReport:
|
|||
output = capsys.readouterr().out
|
||||
|
||||
assert "Model:" in output
|
||||
assert "Input cost:" in output
|
||||
assert "Output cost:" in output
|
||||
assert "Cost status:" in output
|
||||
assert "Cost source:" in output
|
||||
assert "Total cost:" in output
|
||||
assert "$" in output
|
||||
assert "0.064" in output
|
||||
|
|
|
|||
|
|
@ -657,7 +657,7 @@ class TestSchemaInit:
|
|||
def test_schema_version(self, db):
|
||||
cursor = db._conn.execute("SELECT version FROM schema_version")
|
||||
version = cursor.fetchone()[0]
|
||||
assert version == 4
|
||||
assert version == 5
|
||||
|
||||
def test_title_column_exists(self, db):
|
||||
"""Verify the title column was created in the sessions table."""
|
||||
|
|
@ -713,12 +713,12 @@ class TestSchemaInit:
|
|||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
# Open with SessionDB — should migrate to v4
|
||||
# Open with SessionDB — should migrate to v5
|
||||
migrated_db = SessionDB(db_path=db_path)
|
||||
|
||||
# Verify migration
|
||||
cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
|
||||
assert cursor.fetchone()[0] == 4
|
||||
assert cursor.fetchone()[0] == 5
|
||||
|
||||
# Verify title column exists and is NULL for existing sessions
|
||||
session = migrated_db.get_session("existing")
|
||||
|
|
|
|||
|
|
@ -123,28 +123,16 @@ def populated_db(db):
|
|||
# =========================================================================
|
||||
|
||||
class TestPricing:
|
||||
def test_exact_match(self):
|
||||
pricing = _get_pricing("gpt-4o")
|
||||
assert pricing["input"] == 2.50
|
||||
assert pricing["output"] == 10.00
|
||||
|
||||
def test_provider_prefix_stripped(self):
|
||||
pricing = _get_pricing("anthropic/claude-sonnet-4-20250514")
|
||||
assert pricing["input"] == 3.00
|
||||
assert pricing["output"] == 15.00
|
||||
|
||||
def test_prefix_match(self):
|
||||
pricing = _get_pricing("claude-3-5-sonnet-20241022")
|
||||
assert pricing["input"] == 3.00
|
||||
|
||||
def test_keyword_heuristic_opus(self):
|
||||
def test_unknown_models_do_not_use_heuristics(self):
|
||||
pricing = _get_pricing("some-new-opus-model")
|
||||
assert pricing["input"] == 15.00
|
||||
assert pricing["output"] == 75.00
|
||||
|
||||
def test_keyword_heuristic_haiku(self):
|
||||
assert pricing == _DEFAULT_PRICING
|
||||
pricing = _get_pricing("anthropic/claude-haiku-future")
|
||||
assert pricing["input"] == 0.80
|
||||
assert pricing == _DEFAULT_PRICING
|
||||
|
||||
def test_unknown_model_returns_zero_cost(self):
|
||||
"""Unknown/custom models should NOT have fabricated costs."""
|
||||
|
|
@ -168,40 +156,12 @@ class TestPricing:
|
|||
pricing = _get_pricing("")
|
||||
assert pricing == _DEFAULT_PRICING
|
||||
|
||||
def test_deepseek_heuristic(self):
|
||||
pricing = _get_pricing("deepseek-v3")
|
||||
assert pricing["input"] == 0.14
|
||||
|
||||
def test_gemini_heuristic(self):
|
||||
pricing = _get_pricing("gemini-3.0-ultra")
|
||||
assert pricing["input"] == 0.15
|
||||
|
||||
def test_dated_model_gpt4o_mini(self):
|
||||
"""gpt-4o-mini-2024-07-18 should match gpt-4o-mini, NOT gpt-4o."""
|
||||
pricing = _get_pricing("gpt-4o-mini-2024-07-18")
|
||||
assert pricing["input"] == 0.15 # gpt-4o-mini price, not gpt-4o's 2.50
|
||||
|
||||
def test_dated_model_o3_mini(self):
|
||||
"""o3-mini-2025-01-31 should match o3-mini, NOT o3."""
|
||||
pricing = _get_pricing("o3-mini-2025-01-31")
|
||||
assert pricing["input"] == 1.10 # o3-mini price, not o3's 10.00
|
||||
|
||||
def test_dated_model_gpt41_mini(self):
|
||||
"""gpt-4.1-mini-2025-04-14 should match gpt-4.1-mini, NOT gpt-4.1."""
|
||||
pricing = _get_pricing("gpt-4.1-mini-2025-04-14")
|
||||
assert pricing["input"] == 0.40 # gpt-4.1-mini, not gpt-4.1's 2.00
|
||||
|
||||
def test_dated_model_gpt41_nano(self):
|
||||
"""gpt-4.1-nano-2025-04-14 should match gpt-4.1-nano, NOT gpt-4.1."""
|
||||
pricing = _get_pricing("gpt-4.1-nano-2025-04-14")
|
||||
assert pricing["input"] == 0.10 # gpt-4.1-nano, not gpt-4.1's 2.00
|
||||
|
||||
|
||||
class TestHasKnownPricing:
|
||||
def test_known_commercial_model(self):
|
||||
assert _has_known_pricing("gpt-4o") is True
|
||||
assert _has_known_pricing("gpt-4o", provider="openai") is True
|
||||
assert _has_known_pricing("anthropic/claude-sonnet-4-20250514") is True
|
||||
assert _has_known_pricing("deepseek-chat") is True
|
||||
assert _has_known_pricing("gpt-4.1", provider="openai") is True
|
||||
|
||||
def test_unknown_custom_model(self):
|
||||
assert _has_known_pricing("FP16_Hermes_4.5") is False
|
||||
|
|
@ -210,26 +170,39 @@ class TestHasKnownPricing:
|
|||
assert _has_known_pricing("") is False
|
||||
assert _has_known_pricing(None) is False
|
||||
|
||||
def test_heuristic_matched_models(self):
|
||||
"""Models matched by keyword heuristics should be considered known."""
|
||||
assert _has_known_pricing("some-opus-model") is True
|
||||
assert _has_known_pricing("future-sonnet-v2") is True
|
||||
def test_heuristic_matched_models_are_not_considered_known(self):
|
||||
assert _has_known_pricing("some-opus-model") is False
|
||||
assert _has_known_pricing("future-sonnet-v2") is False
|
||||
|
||||
|
||||
class TestEstimateCost:
|
||||
def test_basic_cost(self):
|
||||
# gpt-4o: 2.50/M input, 10.00/M output
|
||||
cost = _estimate_cost("gpt-4o", 1_000_000, 1_000_000)
|
||||
assert cost == pytest.approx(12.50, abs=0.01)
|
||||
cost, status = _estimate_cost(
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
1_000_000,
|
||||
1_000_000,
|
||||
provider="anthropic",
|
||||
)
|
||||
assert status == "estimated"
|
||||
assert cost == pytest.approx(18.0, abs=0.01)
|
||||
|
||||
def test_zero_tokens(self):
|
||||
cost = _estimate_cost("gpt-4o", 0, 0)
|
||||
cost, status = _estimate_cost("gpt-4o", 0, 0, provider="openai")
|
||||
assert status == "estimated"
|
||||
assert cost == 0.0
|
||||
|
||||
def test_small_usage(self):
|
||||
cost = _estimate_cost("gpt-4o", 1000, 500)
|
||||
# 1000 * 2.50/1M + 500 * 10.00/1M = 0.0025 + 0.005 = 0.0075
|
||||
assert cost == pytest.approx(0.0075, abs=0.0001)
|
||||
def test_cache_aware_usage(self):
|
||||
cost, status = _estimate_cost(
|
||||
"anthropic/claude-sonnet-4-20250514",
|
||||
1000,
|
||||
500,
|
||||
cache_read_tokens=2000,
|
||||
cache_write_tokens=400,
|
||||
provider="anthropic",
|
||||
)
|
||||
assert status == "estimated"
|
||||
expected = (1000 * 3.0 + 500 * 15.0 + 2000 * 0.30 + 400 * 3.75) / 1_000_000
|
||||
assert cost == pytest.approx(expected, abs=0.0001)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
|
|
@ -660,8 +633,13 @@ class TestEdgeCases:
|
|||
|
||||
def test_mixed_commercial_and_custom_models(self, db):
|
||||
"""Mix of commercial and custom models: only commercial ones get costs."""
|
||||
db.create_session(session_id="s1", source="cli", model="gpt-4o")
|
||||
db.update_token_counts("s1", input_tokens=10000, output_tokens=5000)
|
||||
db.create_session(session_id="s1", source="cli", model="anthropic/claude-sonnet-4-20250514")
|
||||
db.update_token_counts(
|
||||
"s1",
|
||||
input_tokens=10000,
|
||||
output_tokens=5000,
|
||||
billing_provider="anthropic",
|
||||
)
|
||||
db.create_session(session_id="s2", source="cli", model="my-local-llama")
|
||||
db.update_token_counts("s2", input_tokens=10000, output_tokens=5000)
|
||||
db._conn.commit()
|
||||
|
|
@ -672,13 +650,13 @@ class TestEdgeCases:
|
|||
# Cost should only come from gpt-4o, not from the custom model
|
||||
overview = report["overview"]
|
||||
assert overview["estimated_cost"] > 0
|
||||
assert "gpt-4o" in overview["models_with_pricing"] # list now, not set
|
||||
assert "claude-sonnet-4-20250514" in overview["models_with_pricing"] # list now, not set
|
||||
assert "my-local-llama" in overview["models_without_pricing"]
|
||||
|
||||
# Verify individual model entries
|
||||
gpt = next(m for m in report["models"] if m["model"] == "gpt-4o")
|
||||
assert gpt["has_pricing"] is True
|
||||
assert gpt["cost"] > 0
|
||||
claude = next(m for m in report["models"] if m["model"] == "claude-sonnet-4-20250514")
|
||||
assert claude["has_pricing"] is True
|
||||
assert claude["cost"] > 0
|
||||
|
||||
llama = next(m for m in report["models"] if m["model"] == "my-local-llama")
|
||||
assert llama["has_pricing"] is False
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue