fix(analytics): prevent silent token loss and add Claude 4.5–4.7 pricing (#21455)

- Add pricing entries for Claude Opus 4.5/4.6/4.7, Sonnet 4.5/4.6, and
  Haiku 4.5 with updated source URLs (platform.claude.com)
- Add _normalize_anthropic_model_name() to handle dot-notation variants
  (e.g. claude-opus-4.7 → claude-opus-4-7) for pricing lookups
- Fix silent token loss: ensure session row exists before UPDATE in both
  run_agent.py and hermes_state.py (INSERT OR IGNORE is idempotent)
- Log token persistence failures at DEBUG level instead of swallowing
  them silently — makes undercounted analytics diagnosable
- Surface reasoning tokens in CLI /usage and TUI usage panel
- Add 'reasoning' and 'cost_status' fields to TUI Usage type
This commit is contained in:
Austin Pickett 2026-05-07 16:24:31 -04:00 committed by GitHub
parent cff821e2dc
commit d87c7b99e2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 186 additions and 16 deletions

View file

@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import re
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime, timezone from datetime import datetime, timezone
from decimal import Decimal from decimal import Decimal
@ -82,6 +83,121 @@ _UTC_NOW = lambda: datetime.now(timezone.utc)
# Official docs snapshot entries. Models whose published pricing and cache # Official docs snapshot entries. Models whose published pricing and cache
# semantics are stable enough to encode exactly. # semantics are stable enough to encode exactly.
_OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
# ── Anthropic Claude 4.7 ─────────────────────────────────────────────
# Opus 4.5/4.6/4.7 share $5/$25 pricing (new tokenizer, up to 35% more
# tokens for the same text).
# Source: https://platform.claude.com/docs/en/about-claude/pricing
(
"anthropic",
"claude-opus-4-7",
): PricingEntry(
input_cost_per_million=Decimal("5.00"),
output_cost_per_million=Decimal("25.00"),
cache_read_cost_per_million=Decimal("0.50"),
cache_write_cost_per_million=Decimal("6.25"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
(
"anthropic",
"claude-opus-4-7-20250507",
): PricingEntry(
input_cost_per_million=Decimal("5.00"),
output_cost_per_million=Decimal("25.00"),
cache_read_cost_per_million=Decimal("0.50"),
cache_write_cost_per_million=Decimal("6.25"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
# ── Anthropic Claude 4.6 ─────────────────────────────────────────────
(
"anthropic",
"claude-opus-4-6",
): PricingEntry(
input_cost_per_million=Decimal("5.00"),
output_cost_per_million=Decimal("25.00"),
cache_read_cost_per_million=Decimal("0.50"),
cache_write_cost_per_million=Decimal("6.25"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
(
"anthropic",
"claude-opus-4-6-20250414",
): PricingEntry(
input_cost_per_million=Decimal("5.00"),
output_cost_per_million=Decimal("25.00"),
cache_read_cost_per_million=Decimal("0.50"),
cache_write_cost_per_million=Decimal("6.25"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
(
"anthropic",
"claude-sonnet-4-6",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
cache_read_cost_per_million=Decimal("0.30"),
cache_write_cost_per_million=Decimal("3.75"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
(
"anthropic",
"claude-sonnet-4-6-20250414",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
cache_read_cost_per_million=Decimal("0.30"),
cache_write_cost_per_million=Decimal("3.75"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
# ── Anthropic Claude 4.5 ─────────────────────────────────────────────
(
"anthropic",
"claude-opus-4-5",
): PricingEntry(
input_cost_per_million=Decimal("5.00"),
output_cost_per_million=Decimal("25.00"),
cache_read_cost_per_million=Decimal("0.50"),
cache_write_cost_per_million=Decimal("6.25"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
(
"anthropic",
"claude-sonnet-4-5",
): PricingEntry(
input_cost_per_million=Decimal("3.00"),
output_cost_per_million=Decimal("15.00"),
cache_read_cost_per_million=Decimal("0.30"),
cache_write_cost_per_million=Decimal("3.75"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
(
"anthropic",
"claude-haiku-4-5",
): PricingEntry(
input_cost_per_million=Decimal("1.00"),
output_cost_per_million=Decimal("5.00"),
cache_read_cost_per_million=Decimal("0.10"),
cache_write_cost_per_million=Decimal("1.25"),
source="official_docs_snapshot",
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-05",
),
# ── Anthropic Claude 4 / 4.1 ─────────────────────────────────────────
( (
"anthropic", "anthropic",
"claude-opus-4-20250514", "claude-opus-4-20250514",
@ -91,8 +207,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
cache_read_cost_per_million=Decimal("1.50"), cache_read_cost_per_million=Decimal("1.50"),
cache_write_cost_per_million=Decimal("18.75"), cache_write_cost_per_million=Decimal("18.75"),
source="official_docs_snapshot", source="official_docs_snapshot",
source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-prompt-caching-2026-03-16", pricing_version="anthropic-pricing-2026-05",
), ),
( (
"anthropic", "anthropic",
@ -103,8 +219,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
cache_read_cost_per_million=Decimal("0.30"), cache_read_cost_per_million=Decimal("0.30"),
cache_write_cost_per_million=Decimal("3.75"), cache_write_cost_per_million=Decimal("3.75"),
source="official_docs_snapshot", source="official_docs_snapshot",
source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-prompt-caching-2026-03-16", pricing_version="anthropic-pricing-2026-05",
), ),
# OpenAI # OpenAI
( (
@ -184,7 +300,7 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
source_url="https://openai.com/api/pricing/", source_url="https://openai.com/api/pricing/",
pricing_version="openai-pricing-2026-03-16", pricing_version="openai-pricing-2026-03-16",
), ),
# Anthropic older models (pre-4.6 generation) # ── Anthropic older models (pre-4.5 generation) ────────────────────────
( (
"anthropic", "anthropic",
"claude-3-5-sonnet-20241022", "claude-3-5-sonnet-20241022",
@ -194,8 +310,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
cache_read_cost_per_million=Decimal("0.30"), cache_read_cost_per_million=Decimal("0.30"),
cache_write_cost_per_million=Decimal("3.75"), cache_write_cost_per_million=Decimal("3.75"),
source="official_docs_snapshot", source="official_docs_snapshot",
source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-03-16", pricing_version="anthropic-pricing-2026-05",
), ),
( (
"anthropic", "anthropic",
@ -206,8 +322,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
cache_read_cost_per_million=Decimal("0.08"), cache_read_cost_per_million=Decimal("0.08"),
cache_write_cost_per_million=Decimal("1.00"), cache_write_cost_per_million=Decimal("1.00"),
source="official_docs_snapshot", source="official_docs_snapshot",
source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-03-16", pricing_version="anthropic-pricing-2026-05",
), ),
( (
"anthropic", "anthropic",
@ -218,8 +334,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
cache_read_cost_per_million=Decimal("1.50"), cache_read_cost_per_million=Decimal("1.50"),
cache_write_cost_per_million=Decimal("18.75"), cache_write_cost_per_million=Decimal("18.75"),
source="official_docs_snapshot", source="official_docs_snapshot",
source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-03-16", pricing_version="anthropic-pricing-2026-05",
), ),
( (
"anthropic", "anthropic",
@ -230,8 +346,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
cache_read_cost_per_million=Decimal("0.03"), cache_read_cost_per_million=Decimal("0.03"),
cache_write_cost_per_million=Decimal("0.30"), cache_write_cost_per_million=Decimal("0.30"),
source="official_docs_snapshot", source="official_docs_snapshot",
source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", source_url="https://platform.claude.com/docs/en/about-claude/pricing",
pricing_version="anthropic-pricing-2026-03-16", pricing_version="anthropic-pricing-2026-05",
), ),
# DeepSeek # DeepSeek
( (
@ -426,8 +542,37 @@ def resolve_billing_route(
return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown") return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
def _normalize_anthropic_model_name(model: str) -> str:
"""Normalize Anthropic model name variants to canonical form.
Handles:
- Dot notation: claude-opus-4.7 claude-opus-4-7
- Short aliases: claude-opus-4.7 claude-opus-4-7
- Strips anthropic/ prefix if present
"""
name = model.lower().strip()
if name.startswith("anthropic/"):
name = name[len("anthropic/"):]
# Normalize dots to dashes in version numbers (e.g. 4.7 → 4-7, 4.6 → 4-6)
# But preserve the rest of the name structure
name = re.sub(r"(\d+)\.(\d+)", r"\1-\2", name)
return name
def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]:
    """Return the official-docs pricing snapshot for *route*, or ``None``.

    The lower-cased model name is looked up verbatim first. Only when that
    misses and the provider is ``anthropic`` is the name run through
    ``_normalize_anthropic_model_name`` and looked up a second time, and
    only if normalization actually changed the name.
    """
    provider = route.provider
    raw_name = route.model.lower()

    hit = _OFFICIAL_DOCS_PRICING.get((provider, raw_name))
    if not hit and provider == "anthropic":
        # Retry with the canonical spelling (e.g. opus-4.7 -> opus-4-7).
        canonical = _normalize_anthropic_model_name(raw_name)
        if canonical != raw_name:
            hit = _OFFICIAL_DOCS_PRICING.get((provider, canonical))
    return hit or None
def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]: def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]:

3
cli.py
View file

@ -7991,6 +7991,7 @@ class HermesCLI:
output_tokens = getattr(agent, "session_output_tokens", 0) or 0 output_tokens = getattr(agent, "session_output_tokens", 0) or 0
cache_read_tokens = getattr(agent, "session_cache_read_tokens", 0) or 0 cache_read_tokens = getattr(agent, "session_cache_read_tokens", 0) or 0
cache_write_tokens = getattr(agent, "session_cache_write_tokens", 0) or 0 cache_write_tokens = getattr(agent, "session_cache_write_tokens", 0) or 0
reasoning_tokens = getattr(agent, "session_reasoning_tokens", 0) or 0
prompt = agent.session_prompt_tokens prompt = agent.session_prompt_tokens
completion = agent.session_completion_tokens completion = agent.session_completion_tokens
total = agent.session_total_tokens total = agent.session_total_tokens
@ -8022,6 +8023,8 @@ class HermesCLI:
print(f" Cache read tokens: {cache_read_tokens:>10,}") print(f" Cache read tokens: {cache_read_tokens:>10,}")
print(f" Cache write tokens: {cache_write_tokens:>10,}") print(f" Cache write tokens: {cache_write_tokens:>10,}")
print(f" Output tokens: {output_tokens:>10,}") print(f" Output tokens: {output_tokens:>10,}")
if reasoning_tokens:
print(f" ↳ Reasoning (subset): {reasoning_tokens:>10,}")
print(f" Prompt tokens (total): {prompt:>10,}") print(f" Prompt tokens (total): {prompt:>10,}")
print(f" Completion tokens: {completion:>10,}") print(f" Completion tokens: {completion:>10,}")
print(f" Total tokens: {total:>10,}") print(f" Total tokens: {total:>10,}")

View file

@ -612,6 +612,11 @@ class SessionDB:
the caller already holds cumulative totals (gateway path, where the the caller already holds cumulative totals (gateway path, where the
cached agent accumulates across messages). cached agent accumulates across messages).
""" """
# Ensure the session row exists so the UPDATE doesn't silently affect
# 0 rows. Under concurrent load (cron + kanban + delegate_task) the
# initial create_session() may have failed due to SQLite locking.
# INSERT OR IGNORE is cheap and idempotent.
self._insert_session_row(session_id, "unknown", model=model)
if absolute: if absolute:
sql = """UPDATE sessions SET sql = """UPDATE sessions SET
input_tokens = ?, input_tokens = ?,

View file

@ -12131,6 +12131,14 @@ class AIAgent:
# deltas instead of double-counting them. # deltas instead of double-counting them.
if self._session_db and self.session_id: if self._session_db and self.session_id:
try: try:
# Ensure the session row exists before attempting UPDATE.
# Under concurrent load (cron/kanban), the initial
# _ensure_db_session() may have failed due to SQLite
# locking. Retry here so per-call token deltas are
# not silently lost (UPDATE on a non-existent row
# affects 0 rows without error).
if not self._session_db_created:
self._ensure_db_session()
self._session_db.update_token_counts( self._session_db.update_token_counts(
self.session_id, self.session_id,
input_tokens=canonical_usage.input_tokens, input_tokens=canonical_usage.input_tokens,
@ -12149,8 +12157,14 @@ class AIAgent:
model=self.model, model=self.model,
api_call_count=1, api_call_count=1,
) )
except Exception: except Exception as e:
pass # never block the agent loop # Log token persistence failures so they're
# visible in agent.log — silent loss here is
# the root cause of undercounted analytics.
logger.debug(
"Token persistence failed (session=%s, tokens=%d): %s",
self.session_id, total_tokens, e,
)
if self.verbose_logging: if self.verbose_logging:
logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")

View file

@ -1280,6 +1280,7 @@ def _get_usage(agent) -> dict:
"output": g("session_output_tokens", "session_completion_tokens"), "output": g("session_output_tokens", "session_completion_tokens"),
"cache_read": g("session_cache_read_tokens"), "cache_read": g("session_cache_read_tokens"),
"cache_write": g("session_cache_write_tokens"), "cache_write": g("session_cache_write_tokens"),
"reasoning": g("session_reasoning_tokens"),
"prompt": g("session_prompt_tokens"), "prompt": g("session_prompt_tokens"),
"completion": g("session_completion_tokens"), "completion": g("session_completion_tokens"),
"total": g("session_total_tokens"), "total": g("session_total_tokens"),

View file

@ -164,9 +164,11 @@ export interface Usage {
context_max?: number context_max?: number
context_percent?: number context_percent?: number
context_used?: number context_used?: number
cost_status?: string
cost_usd?: number cost_usd?: number
input: number input: number
output: number output: number
reasoning?: number
total: number total: number
} }