diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py index 746f962097..467b72931c 100644 --- a/agent/usage_pricing.py +++ b/agent/usage_pricing.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from dataclasses import dataclass from datetime import datetime, timezone from decimal import Decimal @@ -82,6 +83,121 @@ _UTC_NOW = lambda: datetime.now(timezone.utc) # Official docs snapshot entries. Models whose published pricing and cache # semantics are stable enough to encode exactly. _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { + # ── Anthropic Claude 4.7 ───────────────────────────────────────────── + # Opus 4.5/4.6/4.7 share $5/$25 pricing (new tokenizer, up to 35% more + # tokens for the same text). + # Source: https://platform.claude.com/docs/en/about-claude/pricing + ( + "anthropic", + "claude-opus-4-7", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-opus-4-7-20250507", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + # ── Anthropic Claude 4.6 ───────────────────────────────────────────── + ( + "anthropic", + "claude-opus-4-6", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + 
source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-opus-4-6-20250414", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-sonnet-4-6", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + cache_read_cost_per_million=Decimal("0.30"), + cache_write_cost_per_million=Decimal("3.75"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-sonnet-4-6-20250414", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + cache_read_cost_per_million=Decimal("0.30"), + cache_write_cost_per_million=Decimal("3.75"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + # ── Anthropic Claude 4.5 ───────────────────────────────────────────── + ( + "anthropic", + "claude-opus-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("5.00"), + output_cost_per_million=Decimal("25.00"), + cache_read_cost_per_million=Decimal("0.50"), + cache_write_cost_per_million=Decimal("6.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-sonnet-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("3.00"), + output_cost_per_million=Decimal("15.00"), + 
cache_read_cost_per_million=Decimal("0.30"), + cache_write_cost_per_million=Decimal("3.75"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + ( + "anthropic", + "claude-haiku-4-5", + ): PricingEntry( + input_cost_per_million=Decimal("1.00"), + output_cost_per_million=Decimal("5.00"), + cache_read_cost_per_million=Decimal("0.10"), + cache_write_cost_per_million=Decimal("1.25"), + source="official_docs_snapshot", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", + ), + # ── Anthropic Claude 4 / 4.1 ───────────────────────────────────────── ( "anthropic", "claude-opus-4-20250514", @@ -91,8 +207,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("1.50"), cache_write_cost_per_million=Decimal("18.75"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-prompt-caching-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), ( "anthropic", @@ -103,8 +219,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("0.30"), cache_write_cost_per_million=Decimal("3.75"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-prompt-caching-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), # OpenAI ( @@ -184,7 +300,7 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { source_url="https://openai.com/api/pricing/", pricing_version="openai-pricing-2026-03-16", ), - # Anthropic older models (pre-4.6 generation) + # ── Anthropic older models 
(pre-4.5 generation) ──────────────────────── ( "anthropic", "claude-3-5-sonnet-20241022", @@ -194,8 +310,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("0.30"), cache_write_cost_per_million=Decimal("3.75"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-pricing-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), ( "anthropic", @@ -206,8 +322,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("0.08"), cache_write_cost_per_million=Decimal("1.00"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-pricing-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), ( "anthropic", @@ -218,8 +334,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("1.50"), cache_write_cost_per_million=Decimal("18.75"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-pricing-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), ( "anthropic", @@ -230,8 +346,8 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = { cache_read_cost_per_million=Decimal("0.03"), cache_write_cost_per_million=Decimal("0.30"), source="official_docs_snapshot", - source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching", - pricing_version="anthropic-pricing-2026-03-16", + source_url="https://platform.claude.com/docs/en/about-claude/pricing", + pricing_version="anthropic-pricing-2026-05", ), # 
DeepSeek ( @@ -426,8 +542,37 @@ def resolve_billing_route( return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown") +def _normalize_anthropic_model_name(model: str) -> str: + """Normalize Anthropic model name variants to canonical form. + + Handles: + - Dot notation: claude-opus-4.7 → claude-opus-4-7 + - Multi-part versions: claude-3.5.1 → claude-3-5-1 + - Strips anthropic/ prefix if present + """ + name = model.lower().strip() + if name.startswith("anthropic/"): + name = name[len("anthropic/"):] + # Replace every dot that sits between two digits (4.7 → 4-7, 3.5.1 → 3-5-1); + # zero-width lookarounds leave the rest of the name structure untouched + name = re.sub(r"(?<=\d)\.(?=\d)", "-", name) + return name + + def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]: - return _OFFICIAL_DOCS_PRICING.get((route.provider, route.model.lower())) + model = route.model.lower() + # Direct lookup first + entry = _OFFICIAL_DOCS_PRICING.get((route.provider, model)) + if entry is not None: + return entry + # Try normalized name for Anthropic (handles dot-notation like opus-4.7) + if route.provider == "anthropic": + normalized = _normalize_anthropic_model_name(model) + if normalized != model: + entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized)) + if entry is not None: + return entry + return None def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]: diff --git a/cli.py b/cli.py index b802d00d26..08a9bb94ce 100644 --- a/cli.py +++ b/cli.py @@ -7991,6 +7991,7 @@ class HermesCLI: output_tokens = getattr(agent, "session_output_tokens", 0) or 0 cache_read_tokens = getattr(agent, "session_cache_read_tokens", 0) or 0 cache_write_tokens = getattr(agent, "session_cache_write_tokens", 0) or 0 + reasoning_tokens = getattr(agent, "session_reasoning_tokens", 0) or 0 prompt = agent.session_prompt_tokens completion = agent.session_completion_tokens total = agent.session_total_tokens @@ 
-8022,6 +8023,8 @@ class HermesCLI: print(f" Cache read tokens: {cache_read_tokens:>10,}") print(f" Cache write tokens: {cache_write_tokens:>10,}") print(f" Output tokens: {output_tokens:>10,}") + if reasoning_tokens: + print(f" ↳ Reasoning (subset): {reasoning_tokens:>10,}") print(f" Prompt tokens (total): {prompt:>10,}") print(f" Completion tokens: {completion:>10,}") print(f" Total tokens: {total:>10,}") diff --git a/hermes_state.py b/hermes_state.py index 444af16772..f31c360510 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -612,6 +612,11 @@ class SessionDB: the caller already holds cumulative totals (gateway path, where the cached agent accumulates across messages). """ + # Ensure the session row exists so the UPDATE doesn't silently affect + # 0 rows. Under concurrent load (cron + kanban + delegate_task) the + # initial create_session() may have failed due to SQLite locking. + # INSERT OR IGNORE is cheap and idempotent. + self._insert_session_row(session_id, "unknown", model=model) if absolute: sql = """UPDATE sessions SET input_tokens = ?, diff --git a/run_agent.py b/run_agent.py index d5f1dbef8d..403dba4e78 100644 --- a/run_agent.py +++ b/run_agent.py @@ -12131,6 +12131,14 @@ class AIAgent: # deltas instead of double-counting them. if self._session_db and self.session_id: try: + # Ensure the session row exists before attempting UPDATE. + # Under concurrent load (cron/kanban), the initial + # _ensure_db_session() may have failed due to SQLite + # locking. Retry here so per-call token deltas are + # not silently lost (UPDATE on a non-existent row + # affects 0 rows without error). 
+ if not self._session_db_created: + self._ensure_db_session() self._session_db.update_token_counts( self.session_id, input_tokens=canonical_usage.input_tokens, @@ -12149,8 +12157,14 @@ class AIAgent: model=self.model, api_call_count=1, ) - except Exception: - pass # never block the agent loop + except Exception as e: + # Log token persistence failures so they're + # visible in agent.log — silent loss here is + # the root cause of undercounted analytics. + logger.debug( + "Token persistence failed (session=%s, tokens=%d): %s", + self.session_id, total_tokens, e, + ) if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 229aff17c0..7219b811e4 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1280,6 +1280,7 @@ def _get_usage(agent) -> dict: "output": g("session_output_tokens", "session_completion_tokens"), "cache_read": g("session_cache_read_tokens"), "cache_write": g("session_cache_write_tokens"), + "reasoning": g("session_reasoning_tokens"), "prompt": g("session_prompt_tokens"), "completion": g("session_completion_tokens"), "total": g("session_total_tokens"), diff --git a/ui-tui/src/types.ts b/ui-tui/src/types.ts index fb37a1826c..658b9cc13d 100644 --- a/ui-tui/src/types.ts +++ b/ui-tui/src/types.ts @@ -164,9 +164,11 @@ export interface Usage { context_max?: number context_percent?: number context_used?: number + cost_status?: string cost_usd?: number input: number output: number + reasoning?: number total: number }