From 42aeb4ecacb956e5dfcc6a70b1356276a8e53afb Mon Sep 17 00:00:00 2001 From: kshitijk4poor Date: Wed, 15 Apr 2026 12:16:58 +0530 Subject: [PATCH] fix(dashboard): include cache tokens in totals, track real API call count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The analytics dashboard had three accuracy issues: 1. TOTAL TOKENS excluded cache_read and cache_write tokens — only counted the non-cached input portion. With 90%+ cache hit rates typical in Hermes, this dramatically undercounted actual token usage (e.g. showing 9.1M when the real total was 169M+). 2. The 'API Calls' card displayed session count (COUNT(*) from sessions table), not actual LLM API requests. A single session makes 10-90 API calls through the tool loop, so this was ~30x lower than reality. 3. cache_write_tokens was stored in the DB but never exposed through the analytics API endpoint or frontend. Changes: - Add api_call_count column to sessions table (schema v7 migration) - Persist api_call_count=1 per LLM API call in run_agent.py - Analytics SQL queries now include cache_write_tokens and api_call_count in daily, by_model, and totals aggregations - Frontend TOTAL TOKENS card now shows input + cache_read + cache_write + output (the full prompt total + output) - API CALLS card now uses real api_call_count from DB - New Cache Hit Rate card shows cache efficiency percentage - Bar chart, tooltips, daily table, model table all use prompt totals (input + cache_read + cache_write) instead of just input - Labels changed from 'Input' to 'Prompt' to reflect the full prompt total - TypeScript interfaces and i18n strings updated (en + zh) --- hermes_cli/web_server.py | 18 +++++++--- hermes_state.py | 22 ++++++++++-- run_agent.py | 1 + tests/hermes_cli/test_web_server.py | 2 ++ tests/test_hermes_state.py | 31 ++++++++++++++-- web/src/i18n/en.ts | 4 ++- web/src/i18n/types.ts | 4 ++- web/src/i18n/zh.ts | 4 ++- web/src/lib/api.ts | 7 ++++ web/src/pages/AnalyticsPage.tsx | 55 +++++++++++++++++++++-------- 10 files changed, 121 insertions(+), 27 deletions(-) diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 22265faa5..7a0652b22 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -1977,7 +1977,8 @@ async def update_config_raw(body: RawConfigUpdate): @app.get("/api/analytics/usage") async def get_usage_analytics(days: int = 30): from hermes_state import SessionDB - db = SessionDB() + from hermes_constants import get_hermes_home + db = SessionDB(db_path=get_hermes_home() / "state.db") try: cutoff = time.time() - (days * 86400) cur = db._conn.execute(""" @@ -1985,10 +1986,12 @@ async def get_usage_analytics(days: int = 30): SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens, SUM(cache_read_tokens) as cache_read_tokens, + SUM(cache_write_tokens) as cache_write_tokens, SUM(reasoning_tokens) as reasoning_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, COALESCE(SUM(actual_cost_usd), 0) as actual_cost, - COUNT(*) as sessions + COUNT(*) as sessions, + SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? GROUP BY day ORDER BY day """, (cutoff,)) @@ -1998,10 +2001,13 @@ async def get_usage_analytics(days: int = 30): SELECT model, SUM(input_tokens) as input_tokens, SUM(output_tokens) as output_tokens, + SUM(cache_read_tokens) as cache_read_tokens, + SUM(cache_write_tokens) as cache_write_tokens, COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost, - COUNT(*) as sessions + COUNT(*) as sessions, + SUM(COALESCE(api_call_count, 0)) as api_calls FROM sessions WHERE started_at > ? AND model IS NOT NULL - GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC + GROUP BY model ORDER BY SUM(input_tokens) + SUM(cache_read_tokens) + SUM(cache_write_tokens) + SUM(output_tokens) DESC """, (cutoff,)) by_model = [dict(r) for r in cur2.fetchall()] @@ -2009,10 +2015,12 @@ async def get_usage_analytics(days: int = 30): SELECT SUM(input_tokens) as total_input, SUM(output_tokens) as total_output, SUM(cache_read_tokens) as total_cache_read, + SUM(cache_write_tokens) as total_cache_write, SUM(reasoning_tokens) as total_reasoning, COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost, COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost, - COUNT(*) as total_sessions + COUNT(*) as total_sessions, + SUM(COALESCE(api_call_count, 0)) as total_api_calls FROM sessions WHERE started_at > ? """, (cutoff,)) totals = dict(cur3.fetchone()) diff --git a/hermes_state.py b/hermes_state.py index 5e563666e..c0b174268 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -31,7 +31,7 @@ T = TypeVar("T") DEFAULT_DB_PATH = get_hermes_home() / "state.db" -SCHEMA_VERSION = 6 +SCHEMA_VERSION = 7 SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions ( cost_source TEXT, pricing_version TEXT, title TEXT, + api_call_count INTEGER DEFAULT 0, FOREIGN KEY (parent_session_id) REFERENCES sessions(id) ); @@ -329,6 +330,17 @@ class SessionDB: except sqlite3.OperationalError: pass # Column already exists cursor.execute("UPDATE schema_version SET version = 6") + if current_version < 7: + # v7: add api_call_count column to sessions — tracks the number + # of individual LLM API calls made within a session (as opposed + # to the session count itself). + try: + cursor.execute( + 'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0' + ) + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 7") # Unique title index — always ensure it exists (safe to run after migrations # since the title column is guaranteed to exist at this point) @@ -426,6 +438,7 @@ class SessionDB: billing_provider: Optional[str] = None, billing_base_url: Optional[str] = None, billing_mode: Optional[str] = None, + api_call_count: int = 0, absolute: bool = False, ) -> None: """Update token counters and backfill model if not already set. @@ -455,7 +468,8 @@ class SessionDB: billing_provider = COALESCE(billing_provider, ?), billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), - model = COALESCE(model, ?) + model = COALESCE(model, ?), + api_call_count = ? WHERE id = ?""" else: sql = """UPDATE sessions SET @@ -475,7 +489,8 @@ class SessionDB: billing_provider = COALESCE(billing_provider, ?), billing_base_url = COALESCE(billing_base_url, ?), billing_mode = COALESCE(billing_mode, ?), - model = COALESCE(model, ?) + model = COALESCE(model, ?), + api_call_count = COALESCE(api_call_count, 0) + ? WHERE id = ?""" params = ( input_tokens, @@ -493,6 +508,7 @@ class SessionDB: billing_base_url, billing_mode, model, + api_call_count, session_id, ) def _do(conn): diff --git a/run_agent.py b/run_agent.py index 48382389e..bd9782166 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8917,6 +8917,7 @@ class AIAgent: billing_mode="subscription_included" if cost_result.status == "included" else None, model=self.model, + api_call_count=1, ) except Exception: pass # never block the agent loop diff --git a/tests/hermes_cli/test_web_server.py b/tests/hermes_cli/test_web_server.py index 365e3d0fe..f28e00c2d 100644 --- a/tests/hermes_cli/test_web_server.py +++ b/tests/hermes_cli/test_web_server.py @@ -694,6 +694,8 @@ class TestNewEndpoints: assert "totals" in data assert isinstance(data["daily"], list) assert "total_sessions" in data["totals"] + assert "total_cache_write" in data["totals"] + assert "total_api_calls" in data["totals"] def test_session_token_endpoint_removed(self): """GET /api/auth/session-token no longer exists.""" diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index 5f9a16a52..c60fde764 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -62,6 +62,27 @@ class TestSessionLifecycle: assert session["input_tokens"] == 300 assert session["output_tokens"] == 150 + def test_update_token_counts_tracks_api_call_count(self, db): + """api_call_count increments with each update_token_counts call.""" + db.create_session(session_id="s1", source="cli") + db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1) + db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1) + db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1) + + session = db.get_session("s1") + assert session["api_call_count"] == 3 + + def test_update_token_counts_api_call_count_absolute(self, db): + """absolute mode sets api_call_count directly.""" + db.create_session(session_id="s1", source="cli") + db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1) + db.update_token_counts("s1", input_tokens=300, output_tokens=150, + api_call_count=5, absolute=True) + + session = db.get_session("s1") + assert session["api_call_count"] == 5 + assert session["input_tokens"] == 300 + def test_update_token_counts_backfills_model_when_null(self, db): db.create_session(session_id="s1", source="telegram") db.update_token_counts("s1", input_tokens=10, output_tokens=5, model="openai/gpt-5.4") @@ -935,7 +956,7 @@ class TestSchemaInit: def test_schema_version(self, db): cursor = db._conn.execute("SELECT version FROM schema_version") version = cursor.fetchone()[0] - assert version == 6 + assert version == 7 def test_title_column_exists(self, db): """Verify the title column was created in the sessions table.""" @@ -996,13 +1017,19 @@ class TestSchemaInit: # Verify migration cursor = migrated_db._conn.execute("SELECT version FROM schema_version") - assert cursor.fetchone()[0] == 6 + assert cursor.fetchone()[0] == 7 # Verify title column exists and is NULL for existing sessions session = migrated_db.get_session("existing") assert session is not None assert session["title"] is None + # Verify api_call_count column was added with default 0 + cursor = migrated_db._conn.execute( + "SELECT api_call_count FROM sessions WHERE id = 'existing'" + ) + assert cursor.fetchone()[0] == 0 + # Verify we can set title on migrated session assert migrated_db.set_session_title("existing", "Migrated Title") is True session = migrated_db.get_session("existing") diff --git a/web/src/i18n/en.ts b/web/src/i18n/en.ts index 3bf693f21..3f5c1e05c 100644 --- a/web/src/i18n/en.ts +++ b/web/src/i18n/en.ts @@ -112,11 +112,14 @@ export const en: Translations = { totalTokens: "Total Tokens", totalSessions: "Total Sessions", apiCalls: "API Calls", + cacheHitRate: "Cache Hit Rate", dailyTokenUsage: "Daily Token Usage", dailyBreakdown: "Daily Breakdown", perModelBreakdown: "Per-Model Breakdown", + prompt: "Prompt", input: "Input", output: "Output", + cached: "cached", total: "Total", noUsageData: "No usage data for this period", startSession: "Start a session to see analytics here", @@ -125,7 +128,6 @@ export const en: Translations = { tokens: "Tokens", perDayAvg: "/day avg", acrossModels: "across {count} models", - inOut: "{input} in / {output} out", }, logs: { diff --git a/web/src/i18n/types.ts b/web/src/i18n/types.ts index 34813c68f..bf44220dc 100644 --- a/web/src/i18n/types.ts +++ b/web/src/i18n/types.ts @@ -117,11 +117,14 @@ export interface Translations { totalTokens: string; totalSessions: string; apiCalls: string; + cacheHitRate: string; dailyTokenUsage: string; dailyBreakdown: string; perModelBreakdown: string; + prompt: string; input: string; output: string; + cached: string; total: string; noUsageData: string; startSession: string; @@ -130,7 +133,6 @@ export interface Translations { tokens: string; perDayAvg: string; acrossModels: string; - inOut: string; }; // ── Logs page ── diff --git a/web/src/i18n/zh.ts b/web/src/i18n/zh.ts index 18cb3ee38..05c6d3a90 100644 --- a/web/src/i18n/zh.ts +++ b/web/src/i18n/zh.ts @@ -112,11 +112,14 @@ export const zh: Translations = { totalTokens: "总 Token 数", totalSessions: "总会话数", apiCalls: "API 调用", + cacheHitRate: "缓存命中率", dailyTokenUsage: "每日 Token 用量", dailyBreakdown: "每日明细", perModelBreakdown: "模型用量明细", + prompt: "提示", input: "输入", output: "输出", + cached: "已缓存", total: "总计", noUsageData: "该时间段暂无使用数据", startSession: "开始会话后将在此显示分析数据", @@ -125,7 +128,6 @@ export const zh: Translations = { tokens: "Token", perDayAvg: "/天 平均", acrossModels: "共 {count} 个模型", - inOut: "输入 {input} / 输出 {output}", }, logs: { diff --git a/web/src/lib/api.ts b/web/src/lib/api.ts index e61043993..b7deda0b7 100644 --- a/web/src/lib/api.ts +++ b/web/src/lib/api.ts @@ -269,18 +269,23 @@ export interface AnalyticsDailyEntry { input_tokens: number; output_tokens: number; cache_read_tokens: number; + cache_write_tokens: number; reasoning_tokens: number; estimated_cost: number; actual_cost: number; sessions: number; + api_calls: number; } export interface AnalyticsModelEntry { model: string; input_tokens: number; output_tokens: number; + cache_read_tokens: number; + cache_write_tokens: number; estimated_cost: number; sessions: number; + api_calls: number; } export interface AnalyticsResponse { @@ -290,10 +295,12 @@ export interface AnalyticsResponse { total_input: number; total_output: number; total_cache_read: number; + total_cache_write: number; total_reasoning: number; total_estimated_cost: number; total_actual_cost: number; total_sessions: number; + total_api_calls: number; }; } diff --git a/web/src/pages/AnalyticsPage.tsx b/web/src/pages/AnalyticsPage.tsx index 2f947cbb6..408730306 100644 --- a/web/src/pages/AnalyticsPage.tsx +++ b/web/src/pages/AnalyticsPage.tsx @@ -4,6 +4,7 @@ import { Cpu, Hash, TrendingUp, + Zap, } from "lucide-react"; import { api } from "@/lib/api"; import type { AnalyticsResponse, AnalyticsDailyEntry, AnalyticsModelEntry } from "@/lib/api"; @@ -19,6 +20,11 @@ const PERIODS = [ const CHART_HEIGHT_PX = 160; +/** Compute total prompt tokens (input + cache_read + cache_write). */ +function getPromptTokens(d: { input_tokens: number; cache_read_tokens?: number; cache_write_tokens?: number }): number { + return d.input_tokens + (d.cache_read_tokens ?? 0) + (d.cache_write_tokens ?? 0); +} + function formatTokens(n: number): string { if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`; if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`; @@ -63,7 +69,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) { const { t } = useI18n(); if (daily.length === 0) return null; - const maxTokens = Math.max(...daily.map((d) => d.input_tokens + d.output_tokens), 1); + const maxTokens = Math.max(...daily.map((d) => getPromptTokens(d) + d.output_tokens), 1); return ( @@ -75,7 +81,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
- {t.analytics.input} + {t.analytics.prompt}
@@ -86,8 +92,9 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
{daily.map((d) => { - const total = d.input_tokens + d.output_tokens; - const inputH = Math.round((d.input_tokens / maxTokens) * CHART_HEIGHT_PX); + const promptTokens = getPromptTokens(d); + const total = promptTokens + d.output_tokens; + const inputH = Math.round((promptTokens / maxTokens) * CHART_HEIGHT_PX); const outputH = Math.round((d.output_tokens / maxTokens) * CHART_HEIGHT_PX); return (
{formatDate(d.day)}
-
{t.analytics.input}: {formatTokens(d.input_tokens)}
+
{t.analytics.prompt}: {formatTokens(promptTokens)}
{t.analytics.output}: {formatTokens(d.output_tokens)}
{t.analytics.total}: {formatTokens(total)}
@@ -152,18 +159,19 @@ function DailyTable({ daily }: { daily: AnalyticsDailyEntry[] }) { {t.analytics.date} {t.sessions.title} - {t.analytics.input} + {t.analytics.prompt} {t.analytics.output} {sorted.map((d) => { + const promptTokens = getPromptTokens(d); return ( {formatDate(d.day)} {d.sessions} - {formatTokens(d.input_tokens)} + {formatTokens(promptTokens)} {formatTokens(d.output_tokens)} @@ -184,7 +192,7 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) { if (models.length === 0) return null; const sorted = [...models].sort( - (a, b) => b.input_tokens + b.output_tokens - (a.input_tokens + a.output_tokens), + (a, b) => (getPromptTokens(b) + b.output_tokens) - (getPromptTokens(a) + a.output_tokens), ); return ( @@ -213,7 +221,7 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) { {m.sessions} - {formatTokens(m.input_tokens)} + {formatTokens(getPromptTokens(m))} {" / "} {formatTokens(m.output_tokens)} @@ -283,12 +291,17 @@ export default function AnalyticsPage() { {data && ( <> {/* Summary cards */} -
+
sum + d.sessions, 0))} + value={String(data.totals.total_api_calls ?? data.daily.reduce((sum, d) => sum + d.sessions, 0))} sub={t.analytics.acrossModels.replace("{count}", String(data.by_model.length))} /> + {(() => { + const promptSent = (data.totals.total_input ?? 0) + (data.totals.total_cache_read ?? 0); + const rate = promptSent > 0 + ? `${((data.totals.total_cache_read ?? 0) / promptSent * 100).toFixed(0)}%` + : "—"; + return ( + + ); + })()}
{/* Bar chart */}