fix(dashboard): include cache tokens in totals, track real API call count

The analytics dashboard had three accuracy issues:

1. TOTAL TOKENS excluded cache_read and cache_write tokens — only counted
   the non-cached input portion. With 90%+ cache hit rates typical in
   Hermes, this dramatically undercounted actual token usage (e.g. showing
   9.1M when the real total was 169M+).

2. The 'API Calls' card displayed session count (COUNT(*) from sessions
   table), not actual LLM API requests. A single session makes 10-90 API
   calls through the tool loop, so this was ~30x lower than reality.

3. cache_write_tokens was stored in the DB but never exposed through the
   analytics API endpoint or frontend.

Changes:
- Add api_call_count column to sessions table (schema v7 migration)
- Persist api_call_count=1 per LLM API call in run_agent.py
- Analytics SQL queries now include cache_write_tokens and api_call_count
  in daily, by_model, and totals aggregations
- Frontend TOTAL TOKENS card now shows input + cache_read + cache_write +
  output (the full prompt total + output)
- API CALLS card now uses real api_call_count from DB
- New Cache Hit Rate card shows cache efficiency percentage
- Bar chart, tooltips, daily table, model table all use prompt totals
  (input + cache_read + cache_write) instead of just input
- Labels changed from 'Input' to 'Prompt' to reflect the full prompt total
- TypeScript interfaces and i18n strings updated (en + zh)
This commit is contained in:
kshitijk4poor 2026-04-15 12:16:58 +05:30
parent da8bab77fb
commit 42aeb4ecac
10 changed files with 121 additions and 27 deletions

View file

@ -1977,7 +1977,8 @@ async def update_config_raw(body: RawConfigUpdate):
@app.get("/api/analytics/usage")
async def get_usage_analytics(days: int = 30):
from hermes_state import SessionDB
db = SessionDB()
from hermes_constants import get_hermes_home
db = SessionDB(db_path=get_hermes_home() / "state.db")
try:
cutoff = time.time() - (days * 86400)
cur = db._conn.execute("""
@ -1985,10 +1986,12 @@ async def get_usage_analytics(days: int = 30):
SUM(input_tokens) as input_tokens,
SUM(output_tokens) as output_tokens,
SUM(cache_read_tokens) as cache_read_tokens,
SUM(cache_write_tokens) as cache_write_tokens,
SUM(reasoning_tokens) as reasoning_tokens,
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
COUNT(*) as sessions
COUNT(*) as sessions,
SUM(COALESCE(api_call_count, 0)) as api_calls
FROM sessions WHERE started_at > ?
GROUP BY day ORDER BY day
""", (cutoff,))
@ -1998,10 +2001,13 @@ async def get_usage_analytics(days: int = 30):
SELECT model,
SUM(input_tokens) as input_tokens,
SUM(output_tokens) as output_tokens,
SUM(cache_read_tokens) as cache_read_tokens,
SUM(cache_write_tokens) as cache_write_tokens,
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
COUNT(*) as sessions
COUNT(*) as sessions,
SUM(COALESCE(api_call_count, 0)) as api_calls
FROM sessions WHERE started_at > ? AND model IS NOT NULL
GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
GROUP BY model ORDER BY SUM(input_tokens) + SUM(cache_read_tokens) + SUM(cache_write_tokens) + SUM(output_tokens) DESC
""", (cutoff,))
by_model = [dict(r) for r in cur2.fetchall()]
@ -2009,10 +2015,12 @@ async def get_usage_analytics(days: int = 30):
SELECT SUM(input_tokens) as total_input,
SUM(output_tokens) as total_output,
SUM(cache_read_tokens) as total_cache_read,
SUM(cache_write_tokens) as total_cache_write,
SUM(reasoning_tokens) as total_reasoning,
COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
COUNT(*) as total_sessions
COUNT(*) as total_sessions,
SUM(COALESCE(api_call_count, 0)) as total_api_calls
FROM sessions WHERE started_at > ?
""", (cutoff,))
totals = dict(cur3.fetchone())

View file

@ -31,7 +31,7 @@ T = TypeVar("T")
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
SCHEMA_VERSION = 6
SCHEMA_VERSION = 7
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS schema_version (
@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions (
cost_source TEXT,
pricing_version TEXT,
title TEXT,
api_call_count INTEGER DEFAULT 0,
FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
);
@ -329,6 +330,17 @@ class SessionDB:
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 6")
if current_version < 7:
# v7: add api_call_count column to sessions — tracks the number
# of individual LLM API calls made within a session (as opposed
# to the session count itself).
try:
cursor.execute(
'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
)
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 7")
# Unique title index — always ensure it exists (safe to run after migrations
# since the title column is guaranteed to exist at this point)
@ -426,6 +438,7 @@ class SessionDB:
billing_provider: Optional[str] = None,
billing_base_url: Optional[str] = None,
billing_mode: Optional[str] = None,
api_call_count: int = 0,
absolute: bool = False,
) -> None:
"""Update token counters and backfill model if not already set.
@ -455,7 +468,8 @@ class SessionDB:
billing_provider = COALESCE(billing_provider, ?),
billing_base_url = COALESCE(billing_base_url, ?),
billing_mode = COALESCE(billing_mode, ?),
model = COALESCE(model, ?)
model = COALESCE(model, ?),
api_call_count = ?
WHERE id = ?"""
else:
sql = """UPDATE sessions SET
@ -475,7 +489,8 @@ class SessionDB:
billing_provider = COALESCE(billing_provider, ?),
billing_base_url = COALESCE(billing_base_url, ?),
billing_mode = COALESCE(billing_mode, ?),
model = COALESCE(model, ?)
model = COALESCE(model, ?),
api_call_count = COALESCE(api_call_count, 0) + ?
WHERE id = ?"""
params = (
input_tokens,
@ -493,6 +508,7 @@ class SessionDB:
billing_base_url,
billing_mode,
model,
api_call_count,
session_id,
)
def _do(conn):

View file

@ -8917,6 +8917,7 @@ class AIAgent:
billing_mode="subscription_included"
if cost_result.status == "included" else None,
model=self.model,
api_call_count=1,
)
except Exception:
pass # never block the agent loop

View file

@ -694,6 +694,8 @@ class TestNewEndpoints:
assert "totals" in data
assert isinstance(data["daily"], list)
assert "total_sessions" in data["totals"]
assert "total_cache_write" in data["totals"]
assert "total_api_calls" in data["totals"]
def test_session_token_endpoint_removed(self):
"""GET /api/auth/session-token no longer exists."""

View file

@ -62,6 +62,27 @@ class TestSessionLifecycle:
assert session["input_tokens"] == 300
assert session["output_tokens"] == 150
def test_update_token_counts_tracks_api_call_count(self, db):
    """Each update_token_counts call adds its api_call_count to the running total."""
    db.create_session(session_id="s1", source="cli")
    # Simulate three LLM API calls within one session; the counter
    # should accumulate rather than overwrite.
    for _ in range(3):
        db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)
    session = db.get_session("s1")
    assert session["api_call_count"] == 3
def test_update_token_counts_api_call_count_absolute(self, db):
    """With absolute=True, api_call_count is set directly instead of accumulated."""
    db.create_session(session_id="s1", source="cli")
    # One incremental update first...
    db.update_token_counts("s1", input_tokens=100, output_tokens=50, api_call_count=1)
    # ...then an absolute overwrite, which must replace prior counters.
    db.update_token_counts(
        "s1",
        input_tokens=300,
        output_tokens=150,
        api_call_count=5,
        absolute=True,
    )
    session = db.get_session("s1")
    assert session["api_call_count"] == 5
    assert session["input_tokens"] == 300
def test_update_token_counts_backfills_model_when_null(self, db):
db.create_session(session_id="s1", source="telegram")
db.update_token_counts("s1", input_tokens=10, output_tokens=5, model="openai/gpt-5.4")
@ -935,7 +956,7 @@ class TestSchemaInit:
def test_schema_version(self, db):
cursor = db._conn.execute("SELECT version FROM schema_version")
version = cursor.fetchone()[0]
assert version == 6
assert version == 7
def test_title_column_exists(self, db):
"""Verify the title column was created in the sessions table."""
@ -996,13 +1017,19 @@ class TestSchemaInit:
# Verify migration
cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
assert cursor.fetchone()[0] == 6
assert cursor.fetchone()[0] == 7
# Verify title column exists and is NULL for existing sessions
session = migrated_db.get_session("existing")
assert session is not None
assert session["title"] is None
# Verify api_call_count column was added with default 0
cursor = migrated_db._conn.execute(
"SELECT api_call_count FROM sessions WHERE id = 'existing'"
)
assert cursor.fetchone()[0] == 0
# Verify we can set title on migrated session
assert migrated_db.set_session_title("existing", "Migrated Title") is True
session = migrated_db.get_session("existing")

View file

@ -112,11 +112,14 @@ export const en: Translations = {
totalTokens: "Total Tokens",
totalSessions: "Total Sessions",
apiCalls: "API Calls",
cacheHitRate: "Cache Hit Rate",
dailyTokenUsage: "Daily Token Usage",
dailyBreakdown: "Daily Breakdown",
perModelBreakdown: "Per-Model Breakdown",
prompt: "Prompt",
input: "Input",
output: "Output",
cached: "cached",
total: "Total",
noUsageData: "No usage data for this period",
startSession: "Start a session to see analytics here",
@ -125,7 +128,6 @@ export const en: Translations = {
tokens: "Tokens",
perDayAvg: "/day avg",
acrossModels: "across {count} models",
inOut: "{input} in / {output} out",
},
logs: {

View file

@ -117,11 +117,14 @@ export interface Translations {
totalTokens: string;
totalSessions: string;
apiCalls: string;
cacheHitRate: string;
dailyTokenUsage: string;
dailyBreakdown: string;
perModelBreakdown: string;
prompt: string;
input: string;
output: string;
cached: string;
total: string;
noUsageData: string;
startSession: string;
@ -130,7 +133,6 @@ export interface Translations {
tokens: string;
perDayAvg: string;
acrossModels: string;
inOut: string;
};
// ── Logs page ──

View file

@ -112,11 +112,14 @@ export const zh: Translations = {
totalTokens: "总 Token 数",
totalSessions: "总会话数",
apiCalls: "API 调用",
cacheHitRate: "缓存命中率",
dailyTokenUsage: "每日 Token 用量",
dailyBreakdown: "每日明细",
perModelBreakdown: "模型用量明细",
prompt: "提示",
input: "输入",
output: "输出",
cached: "已缓存",
total: "总计",
noUsageData: "该时间段暂无使用数据",
startSession: "开始会话后将在此显示分析数据",
@ -125,7 +128,6 @@ export const zh: Translations = {
tokens: "Token",
perDayAvg: "/天 平均",
acrossModels: "共 {count} 个模型",
inOut: "输入 {input} / 输出 {output}",
},
logs: {

View file

@ -269,18 +269,23 @@ export interface AnalyticsDailyEntry {
input_tokens: number;
output_tokens: number;
cache_read_tokens: number;
cache_write_tokens: number;
reasoning_tokens: number;
estimated_cost: number;
actual_cost: number;
sessions: number;
api_calls: number;
}
/** Per-model usage aggregation row returned by GET /api/analytics/usage. */
export interface AnalyticsModelEntry {
  /** Model identifier as stored on the session (e.g. provider/model slug). */
  model: string;
  /** Non-cached input tokens summed across sessions for this model. */
  input_tokens: number;
  /** Output (completion) tokens summed across sessions for this model. */
  output_tokens: number;
  /** Tokens served from prompt cache reads. */
  cache_read_tokens: number;
  /** Tokens written into the prompt cache. */
  cache_write_tokens: number;
  /** Estimated spend in USD for this model over the period. */
  estimated_cost: number;
  /** Number of sessions that used this model. */
  sessions: number;
  /** Real LLM API request count (distinct from session count). */
  api_calls: number;
}
export interface AnalyticsResponse {
@ -290,10 +295,12 @@ export interface AnalyticsResponse {
total_input: number;
total_output: number;
total_cache_read: number;
total_cache_write: number;
total_reasoning: number;
total_estimated_cost: number;
total_actual_cost: number;
total_sessions: number;
total_api_calls: number;
};
}

View file

@ -4,6 +4,7 @@ import {
Cpu,
Hash,
TrendingUp,
Zap,
} from "lucide-react";
import { api } from "@/lib/api";
import type { AnalyticsResponse, AnalyticsDailyEntry, AnalyticsModelEntry } from "@/lib/api";
@ -19,6 +20,11 @@ const PERIODS = [
const CHART_HEIGHT_PX = 160;
/**
 * Total prompt-side tokens for an entry: the raw (non-cached) input plus
 * anything read from or written to the prompt cache. Cache fields are
 * optional on some entry shapes, so they default to zero when absent.
 */
function getPromptTokens(d: { input_tokens: number; cache_read_tokens?: number; cache_write_tokens?: number }): number {
  const cacheRead = d.cache_read_tokens ?? 0;
  const cacheWrite = d.cache_write_tokens ?? 0;
  return d.input_tokens + cacheRead + cacheWrite;
}
function formatTokens(n: number): string {
if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
@ -63,7 +69,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
const { t } = useI18n();
if (daily.length === 0) return null;
const maxTokens = Math.max(...daily.map((d) => d.input_tokens + d.output_tokens), 1);
const maxTokens = Math.max(...daily.map((d) => getPromptTokens(d) + d.output_tokens), 1);
return (
<Card>
@ -75,7 +81,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
<div className="flex items-center gap-4 text-xs text-muted-foreground">
<div className="flex items-center gap-1.5">
<div className="h-2.5 w-2.5 bg-[#ffe6cb]" />
{t.analytics.input}
{t.analytics.prompt}
</div>
<div className="flex items-center gap-1.5">
<div className="h-2.5 w-2.5 bg-emerald-500" />
@ -86,8 +92,9 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
<CardContent>
<div className="flex items-end gap-[2px]" style={{ height: CHART_HEIGHT_PX }}>
{daily.map((d) => {
const total = d.input_tokens + d.output_tokens;
const inputH = Math.round((d.input_tokens / maxTokens) * CHART_HEIGHT_PX);
const promptTokens = getPromptTokens(d);
const total = promptTokens + d.output_tokens;
const inputH = Math.round((promptTokens / maxTokens) * CHART_HEIGHT_PX);
const outputH = Math.round((d.output_tokens / maxTokens) * CHART_HEIGHT_PX);
return (
<div
@ -99,7 +106,7 @@ function TokenBarChart({ daily }: { daily: AnalyticsDailyEntry[] }) {
<div className="absolute bottom-full left-1/2 -translate-x-1/2 mb-2 hidden group-hover:block z-10 pointer-events-none">
<div className="bg-card border border-border px-2.5 py-1.5 text-[10px] text-foreground shadow-lg whitespace-nowrap">
<div className="font-medium">{formatDate(d.day)}</div>
<div>{t.analytics.input}: {formatTokens(d.input_tokens)}</div>
<div>{t.analytics.prompt}: {formatTokens(promptTokens)}</div>
<div>{t.analytics.output}: {formatTokens(d.output_tokens)}</div>
<div>{t.analytics.total}: {formatTokens(total)}</div>
</div>
@ -152,18 +159,19 @@ function DailyTable({ daily }: { daily: AnalyticsDailyEntry[] }) {
<tr className="border-b border-border text-muted-foreground text-xs">
<th className="text-left py-2 pr-4 font-medium">{t.analytics.date}</th>
<th className="text-right py-2 px-4 font-medium">{t.sessions.title}</th>
<th className="text-right py-2 px-4 font-medium">{t.analytics.input}</th>
<th className="text-right py-2 px-4 font-medium">{t.analytics.prompt}</th>
<th className="text-right py-2 pl-4 font-medium">{t.analytics.output}</th>
</tr>
</thead>
<tbody>
{sorted.map((d) => {
const promptTokens = getPromptTokens(d);
return (
<tr key={d.day} className="border-b border-border/50 hover:bg-secondary/20 transition-colors">
<td className="py-2 pr-4 font-medium">{formatDate(d.day)}</td>
<td className="text-right py-2 px-4 text-muted-foreground">{d.sessions}</td>
<td className="text-right py-2 px-4">
<span className="text-[#ffe6cb]">{formatTokens(d.input_tokens)}</span>
<span className="text-[#ffe6cb]">{formatTokens(promptTokens)}</span>
</td>
<td className="text-right py-2 pl-4">
<span className="text-emerald-400">{formatTokens(d.output_tokens)}</span>
@ -184,7 +192,7 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) {
if (models.length === 0) return null;
const sorted = [...models].sort(
(a, b) => b.input_tokens + b.output_tokens - (a.input_tokens + a.output_tokens),
(a, b) => (getPromptTokens(b) + b.output_tokens) - (getPromptTokens(a) + a.output_tokens),
);
return (
@ -213,7 +221,7 @@ function ModelTable({ models }: { models: AnalyticsModelEntry[] }) {
</td>
<td className="text-right py-2 px-4 text-muted-foreground">{m.sessions}</td>
<td className="text-right py-2 pl-4">
<span className="text-[#ffe6cb]">{formatTokens(m.input_tokens)}</span>
<span className="text-[#ffe6cb]">{formatTokens(getPromptTokens(m))}</span>
{" / "}
<span className="text-emerald-400">{formatTokens(m.output_tokens)}</span>
</td>
@ -283,12 +291,17 @@ export default function AnalyticsPage() {
{data && (
<>
{/* Summary cards */}
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-3">
<div className="grid gap-4 sm:grid-cols-2 lg:grid-cols-4">
<SummaryCard
icon={Hash}
label={t.analytics.totalTokens}
value={formatTokens(data.totals.total_input + data.totals.total_output)}
sub={t.analytics.inOut.replace("{input}", formatTokens(data.totals.total_input)).replace("{output}", formatTokens(data.totals.total_output))}
value={formatTokens(
(data.totals.total_input ?? 0) +
(data.totals.total_cache_read ?? 0) +
(data.totals.total_cache_write ?? 0) +
(data.totals.total_output ?? 0)
)}
sub={`${formatTokens((data.totals.total_input ?? 0) + (data.totals.total_cache_read ?? 0) + (data.totals.total_cache_write ?? 0))} ${t.analytics.prompt} / ${formatTokens(data.totals.total_output ?? 0)} ${t.analytics.output.toLowerCase()}`}
/>
<SummaryCard
icon={BarChart3}
@ -297,11 +310,25 @@ export default function AnalyticsPage() {
sub={`~${(data.totals.total_sessions / days).toFixed(1)}${t.analytics.perDayAvg}`}
/>
<SummaryCard
icon={TrendingUp}
icon={Zap}
label={t.analytics.apiCalls}
value={String(data.daily.reduce((sum, d) => sum + d.sessions, 0))}
value={String(data.totals.total_api_calls ?? data.daily.reduce((sum, d) => sum + d.sessions, 0))}
sub={t.analytics.acrossModels.replace("{count}", String(data.by_model.length))}
/>
{(() => {
const promptSent = (data.totals.total_input ?? 0) + (data.totals.total_cache_read ?? 0);
const rate = promptSent > 0
? `${((data.totals.total_cache_read ?? 0) / promptSent * 100).toFixed(0)}%`
: "—";
return (
<SummaryCard
icon={TrendingUp}
label={t.analytics.cacheHitRate}
value={rate}
sub={`${formatTokens(data.totals.total_cache_read ?? 0)} ${t.analytics.cached}`}
/>
);
})()}
</div>
{/* Bar chart */}