mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-27 01:11:40 +00:00
fix(dashboard): include cache tokens in totals, track real API call count
The analytics dashboard had three accuracy issues:

1. TOTAL TOKENS excluded cache_read and cache_write tokens — it only counted the non-cached input portion. With 90%+ cache hit rates typical in Hermes, this dramatically undercounted actual token usage (e.g. showing 9.1M when the real total was 169M+).
2. The 'API Calls' card displayed session count (COUNT(*) from sessions table), not actual LLM API requests. A single session makes 10-90 API calls through the tool loop, so this was ~30x lower than reality.
3. cache_write_tokens was stored in the DB but never exposed through the analytics API endpoint or frontend.

Changes:

- Add api_call_count column to sessions table (schema v7 migration)
- Persist api_call_count=1 per LLM API call in run_agent.py
- Analytics SQL queries now include cache_write_tokens and api_call_count in daily, by_model, and totals aggregations
- Frontend TOTAL TOKENS card now shows input + cache_read + cache_write + output (the full prompt total + output)
- API CALLS card now uses real api_call_count from DB
- New Cache Hit Rate card shows cache efficiency percentage
- Bar chart, tooltips, daily table, model table all use prompt totals (input + cache_read + cache_write) instead of just input
- Labels changed from 'Input' to 'Prompt' to reflect the full prompt total
- TypeScript interfaces and i18n strings updated (en + zh)
This commit is contained in:
parent
da8bab77fb
commit
42aeb4ecac
10 changed files with 121 additions and 27 deletions
|
|
@ -1977,7 +1977,8 @@ async def update_config_raw(body: RawConfigUpdate):
|
|||
@app.get("/api/analytics/usage")
|
||||
async def get_usage_analytics(days: int = 30):
|
||||
from hermes_state import SessionDB
|
||||
db = SessionDB()
|
||||
from hermes_constants import get_hermes_home
|
||||
db = SessionDB(db_path=get_hermes_home() / "state.db")
|
||||
try:
|
||||
cutoff = time.time() - (days * 86400)
|
||||
cur = db._conn.execute("""
|
||||
|
|
@ -1985,10 +1986,12 @@ async def get_usage_analytics(days: int = 30):
|
|||
SUM(input_tokens) as input_tokens,
|
||||
SUM(output_tokens) as output_tokens,
|
||||
SUM(cache_read_tokens) as cache_read_tokens,
|
||||
SUM(cache_write_tokens) as cache_write_tokens,
|
||||
SUM(reasoning_tokens) as reasoning_tokens,
|
||||
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
|
||||
COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
|
||||
COUNT(*) as sessions
|
||||
COUNT(*) as sessions,
|
||||
SUM(COALESCE(api_call_count, 0)) as api_calls
|
||||
FROM sessions WHERE started_at > ?
|
||||
GROUP BY day ORDER BY day
|
||||
""", (cutoff,))
|
||||
|
|
@ -1998,10 +2001,13 @@ async def get_usage_analytics(days: int = 30):
|
|||
SELECT model,
|
||||
SUM(input_tokens) as input_tokens,
|
||||
SUM(output_tokens) as output_tokens,
|
||||
SUM(cache_read_tokens) as cache_read_tokens,
|
||||
SUM(cache_write_tokens) as cache_write_tokens,
|
||||
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
|
||||
COUNT(*) as sessions
|
||||
COUNT(*) as sessions,
|
||||
SUM(COALESCE(api_call_count, 0)) as api_calls
|
||||
FROM sessions WHERE started_at > ? AND model IS NOT NULL
|
||||
GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
|
||||
GROUP BY model ORDER BY SUM(input_tokens) + SUM(cache_read_tokens) + SUM(cache_write_tokens) + SUM(output_tokens) DESC
|
||||
""", (cutoff,))
|
||||
by_model = [dict(r) for r in cur2.fetchall()]
|
||||
|
||||
|
|
@ -2009,10 +2015,12 @@ async def get_usage_analytics(days: int = 30):
|
|||
SELECT SUM(input_tokens) as total_input,
|
||||
SUM(output_tokens) as total_output,
|
||||
SUM(cache_read_tokens) as total_cache_read,
|
||||
SUM(cache_write_tokens) as total_cache_write,
|
||||
SUM(reasoning_tokens) as total_reasoning,
|
||||
COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
|
||||
COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
|
||||
COUNT(*) as total_sessions
|
||||
COUNT(*) as total_sessions,
|
||||
SUM(COALESCE(api_call_count, 0)) as total_api_calls
|
||||
FROM sessions WHERE started_at > ?
|
||||
""", (cutoff,))
|
||||
totals = dict(cur3.fetchone())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue