feat(dashboard): hide token/cost analytics behind config flag (default off) (#25438)

The Analytics page and the token/cost surfaces on the Models page show
local debug estimates only. They count input+output tokens (the token bar
also adds cache_read+reasoning but omits cache_write entirely) from
successful main-agent responses that returned a usable usage block.

Excluded silently:
- All auxiliary calls — context compression, title generation, vision,
  session search, web extract, smart approvals, MCP routing, plugin LLM
  access (13 production call sites bypass update_token_counts)
- Provider-side retries, fallback attempts
- Any call whose usage block didn't come back
- cache_write_tokens (column exists in sessions table but not returned
  by /api/analytics/models)

Real-world impact: a user on Kimi K2.6 saw 150K local vs 27M on the
OpenRouter side over the same window. Precise-looking numbers next to
provider billing create false confidence and support load.

This change adds dashboard.show_token_analytics (default False) to gate:
- The Analytics nav item (hidden from sidebar when off)
- The Analytics page (renders an explanation card instead of charts)
- Token bars, totals, cost figures, avg/api_calls on the Models page

The Models page keeps capability metadata (context window, vision,
tools, reasoning), the use-as-main/aux menu, sessions count, and
last-used timestamps when the flag is off.

Set dashboard.show_token_analytics: true in config.yaml to opt back in
to the local debug estimate. Fixing the underlying accounting (issue
#23270) is a separate, larger workstream.

Refs: #23270, #21705
This commit is contained in:
Teknium 2026-05-13 22:20:25 -07:00 committed by GitHub
parent e90508103c
commit f7ad2f1115
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 249 additions and 100 deletions

View file

@ -971,6 +971,21 @@ DEFAULT_CONFIG = {
# Web dashboard settings # Web dashboard settings
"dashboard": { "dashboard": {
"theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose" "theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
# Hide the token/cost analytics surfaces (Analytics page, token bars and
# cost figures on the Models page) by default. The numbers shown there
# are a local debug estimate: they only count successful main-agent
# responses with a usable ``response.usage``, and silently exclude every
# auxiliary call (context compression, title generation, vision,
# session search, web extract, smart approval, MCP routing, plugin LLM
# access) plus provider-side retries, fallback attempts, and any call
# whose usage block didn't come back. Cache writes are also missing
# from the API response. On models with heavy auxiliary traffic
# (Kimi K2.6, MiniMax M2.7) the local total can be 10x-100x lower than
# the provider bill, which is worse than hiding the numbers entirely
# because they look precise enough to compare against the provider.
# Set this to True to re-enable the surfaces with the understanding
# that the numbers are a local lower-bound estimate, not billing.
"show_token_analytics": False,
}, },
# Privacy settings # Privacy settings

View file

@ -75,6 +75,7 @@ import { PluginPage, PluginSlot, usePlugins } from "@/plugins";
import type { PluginManifest } from "@/plugins"; import type { PluginManifest } from "@/plugins";
import { useTheme } from "@/themes"; import { useTheme } from "@/themes";
import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags"; import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";
import { api } from "@/lib/api";
function RootRedirect() { function RootRedirect() {
return <Navigate to="/sessions" replace />; return <Navigate to="/sessions" replace />;
@ -316,6 +317,21 @@ export default function App() {
const isChatRoute = normalizedPath === "/chat"; const isChatRoute = normalizedPath === "/chat";
const embeddedChat = isDashboardEmbeddedChatEnabled(); const embeddedChat = isDashboardEmbeddedChatEnabled();
// `dashboard.show_token_analytics` gates the Analytics nav item. The
// page itself remains reachable by URL (it renders an explanation when
// the flag is off — see AnalyticsPage), but hiding the nav entry avoids
// steering users toward misleading token/cost numbers. Default off.
const [showTokenAnalytics, setShowTokenAnalytics] = useState(false);
useEffect(() => {
api
.getConfig()
.then((cfg) => {
const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
setShowTokenAnalytics(dash.show_token_analytics === true);
})
.catch(() => setShowTokenAnalytics(false));
}, []);
// A plugin can replace the built-in /chat page via `tab.override: "/chat"` // A plugin can replace the built-in /chat page via `tab.override: "/chat"`
// in its manifest. When one does, `buildRoutes` already swaps the route // in its manifest. When one does, `buildRoutes` already swaps the route
// element for <PluginPage /> — but we also have to suppress the // element for <PluginPage /> — but we also have to suppress the
@ -346,11 +362,12 @@ export default function App() {
[embeddedChat], [embeddedChat],
); );
const builtinNav = useMemo( const builtinNav = useMemo(() => {
() => const base = embeddedChat
embeddedChat ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] : BUILTIN_NAV_REST, ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST]
[embeddedChat], : BUILTIN_NAV_REST;
); return showTokenAnalytics ? base : base.filter((n) => n.path !== "/analytics");
}, [embeddedChat, showTokenAnalytics]);
const sidebarNav = useMemo( const sidebarNav = useMemo(
() => partitionSidebarNav(builtinNav, manifests), () => partitionSidebarNav(builtinNav, manifests),

View file

@ -397,10 +397,26 @@ export default function AnalyticsPage() {
const [data, setData] = useState<AnalyticsResponse | null>(null); const [data, setData] = useState<AnalyticsResponse | null>(null);
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
// Gated on `dashboard.show_token_analytics` (default off). When off the
// page renders an explanation card instead of fetching analytics — the
// local token counts exclude auxiliary calls and provider retries, so
// they diverge from provider billing in ways that mislead users.
const [showTokens, setShowTokens] = useState<boolean | null>(null);
const { t } = useI18n(); const { t } = useI18n();
const { setAfterTitle, setEnd } = usePageHeader(); const { setAfterTitle, setEnd } = usePageHeader();
useEffect(() => {
api
.getConfig()
.then((cfg) => {
const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
setShowTokens(dash.show_token_analytics === true);
})
.catch(() => setShowTokens(false));
}, []);
const load = useCallback(() => { const load = useCallback(() => {
if (!showTokens) return;
setLoading(true); setLoading(true);
setError(null); setError(null);
api api
@ -408,7 +424,7 @@ export default function AnalyticsPage() {
.then(setData) .then(setData)
.catch((err) => setError(String(err))) .catch((err) => setError(String(err)))
.finally(() => setLoading(false)); .finally(() => setLoading(false));
}, [days]); }, [days, showTokens]);
useLayoutEffect(() => { useLayoutEffect(() => {
const periodLabel = const periodLabel =
@ -422,37 +438,39 @@ export default function AnalyticsPage() {
</span>, </span>,
); );
setEnd( setEnd(
<div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2"> showTokens === false ? null : (
<div className="flex flex-wrap items-center gap-1.5"> <div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2">
{PERIODS.map((p) => ( <div className="flex flex-wrap items-center gap-1.5">
<Button {PERIODS.map((p) => (
key={p.label} <Button
type="button" key={p.label}
size="sm" type="button"
outlined={days !== p.days} size="sm"
onClick={() => setDays(p.days)} outlined={days !== p.days}
> onClick={() => setDays(p.days)}
{p.label} >
</Button> {p.label}
))} </Button>
))}
</div>
<Button
type="button"
size="sm"
outlined
onClick={load}
disabled={loading}
prefix={loading ? <Spinner /> : <RefreshCw />}
>
{t.common.refresh}
</Button>
</div> </div>
<Button ),
type="button"
size="sm"
outlined
onClick={load}
disabled={loading}
prefix={loading ? <Spinner /> : <RefreshCw />}
>
{t.common.refresh}
</Button>
</div>,
); );
return () => { return () => {
setAfterTitle(null); setAfterTitle(null);
setEnd(null); setEnd(null);
}; };
}, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]); }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh, showTokens]);
useEffect(() => { useEffect(() => {
load(); load();
@ -461,13 +479,51 @@ export default function AnalyticsPage() {
return ( return (
<div className="flex flex-col gap-6"> <div className="flex flex-col gap-6">
<PluginSlot name="analytics:top" /> <PluginSlot name="analytics:top" />
{loading && !data && (
{showTokens === false && (
<Card>
<CardContent className="py-12">
<div className="mx-auto flex max-w-2xl flex-col gap-3 text-sm text-muted-foreground">
<h2 className="font-display text-base tracking-wider uppercase text-foreground">
Token analytics hidden
</h2>
<p>
The token, cost, and per-day analytics on this page are a
local debug estimate. They only count successful main-agent
responses with a usable <span className="font-mono">usage</span>{" "}
block, and silently exclude auxiliary calls (context
compression, title generation, vision, session search, web
extract, smart approvals, MCP routing, plugin LLM access)
plus provider-side retries and fallback attempts. Cache
writes are missing entirely.
</p>
<p>
On models with heavy auxiliary traffic (Kimi K2.6, MiniMax
M2.7) the local total can be 10x–100x lower than what your
provider bills. Hiding these numbers is safer than letting
them look authoritative.
</p>
<p>
Check your provider dashboard (OpenRouter, Anthropic, etc.)
for actual usage and billing. To re-enable the local debug
estimate anyway, set{" "}
<span className="font-mono">
dashboard.show_token_analytics: true
</span>{" "}
in <a href="/config" className="underline">Config</a>.
</p>
</div>
</CardContent>
</Card>
)}
{showTokens && loading && !data && (
<div className="flex items-center justify-center py-24"> <div className="flex items-center justify-center py-24">
<Spinner className="text-2xl text-primary" /> <Spinner className="text-2xl text-primary" />
</div> </div>
)} )}
{error && ( {showTokens && error && (
<Card> <Card>
<CardContent className="py-6"> <CardContent className="py-6">
<p className="text-sm text-destructive text-center">{error}</p> <p className="text-sm text-destructive text-center">{error}</p>
@ -475,7 +531,7 @@ export default function AnalyticsPage() {
</Card> </Card>
)} )}
{data && ( {showTokens && data && (
<> <>
<div className="grid gap-6 lg:grid-cols-2"> <div className="grid gap-6 lg:grid-cols-2">
<Card> <Card>

View file

@ -310,12 +310,14 @@ function ModelCard({
main, main,
aux, aux,
onAssigned, onAssigned,
showTokens,
}: { }: {
entry: ModelsAnalyticsModelEntry; entry: ModelsAnalyticsModelEntry;
rank: number; rank: number;
main: { provider: string; model: string } | null; main: { provider: string; model: string } | null;
aux: AuxiliaryTaskAssignment[]; aux: AuxiliaryTaskAssignment[];
onAssigned(): void; onAssigned(): void;
showTokens: boolean;
}) { }) {
const { t } = useI18n(); const { t } = useI18n();
const provider = entry.provider || modelVendor(entry.model); const provider = entry.provider || modelVendor(entry.model);
@ -375,14 +377,27 @@ function ModelCard({
</div> </div>
</div> </div>
<div className="flex flex-col items-end gap-1 shrink-0"> <div className="flex flex-col items-end gap-1 shrink-0">
<div className="text-right"> {showTokens ? (
<div className="text-xs font-mono font-semibold"> <div className="text-right">
{formatTokens(totalTokens)} <div className="text-xs font-mono font-semibold">
{formatTokens(totalTokens)}
</div>
<div className="text-[10px] text-muted-foreground">
{t.models.tokens}
</div>
</div> </div>
<div className="text-[10px] text-muted-foreground"> ) : (
{t.models.tokens} entry.sessions > 0 && (
</div> <div className="text-right">
</div> <div className="text-xs font-mono font-semibold">
{entry.sessions}
</div>
<div className="text-[10px] text-muted-foreground">
{t.models.sessions}
</div>
</div>
)
)}
<UseAsMenu <UseAsMenu
provider={provider} provider={provider}
model={entry.model} model={entry.model}
@ -394,47 +409,51 @@ function ModelCard({
</div> </div>
</CardHeader> </CardHeader>
<CardContent className="space-y-3 pt-3"> <CardContent className="space-y-3 pt-3">
<TokenBar {showTokens && (
input={entry.input_tokens} <>
output={entry.output_tokens} <TokenBar
cacheRead={entry.cache_read_tokens} input={entry.input_tokens}
reasoning={entry.reasoning_tokens} output={entry.output_tokens}
/> cacheRead={entry.cache_read_tokens}
reasoning={entry.reasoning_tokens}
/>
<div className="grid grid-cols-3 gap-2 text-xs"> <div className="grid grid-cols-3 gap-2 text-xs">
<div className="text-center"> <div className="text-center">
<div className="font-mono font-semibold">{entry.sessions}</div> <div className="font-mono font-semibold">{entry.sessions}</div>
<div className="text-[10px] text-muted-foreground"> <div className="text-[10px] text-muted-foreground">
{t.models.sessions} {t.models.sessions}
</div>
</div>
<div className="text-center">
<div className="font-mono font-semibold">
{formatTokens(entry.avg_tokens_per_session)}
</div>
<div className="text-[10px] text-muted-foreground">
{t.models.avgPerSession}
</div>
</div>
<div className="text-center">
<div className="font-mono font-semibold">
{entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"}
</div>
<div className="text-[10px] text-muted-foreground">
{t.models.apiCalls}
</div>
</div>
</div> </div>
</div> </>
<div className="text-center"> )}
<div className="font-mono font-semibold">
{formatTokens(entry.avg_tokens_per_session)}
</div>
<div className="text-[10px] text-muted-foreground">
{t.models.avgPerSession}
</div>
</div>
<div className="text-center">
<div className="font-mono font-semibold">
{entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"}
</div>
<div className="text-[10px] text-muted-foreground">
{t.models.apiCalls}
</div>
</div>
</div>
<div className="flex items-center justify-between text-[10px] text-muted-foreground border-t border-border/30 pt-2"> <div className="flex items-center justify-between text-[10px] text-muted-foreground border-t border-border/30 pt-2">
<div className="flex items-center gap-3"> <div className="flex items-center gap-3">
{entry.estimated_cost > 0 && ( {showTokens && entry.estimated_cost > 0 && (
<span className="flex items-center gap-0.5"> <span className="flex items-center gap-0.5">
<DollarSign className="h-2.5 w-2.5" /> <DollarSign className="h-2.5 w-2.5" />
{formatCost(entry.estimated_cost)} {formatCost(entry.estimated_cost)}
</span> </span>
)} )}
{entry.tool_calls > 0 && ( {showTokens && entry.tool_calls > 0 && (
<span className="flex items-center gap-0.5"> <span className="flex items-center gap-0.5">
<Zap className="h-2.5 w-2.5" /> <Zap className="h-2.5 w-2.5" />
{entry.tool_calls} {t.models.toolCalls} {entry.tool_calls} {t.models.toolCalls}
@ -752,9 +771,26 @@ export default function ModelsPage() {
const [loading, setLoading] = useState(true); const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null); const [error, setError] = useState<string | null>(null);
const [saveKey, setSaveKey] = useState(0); const [saveKey, setSaveKey] = useState(0);
// Gate the token/cost UI on `dashboard.show_token_analytics`. See
// hermes_cli/config.py for the rationale: the numbers exclude auxiliary
// calls and retries, so they're misleading next to provider billing.
const [showTokens, setShowTokens] = useState(false);
const { t } = useI18n(); const { t } = useI18n();
const { setAfterTitle, setEnd } = usePageHeader(); const { setAfterTitle, setEnd } = usePageHeader();
useEffect(() => {
api
.getConfig()
.then((cfg) => {
const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
setShowTokens(dash.show_token_analytics === true);
})
.catch(() => {
// Default to hidden on any failure — safer than showing wrong numbers.
setShowTokens(false);
});
}, []);
const load = useCallback(() => { const load = useCallback(() => {
setLoading(true); setLoading(true);
setError(null); setError(null);
@ -842,35 +878,59 @@ export default function ModelsPage() {
<Card> <Card>
<CardContent className="py-6"> <CardContent className="py-6">
<Stats <Stats
items={[ items={
{ showTokens
label: t.models.modelsUsed, ? [
value: String(data.totals.distinct_models), {
}, label: t.models.modelsUsed,
{ value: String(data.totals.distinct_models),
label: t.analytics.totalTokens, },
value: formatTokens( {
data.totals.total_input + data.totals.total_output, label: t.analytics.totalTokens,
), value: formatTokens(
}, data.totals.total_input + data.totals.total_output,
{ ),
label: t.analytics.input, },
value: formatTokens(data.totals.total_input), {
}, label: t.analytics.input,
{ value: formatTokens(data.totals.total_input),
label: t.analytics.output, },
value: formatTokens(data.totals.total_output), {
}, label: t.analytics.output,
{ value: formatTokens(data.totals.total_output),
label: t.models.estimatedCost, },
value: formatCost(data.totals.total_estimated_cost), {
}, label: t.models.estimatedCost,
{ value: formatCost(data.totals.total_estimated_cost),
label: t.analytics.totalSessions, },
value: String(data.totals.total_sessions), {
}, label: t.analytics.totalSessions,
]} value: String(data.totals.total_sessions),
},
]
: [
{
label: t.models.modelsUsed,
value: String(data.totals.distinct_models),
},
{
label: t.analytics.totalSessions,
value: String(data.totals.total_sessions),
},
]
}
/> />
{!showTokens && (
<p className="mt-4 text-[10px] text-muted-foreground/70 leading-relaxed">
Token & cost analytics are hidden because the local counts
exclude auxiliary calls (compression, vision, web extract,
…) and provider retries, so they diverge from your provider
bill. Enable{" "}
<span className="font-mono">dashboard.show_token_analytics</span>{" "}
in <a href="/config" className="underline">Config</a> to
show the local debug estimate anyway.
</p>
)}
</CardContent> </CardContent>
</Card> </Card>
)} )}
@ -902,6 +962,7 @@ export default function ModelsPage() {
main={aux?.main ?? null} main={aux?.main ?? null}
aux={aux?.tasks ?? []} aux={aux?.tasks ?? []}
onAssigned={onAssigned} onAssigned={onAssigned}
showTokens={showTokens}
/> />
))} ))}
</div> </div>