From f7ad2f1115eb370798abe1aca4802d96fe889795 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 13 May 2026 22:20:25 -0700
Subject: [PATCH] feat(dashboard): hide token/cost analytics behind config flag
(default off) (#25438)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The Analytics page and the token/cost surfaces on the Models page show
local debug estimates only. They count input+output tokens (the bar
visualization also adds cache_read+reasoning, but omits cache_write
entirely) from successful main-agent responses that returned a usable
usage block.
Excluded silently:
- All auxiliary calls — context compression, title generation, vision,
session search, web extract, smart approvals, MCP routing, plugin LLM
access (13 production call sites bypass update_token_counts)
- Provider-side retries, fallback attempts
- Any call whose usage block didn't come back
- cache_write_tokens (column exists in sessions table but not returned
by /api/analytics/models)
Real-world impact: a user on Kimi K2.6 saw 150K local vs 27M on the
OpenRouter side over the same window. Precise-looking numbers next to
provider billing create false confidence and support load.
This change adds dashboard.show_token_analytics (default False) to gate:
- The Analytics nav item (hidden from sidebar when off)
- The Analytics page (renders an explanation card instead of charts)
- Token bars, totals, cost figures, avg/api_calls on the Models page
The Models page keeps capability metadata (context window, vision,
tools, reasoning), the use-as-main/aux menu, sessions count, and
last-used timestamps when the flag is off.
Set dashboard.show_token_analytics: true in config.yaml to opt back in
to the local debug estimate. Fixing the underlying accounting (issue
#23270) is a separate, larger workstream.
Refs: #23270, #21705
---
hermes_cli/config.py | 15 +++
web/src/App.tsx | 27 ++++-
web/src/pages/AnalyticsPage.tsx | 114 ++++++++++++++-----
web/src/pages/ModelsPage.tsx | 193 +++++++++++++++++++++-----------
4 files changed, 249 insertions(+), 100 deletions(-)
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 6fd772e84ca..5d4ecb5b619 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -971,6 +971,21 @@ DEFAULT_CONFIG = {
# Web dashboard settings
"dashboard": {
"theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
+ # Hide the token/cost analytics surfaces (Analytics page, token bars and
+ # cost figures on the Models page) by default. The numbers shown there
+ # are a local debug estimate: they only count successful main-agent
+ # responses with a usable ``response.usage``, and silently exclude every
+ # auxiliary call (context compression, title generation, vision,
+ # session search, web extract, smart approvals, MCP routing, plugin LLM
+ # access) plus provider-side retries, fallback attempts, and any call
+ # whose usage block didn't come back. Cache writes are also missing
+ # from the API response. On models with heavy auxiliary traffic
+ # (Kimi K2.6, MiniMax M2.7) the local total can be 10x-100x lower than
+ # the provider bill. Showing such numbers is worse than hiding them,
+ # because they look precise enough to compare against the provider.
+ # Set this to True to re-enable the surfaces with the understanding
+ # that the numbers are a local lower-bound estimate, not billing.
+ "show_token_analytics": False,
},
# Privacy settings
diff --git a/web/src/App.tsx b/web/src/App.tsx
index d7239c2ad11..71a97113c24 100644
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@@ -75,6 +75,7 @@ import { PluginPage, PluginSlot, usePlugins } from "@/plugins";
import type { PluginManifest } from "@/plugins";
import { useTheme } from "@/themes";
import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";
+import { api } from "@/lib/api";
function RootRedirect() {
return ;
@@ -316,6 +317,21 @@ export default function App() {
const isChatRoute = normalizedPath === "/chat";
const embeddedChat = isDashboardEmbeddedChatEnabled();
+ // `dashboard.show_token_analytics` gates the Analytics nav item. The
+ // page itself remains reachable by URL (it renders an explanation when
+ // the flag is off — see AnalyticsPage), but hiding the nav entry keeps
+ // the sidebar from advertising misleading token/cost numbers. Default off.
+ const [showTokenAnalytics, setShowTokenAnalytics] = useState(false);
+ useEffect(() => {
+ api
+ .getConfig()
+ .then((cfg) => {
+ const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
+ setShowTokenAnalytics(dash.show_token_analytics === true);
+ })
+ .catch(() => setShowTokenAnalytics(false));
+ }, []);
+
// A plugin can replace the built-in /chat page via `tab.override: "/chat"`
// in its manifest. When one does, `buildRoutes` already swaps the route
// element for — but we also have to suppress the
@@ -346,11 +362,12 @@ export default function App() {
[embeddedChat],
);
- const builtinNav = useMemo(
- () =>
- embeddedChat ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] : BUILTIN_NAV_REST,
- [embeddedChat],
- );
+ const builtinNav = useMemo(() => {
+ const base = embeddedChat
+ ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST]
+ : BUILTIN_NAV_REST;
+ return showTokenAnalytics ? base : base.filter((n) => n.path !== "/analytics");
+ }, [embeddedChat, showTokenAnalytics]);
const sidebarNav = useMemo(
() => partitionSidebarNav(builtinNav, manifests),
diff --git a/web/src/pages/AnalyticsPage.tsx b/web/src/pages/AnalyticsPage.tsx
index 57943eba6f2..4896e760636 100644
--- a/web/src/pages/AnalyticsPage.tsx
+++ b/web/src/pages/AnalyticsPage.tsx
@@ -397,10 +397,26 @@ export default function AnalyticsPage() {
const [data, setData] = useState(null);
const [loading, setLoading] = useState(true);
const [error, setError] = useState(null);
+ // Gated on `dashboard.show_token_analytics` (default off). When off the
+ // page renders an explanation card instead of fetching analytics — the
+ // local token counts exclude auxiliary calls and provider retries, so
+ // they diverge from provider billing in ways that mislead users.
+ const [showTokens, setShowTokens] = useState(null);
const { t } = useI18n();
const { setAfterTitle, setEnd } = usePageHeader();
+ useEffect(() => {
+ api
+ .getConfig()
+ .then((cfg) => {
+ const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
+ setShowTokens(dash.show_token_analytics === true);
+ })
+ .catch(() => setShowTokens(false));
+ }, []);
+
const load = useCallback(() => {
+ if (!showTokens) return;
setLoading(true);
setError(null);
api
@@ -408,7 +424,7 @@ export default function AnalyticsPage() {
.then(setData)
.catch((err) => setError(String(err)))
.finally(() => setLoading(false));
- }, [days]);
+ }, [days, showTokens]);
useLayoutEffect(() => {
const periodLabel =
@@ -422,37 +438,39 @@ export default function AnalyticsPage() {
,
);
setEnd(
-
+ The token, cost, and per-day analytics on this page are a
+ local debug estimate. They only count successful main-agent
+ responses with a usable usage{" "}
+ block, and silently exclude auxiliary calls (context
+ compression, title generation, vision, session search, web
+ extract, smart approvals, MCP routing, plugin LLM access)
+ plus provider-side retries and fallback attempts. Cache
+ writes are missing entirely.
+
+
+ On models with heavy auxiliary traffic (Kimi K2.6, MiniMax
+ M2.7) the local total can be 10x–100x lower than what your
+ provider bills. Hiding these numbers is safer than letting
+ them look authoritative.
+
+
+ Check your provider dashboard (OpenRouter, Anthropic, etc.)
+ for actual usage and billing. To re-enable the local debug
+ estimate anyway, set{" "}
+
+ dashboard.show_token_analytics: true
+ {" "}
+ in Config.
+
- {entry.estimated_cost > 0 && (
+ {showTokens && entry.estimated_cost > 0 && (
{formatCost(entry.estimated_cost)}
)}
- {entry.tool_calls > 0 && (
+ {showTokens && entry.tool_calls > 0 && (
{entry.tool_calls} {t.models.toolCalls}
@@ -752,9 +771,26 @@ export default function ModelsPage() {
const [loading, setLoading] = useState(true);
const [error, setError] = useState(null);
const [saveKey, setSaveKey] = useState(0);
+ // Gate the token/cost UI on `dashboard.show_token_analytics`. See
+ // hermes_cli/config.py for the rationale: the numbers exclude auxiliary
+ // calls and retries, so they're misleading next to provider billing.
+ const [showTokens, setShowTokens] = useState(false);
const { t } = useI18n();
const { setAfterTitle, setEnd } = usePageHeader();
+ useEffect(() => {
+ api
+ .getConfig()
+ .then((cfg) => {
+ const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
+ setShowTokens(dash.show_token_analytics === true);
+ })
+ .catch(() => {
+ // Default to hidden on any failure — safer than showing wrong numbers.
+ setShowTokens(false);
+ });
+ }, []);
+
const load = useCallback(() => {
setLoading(true);
setError(null);
@@ -842,35 +878,59 @@ export default function ModelsPage() {
+ {!showTokens && (
+
+ Token & cost analytics are hidden because the local counts
+ exclude auxiliary calls (compression, vision, web extract,
+ …) and provider retries, so they diverge from your provider
+ bill. Enable{" "}
+ dashboard.show_token_analytics{" "}
+ in Config to
+ show the local debug estimate anyway.
+