diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 6fd772e84ca..5d4ecb5b619 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -971,6 +971,21 @@ DEFAULT_CONFIG = { # Web dashboard settings "dashboard": { "theme": "default", # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose" + # Hide the token/cost analytics surfaces (Analytics page, token bars and + # cost figures on the Models page) by default. The numbers shown there + # are a local debug estimate: they only count successful main-agent + # responses with a usable ``response.usage``, and silently exclude every + # auxiliary call (context compression, title generation, vision, + # session search, web extract, smart approval, MCP routing, plugin LLM + # access) plus provider-side retries, fallback attempts, and any call + # whose usage block didn't come back. Cache writes are also missing + # from the API response. On models with heavy auxiliary traffic + # (Kimi K2.6, MiniMax M2.7) the local total can be 10x-100x lower than + # the provider bill, which is worse than hiding the numbers entirely + # because they look precise enough to compare against the provider. + # Set this to True to re-enable the surfaces with the understanding + # that the numbers are a local lower-bound estimate, not billing. + "show_token_analytics": False, }, # Privacy settings diff --git a/web/src/App.tsx b/web/src/App.tsx index d7239c2ad11..71a97113c24 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -75,6 +75,7 @@ import { PluginPage, PluginSlot, usePlugins } from "@/plugins"; import type { PluginManifest } from "@/plugins"; import { useTheme } from "@/themes"; import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags"; +import { api } from "@/lib/api"; function RootRedirect() { return ; @@ -316,6 +317,21 @@ export default function App() { const isChatRoute = normalizedPath === "/chat"; const embeddedChat = isDashboardEmbeddedChatEnabled(); + // `dashboard.show_token_analytics` gates the Analytics nav item. The + // page itself remains reachable by URL (it renders an explanation when + // the flag is off — see AnalyticsPage), but hiding the nav entry avoids + // surfacing misleading token/cost numbers in the sidebar. Default off. + const [showTokenAnalytics, setShowTokenAnalytics] = useState(false); + useEffect(() => { + api + .getConfig() + .then((cfg) => { + const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown }; + setShowTokenAnalytics(dash.show_token_analytics === true); + }) + .catch(() => setShowTokenAnalytics(false)); + }, []); + // A plugin can replace the built-in /chat page via `tab.override: "/chat"` // in its manifest. When one does, `buildRoutes` already swaps the route // element for — but we also have to suppress the @@ -346,11 +362,12 @@ export default function App() { [embeddedChat], ); - const builtinNav = useMemo( - () => - embeddedChat ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] : BUILTIN_NAV_REST, - [embeddedChat], - ); + const builtinNav = useMemo(() => { + const base = embeddedChat + ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] + : BUILTIN_NAV_REST; + return showTokenAnalytics ? base : base.filter((n) => n.path !== "/analytics"); + }, [embeddedChat, showTokenAnalytics]); const sidebarNav = useMemo( () => partitionSidebarNav(builtinNav, manifests), diff --git a/web/src/pages/AnalyticsPage.tsx b/web/src/pages/AnalyticsPage.tsx index 57943eba6f2..4896e760636 100644 --- a/web/src/pages/AnalyticsPage.tsx +++ b/web/src/pages/AnalyticsPage.tsx @@ -397,10 +397,26 @@ export default function AnalyticsPage() { const [data, setData] = useState(null); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); + // Gated on `dashboard.show_token_analytics` (default off). When off the + // page renders an explanation card instead of fetching analytics — the + // local token counts exclude auxiliary calls and provider retries, so + // they diverge from provider billing in ways that mislead users. + const [showTokens, setShowTokens] = useState(null); const { t } = useI18n(); const { setAfterTitle, setEnd } = usePageHeader(); + useEffect(() => { + api + .getConfig() + .then((cfg) => { + const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown }; + setShowTokens(dash.show_token_analytics === true); + }) + .catch(() => setShowTokens(false)); + }, []); + const load = useCallback(() => { + if (!showTokens) return; setLoading(true); setError(null); api @@ -408,7 +424,7 @@ export default function AnalyticsPage() { .then(setData) .catch((err) => setError(String(err))) .finally(() => setLoading(false)); - }, [days]); + }, [days, showTokens]); useLayoutEffect(() => { const periodLabel = @@ -422,37 +438,39 @@ export default function AnalyticsPage() { , ); setEnd( -
-
- {PERIODS.map((p) => ( - - ))} + showTokens === false ? null : ( +
+
+ {PERIODS.map((p) => ( + + ))} +
+
- -
, + ), ); return () => { setAfterTitle(null); setEnd(null); }; - }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]); + }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh, showTokens]); useEffect(() => { load(); @@ -461,13 +479,51 @@ export default function AnalyticsPage() { return (
- {loading && !data && ( + + {showTokens === false && ( + + +
+

+ Token analytics hidden +

+

+ The token, cost, and per-day analytics on this page are a + local debug estimate. They only count successful main-agent + responses with a usable usage{" "} + block, and silently exclude auxiliary calls (context + compression, title generation, vision, session search, web + extract, smart approvals, MCP routing, plugin LLM access) + plus provider-side retries and fallback attempts. Cache + writes are missing entirely. +

+

+ On models with heavy auxiliary traffic (Kimi K2.6, MiniMax + M2.7) the local total can be 10x–100x lower than what your + provider bills. Hiding these numbers is safer than letting + them look authoritative. +

+

+ Check your provider dashboard (OpenRouter, Anthropic, etc.) + for actual usage and billing. To re-enable the local debug + estimate anyway, set{" "} + + dashboard.show_token_analytics: true + {" "} + in Config. +

+
+
+
+ )} + + {showTokens && loading && !data && (
)} - {error && ( + {showTokens && error && (

{error}

@@ -475,7 +531,7 @@ export default function AnalyticsPage() {
)} - {data && ( + {showTokens && data && ( <>
diff --git a/web/src/pages/ModelsPage.tsx b/web/src/pages/ModelsPage.tsx index 01c239d7034..f09104d4241 100644 --- a/web/src/pages/ModelsPage.tsx +++ b/web/src/pages/ModelsPage.tsx @@ -310,12 +310,14 @@ function ModelCard({ main, aux, onAssigned, + showTokens, }: { entry: ModelsAnalyticsModelEntry; rank: number; main: { provider: string; model: string } | null; aux: AuxiliaryTaskAssignment[]; onAssigned(): void; + showTokens: boolean; }) { const { t } = useI18n(); const provider = entry.provider || modelVendor(entry.model); @@ -375,14 +377,27 @@ function ModelCard({
-
-
- {formatTokens(totalTokens)} + {showTokens ? ( +
+
+ {formatTokens(totalTokens)} +
+
+ {t.models.tokens} +
-
- {t.models.tokens} -
-
+ ) : ( + entry.sessions > 0 && ( +
+
+ {entry.sessions} +
+
+ {t.models.sessions} +
+
+ ) + )} - + {showTokens && ( + <> + -
-
-
{entry.sessions}
-
- {t.models.sessions} +
+
+
{entry.sessions}
+
+ {t.models.sessions} +
+
+
+
+ {formatTokens(entry.avg_tokens_per_session)} +
+
+ {t.models.avgPerSession} +
+
+
+
+ {entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"} +
+
+ {t.models.apiCalls} +
+
-
-
-
- {formatTokens(entry.avg_tokens_per_session)} -
-
- {t.models.avgPerSession} -
-
-
-
- {entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"} -
-
- {t.models.apiCalls} -
-
-
+ + )}
- {entry.estimated_cost > 0 && ( + {showTokens && entry.estimated_cost > 0 && ( {formatCost(entry.estimated_cost)} )} - {entry.tool_calls > 0 && ( + {showTokens && entry.tool_calls > 0 && ( {entry.tool_calls} {t.models.toolCalls} @@ -752,9 +771,26 @@ export default function ModelsPage() { const [loading, setLoading] = useState(true); const [error, setError] = useState(null); const [saveKey, setSaveKey] = useState(0); + // Gate the token/cost UI on `dashboard.show_token_analytics`. See + // hermes_cli/config.py for the rationale: the numbers exclude auxiliary + // calls and retries, so they're misleading next to provider billing. + const [showTokens, setShowTokens] = useState(false); const { t } = useI18n(); const { setAfterTitle, setEnd } = usePageHeader(); + useEffect(() => { + api + .getConfig() + .then((cfg) => { + const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown }; + setShowTokens(dash.show_token_analytics === true); + }) + .catch(() => { + // Default to hidden on any failure — safer than showing wrong numbers. + setShowTokens(false); + }); + }, []); + const load = useCallback(() => { setLoading(true); setError(null); @@ -842,35 +878,59 @@ export default function ModelsPage() { + {!showTokens && ( +

+ Token & cost analytics are hidden because the local counts + exclude auxiliary calls (compression, vision, web extract, + …) and provider retries, so they diverge from your provider + bill. Enable{" "} + dashboard.show_token_analytics{" "} + in Config to + show the local debug estimate anyway. +

+ )}
)} @@ -902,6 +962,7 @@ export default function ModelsPage() { main={aux?.main ?? null} aux={aux?.tasks ?? []} onAssigned={onAssigned} + showTokens={showTokens} /> ))}