From f7ad2f1115eb370798abe1aca4802d96fe889795 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Wed, 13 May 2026 22:20:25 -0700
Subject: [PATCH] feat(dashboard): hide token/cost analytics behind config flag
 (default off) (#25438)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Analytics page and the token/cost surfaces on the Models page show
local debug estimates only. Totals count input+output tokens (the bar
visualization also adds cache_read and reasoning, but omits cache_write
entirely), and only from successful main-agent responses that returned
a usable usage block.

Excluded silently:
- All auxiliary calls — context compression, title generation, vision,
  session search, web extract, smart approvals, MCP routing, plugin LLM
  access (13 production call sites bypass update_token_counts)
- Provider-side retries, fallback attempts
- Any call whose usage block didn't come back
- cache_write_tokens (column exists in sessions table but not returned
  by /api/analytics/models)

Real-world impact: a user on Kimi K2.6 saw 150K local vs 27M on the
OpenRouter side over the same window. Precise-looking numbers next to
provider billing create false confidence and support load.

This change adds dashboard.show_token_analytics (default False) to gate:
- The Analytics nav item (hidden from sidebar when off)
- The Analytics page (renders an explanation card instead of charts)
- Token bars, totals, cost figures, avg-per-session/API-call stats, and
  tool-call counts on the Models page

The Models page keeps capability metadata (context window, vision,
tools, reasoning), the use-as-main/aux menu, sessions count, and
last-used timestamps when the flag is off.

Set dashboard.show_token_analytics: true in config.yaml to opt back in
to the local debug estimate. Fixing the underlying accounting (issue
#23270) is a separate, larger workstream.

Refs: #23270, #21705
---
 hermes_cli/config.py            |  15 +++
 web/src/App.tsx                 |  27 ++++-
 web/src/pages/AnalyticsPage.tsx | 114 ++++++++++++++-----
 web/src/pages/ModelsPage.tsx    | 193 +++++++++++++++++++++-----------
 4 files changed, 249 insertions(+), 100 deletions(-)

diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index 6fd772e84ca..5d4ecb5b619 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -971,6 +971,21 @@ DEFAULT_CONFIG = {
     # Web dashboard settings
     "dashboard": {
         "theme": "default",  # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
+        # Hide the token/cost analytics surfaces (Analytics page, token bars and
+        # cost figures on the Models page) by default.  The numbers shown there
+        # are a local debug estimate: they only count successful main-agent
+        # responses with a usable ``response.usage``, and silently exclude every
+        # auxiliary call (context compression, title generation, vision,
+        # session search, web extract, smart approval, MCP routing, plugin LLM
+        # access) plus provider-side retries, fallback attempts, and any call
+        # whose usage block didn't come back.  Cache writes are also missing
+        # from the API response.  On models with heavy auxiliary traffic
+        # (Kimi K2.6, MiniMax M2.7) the local total can be 10x-100x lower than
+        # the provider bill.  Showing such numbers is worse than hiding them,
+        # because they look precise enough to compare against the provider.
+        # Set this to True to re-enable the surfaces with the understanding
+        # that the numbers are a local lower-bound estimate, not billing.
+        "show_token_analytics": False,
     },
 
     # Privacy settings
diff --git a/web/src/App.tsx b/web/src/App.tsx
index d7239c2ad11..71a97113c24 100644
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@@ -75,6 +75,7 @@ import { PluginPage, PluginSlot, usePlugins } from "@/plugins";
 import type { PluginManifest } from "@/plugins";
 import { useTheme } from "@/themes";
 import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";
+import { api } from "@/lib/api";
 
 function RootRedirect() {
   return <Navigate to="/sessions" replace />;
@@ -316,6 +317,21 @@ export default function App() {
   const isChatRoute = normalizedPath === "/chat";
   const embeddedChat = isDashboardEmbeddedChatEnabled();
 
+  // `dashboard.show_token_analytics` (default off) gates the Analytics nav
+  // item.  The page itself remains reachable by URL (it renders an
+  // explanation when the flag is off — see AnalyticsPage); hiding the
+  // sidebar entry avoids steering users to misleading token/cost numbers.
+  const [showTokenAnalytics, setShowTokenAnalytics] = useState(false);
+  useEffect(() => {
+    api
+      .getConfig()
+      .then((cfg) => {
+        const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
+        setShowTokenAnalytics(dash.show_token_analytics === true);
+      })
+      .catch(() => setShowTokenAnalytics(false));
+  }, []);
+
   // A plugin can replace the built-in /chat page via `tab.override: "/chat"`
   // in its manifest.  When one does, `buildRoutes` already swaps the route
   // element for <PluginPage /> — but we also have to suppress the
@@ -346,11 +362,12 @@ export default function App() {
     [embeddedChat],
   );
 
-  const builtinNav = useMemo(
-    () =>
-      embeddedChat ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] : BUILTIN_NAV_REST,
-    [embeddedChat],
-  );
+  const builtinNav = useMemo(() => {
+    const base = embeddedChat
+      ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST]
+      : BUILTIN_NAV_REST;
+    return showTokenAnalytics ? base : base.filter((n) => n.path !== "/analytics");
+  }, [embeddedChat, showTokenAnalytics]);
 
   const sidebarNav = useMemo(
     () => partitionSidebarNav(builtinNav, manifests),
diff --git a/web/src/pages/AnalyticsPage.tsx b/web/src/pages/AnalyticsPage.tsx
index 57943eba6f2..4896e760636 100644
--- a/web/src/pages/AnalyticsPage.tsx
+++ b/web/src/pages/AnalyticsPage.tsx
@@ -397,10 +397,26 @@ export default function AnalyticsPage() {
   const [data, setData] = useState<AnalyticsResponse | null>(null);
   const [loading, setLoading] = useState(true);
   const [error, setError] = useState<string | null>(null);
+  // Gated on `dashboard.show_token_analytics` (default off).  When off the
+  // page renders an explanation card instead of fetching analytics — the
+  // local token counts exclude auxiliary calls and provider retries, so
+  // they diverge from provider billing in ways that mislead users.
+  const [showTokens, setShowTokens] = useState<boolean | null>(null);
   const { t } = useI18n();
   const { setAfterTitle, setEnd } = usePageHeader();
 
+  useEffect(() => {
+    api
+      .getConfig()
+      .then((cfg) => {
+        const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
+        setShowTokens(dash.show_token_analytics === true);
+      })
+      .catch(() => setShowTokens(false));
+  }, []);
+
   const load = useCallback(() => {
+    if (!showTokens) return;
     setLoading(true);
     setError(null);
     api
@@ -408,7 +424,7 @@ export default function AnalyticsPage() {
       .then(setData)
       .catch((err) => setError(String(err)))
       .finally(() => setLoading(false));
-  }, [days]);
+  }, [days, showTokens]);
 
   useLayoutEffect(() => {
     const periodLabel =
@@ -422,37 +438,39 @@ export default function AnalyticsPage() {
       </span>,
     );
     setEnd(
-      <div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2">
-        <div className="flex flex-wrap items-center gap-1.5">
-          {PERIODS.map((p) => (
-            <Button
-              key={p.label}
-              type="button"
-              size="sm"
-              outlined={days !== p.days}
-              onClick={() => setDays(p.days)}
-            >
-              {p.label}
-            </Button>
-          ))}
+      showTokens === false ? null : (
+        <div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2">
+          <div className="flex flex-wrap items-center gap-1.5">
+            {PERIODS.map((p) => (
+              <Button
+                key={p.label}
+                type="button"
+                size="sm"
+                outlined={days !== p.days}
+                onClick={() => setDays(p.days)}
+              >
+                {p.label}
+              </Button>
+            ))}
+          </div>
+          <Button
+            type="button"
+            size="sm"
+            outlined
+            onClick={load}
+            disabled={loading}
+            prefix={loading ? <Spinner /> : <RefreshCw />}
+          >
+            {t.common.refresh}
+          </Button>
         </div>
-        <Button
-          type="button"
-          size="sm"
-          outlined
-          onClick={load}
-          disabled={loading}
-          prefix={loading ? <Spinner /> : <RefreshCw />}
-        >
-          {t.common.refresh}
-        </Button>
-      </div>,
+      ),
     );
     return () => {
       setAfterTitle(null);
       setEnd(null);
     };
-  }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]);
+  }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh, showTokens]);
 
   useEffect(() => {
     load();
@@ -461,13 +479,51 @@ export default function AnalyticsPage() {
   return (
     <div className="flex flex-col gap-6">
       <PluginSlot name="analytics:top" />
-      {loading && !data && (
+
+      {showTokens === false && (
+        <Card>
+          <CardContent className="py-12">
+            <div className="mx-auto flex max-w-2xl flex-col gap-3 text-sm text-muted-foreground">
+              <h2 className="font-display text-base tracking-wider uppercase text-foreground">
+                Token analytics hidden
+              </h2>
+              <p>
+                The token, cost, and per-day analytics on this page are a
+                local debug estimate. They only count successful main-agent
+                responses with a usable <span className="font-mono">usage</span>{" "}
+                block, and silently exclude auxiliary calls (context
+                compression, title generation, vision, session search, web
+                extract, smart approvals, MCP routing, plugin LLM access)
+                plus provider-side retries and fallback attempts. Cache
+                writes are missing entirely.
+              </p>
+              <p>
+                On models with heavy auxiliary traffic (Kimi K2.6, MiniMax
+                M2.7) the local total can be 10x–100x lower than what your
+                provider bills. Hiding these numbers is safer than letting
+                them look authoritative.
+              </p>
+              <p>
+                Check your provider dashboard (OpenRouter, Anthropic, etc.)
+                for actual usage and billing. To re-enable the local debug
+                estimate anyway, set{" "}
+                <span className="font-mono">
+                  dashboard.show_token_analytics: true
+                </span>{" "}
+                in <a href="/config" className="underline">Config</a>.
+              </p>
+            </div>
+          </CardContent>
+        </Card>
+      )}
+
+      {showTokens && loading && !data && (
         <div className="flex items-center justify-center py-24">
           <Spinner className="text-2xl text-primary" />
         </div>
       )}
 
-      {error && (
+      {showTokens && error && (
         <Card>
           <CardContent className="py-6">
             <p className="text-sm text-destructive text-center">{error}</p>
@@ -475,7 +531,7 @@ export default function AnalyticsPage() {
         </Card>
       )}
 
-      {data && (
+      {showTokens && data && (
         <>
           <div className="grid gap-6 lg:grid-cols-2">
             <Card>
diff --git a/web/src/pages/ModelsPage.tsx b/web/src/pages/ModelsPage.tsx
index 01c239d7034..f09104d4241 100644
--- a/web/src/pages/ModelsPage.tsx
+++ b/web/src/pages/ModelsPage.tsx
@@ -310,12 +310,14 @@ function ModelCard({
   main,
   aux,
   onAssigned,
+  showTokens,
 }: {
   entry: ModelsAnalyticsModelEntry;
   rank: number;
   main: { provider: string; model: string } | null;
   aux: AuxiliaryTaskAssignment[];
   onAssigned(): void;
+  showTokens: boolean;
 }) {
   const { t } = useI18n();
   const provider = entry.provider || modelVendor(entry.model);
@@ -375,14 +377,27 @@ function ModelCard({
             </div>
           </div>
           <div className="flex flex-col items-end gap-1 shrink-0">
-            <div className="text-right">
-              <div className="text-xs font-mono font-semibold">
-                {formatTokens(totalTokens)}
+            {showTokens ? (
+              <div className="text-right">
+                <div className="text-xs font-mono font-semibold">
+                  {formatTokens(totalTokens)}
+                </div>
+                <div className="text-[10px] text-muted-foreground">
+                  {t.models.tokens}
+                </div>
               </div>
-              <div className="text-[10px] text-muted-foreground">
-                {t.models.tokens}
-              </div>
-            </div>
+            ) : (
+              entry.sessions > 0 && (
+                <div className="text-right">
+                  <div className="text-xs font-mono font-semibold">
+                    {entry.sessions}
+                  </div>
+                  <div className="text-[10px] text-muted-foreground">
+                    {t.models.sessions}
+                  </div>
+                </div>
+              )
+            )}
             <UseAsMenu
               provider={provider}
               model={entry.model}
@@ -394,47 +409,51 @@ function ModelCard({
         </div>
       </CardHeader>
       <CardContent className="space-y-3 pt-3">
-        <TokenBar
-          input={entry.input_tokens}
-          output={entry.output_tokens}
-          cacheRead={entry.cache_read_tokens}
-          reasoning={entry.reasoning_tokens}
-        />
+        {showTokens && (
+          <>
+            <TokenBar
+              input={entry.input_tokens}
+              output={entry.output_tokens}
+              cacheRead={entry.cache_read_tokens}
+              reasoning={entry.reasoning_tokens}
+            />
 
-        <div className="grid grid-cols-3 gap-2 text-xs">
-          <div className="text-center">
-            <div className="font-mono font-semibold">{entry.sessions}</div>
-            <div className="text-[10px] text-muted-foreground">
-              {t.models.sessions}
+            <div className="grid grid-cols-3 gap-2 text-xs">
+              <div className="text-center">
+                <div className="font-mono font-semibold">{entry.sessions}</div>
+                <div className="text-[10px] text-muted-foreground">
+                  {t.models.sessions}
+                </div>
+              </div>
+              <div className="text-center">
+                <div className="font-mono font-semibold">
+                  {formatTokens(entry.avg_tokens_per_session)}
+                </div>
+                <div className="text-[10px] text-muted-foreground">
+                  {t.models.avgPerSession}
+                </div>
+              </div>
+              <div className="text-center">
+                <div className="font-mono font-semibold">
+                  {entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"}
+                </div>
+                <div className="text-[10px] text-muted-foreground">
+                  {t.models.apiCalls}
+                </div>
+              </div>
             </div>
-          </div>
-          <div className="text-center">
-            <div className="font-mono font-semibold">
-              {formatTokens(entry.avg_tokens_per_session)}
-            </div>
-            <div className="text-[10px] text-muted-foreground">
-              {t.models.avgPerSession}
-            </div>
-          </div>
-          <div className="text-center">
-            <div className="font-mono font-semibold">
-              {entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"}
-            </div>
-            <div className="text-[10px] text-muted-foreground">
-              {t.models.apiCalls}
-            </div>
-          </div>
-        </div>
+          </>
+        )}
 
         <div className="flex items-center justify-between text-[10px] text-muted-foreground border-t border-border/30 pt-2">
           <div className="flex items-center gap-3">
-            {entry.estimated_cost > 0 && (
+            {showTokens && entry.estimated_cost > 0 && (
               <span className="flex items-center gap-0.5">
                 <DollarSign className="h-2.5 w-2.5" />
                 {formatCost(entry.estimated_cost)}
               </span>
             )}
-            {entry.tool_calls > 0 && (
+            {showTokens && entry.tool_calls > 0 && (
               <span className="flex items-center gap-0.5">
                 <Zap className="h-2.5 w-2.5" />
                 {entry.tool_calls} {t.models.toolCalls}
@@ -752,9 +771,26 @@ export default function ModelsPage() {
   const [loading, setLoading] = useState(true);
   const [error, setError] = useState<string | null>(null);
   const [saveKey, setSaveKey] = useState(0);
+  // Gate the token/cost UI on `dashboard.show_token_analytics`.  See
+  // hermes_cli/config.py for the rationale: the numbers exclude auxiliary
+  // calls and retries, so they're misleading next to provider billing.
+  const [showTokens, setShowTokens] = useState(false);
   const { t } = useI18n();
   const { setAfterTitle, setEnd } = usePageHeader();
 
+  useEffect(() => {
+    api
+      .getConfig()
+      .then((cfg) => {
+        const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
+        setShowTokens(dash.show_token_analytics === true);
+      })
+      .catch(() => {
+        // Default to hidden on any failure — safer than showing wrong numbers.
+        setShowTokens(false);
+      });
+  }, []);
+
   const load = useCallback(() => {
     setLoading(true);
     setError(null);
@@ -842,35 +878,59 @@ export default function ModelsPage() {
           <Card>
             <CardContent className="py-6">
               <Stats
-                items={[
-                  {
-                    label: t.models.modelsUsed,
-                    value: String(data.totals.distinct_models),
-                  },
-                  {
-                    label: t.analytics.totalTokens,
-                    value: formatTokens(
-                      data.totals.total_input + data.totals.total_output,
-                    ),
-                  },
-                  {
-                    label: t.analytics.input,
-                    value: formatTokens(data.totals.total_input),
-                  },
-                  {
-                    label: t.analytics.output,
-                    value: formatTokens(data.totals.total_output),
-                  },
-                  {
-                    label: t.models.estimatedCost,
-                    value: formatCost(data.totals.total_estimated_cost),
-                  },
-                  {
-                    label: t.analytics.totalSessions,
-                    value: String(data.totals.total_sessions),
-                  },
-                ]}
+                items={
+                  showTokens
+                    ? [
+                        {
+                          label: t.models.modelsUsed,
+                          value: String(data.totals.distinct_models),
+                        },
+                        {
+                          label: t.analytics.totalTokens,
+                          value: formatTokens(
+                            data.totals.total_input + data.totals.total_output,
+                          ),
+                        },
+                        {
+                          label: t.analytics.input,
+                          value: formatTokens(data.totals.total_input),
+                        },
+                        {
+                          label: t.analytics.output,
+                          value: formatTokens(data.totals.total_output),
+                        },
+                        {
+                          label: t.models.estimatedCost,
+                          value: formatCost(data.totals.total_estimated_cost),
+                        },
+                        {
+                          label: t.analytics.totalSessions,
+                          value: String(data.totals.total_sessions),
+                        },
+                      ]
+                    : [
+                        {
+                          label: t.models.modelsUsed,
+                          value: String(data.totals.distinct_models),
+                        },
+                        {
+                          label: t.analytics.totalSessions,
+                          value: String(data.totals.total_sessions),
+                        },
+                      ]
+                }
               />
+              {!showTokens && (
+                <p className="mt-4 text-[10px] text-muted-foreground/70 leading-relaxed">
+                  Token & cost analytics are hidden because the local counts
+                  exclude auxiliary calls (compression, vision, web extract,
+                  …) and provider retries, so they diverge from your provider
+                  bill. Enable{" "}
+                  <span className="font-mono">dashboard.show_token_analytics</span>{" "}
+                  in <a href="/config" className="underline">Config</a> to
+                  show the local debug estimate anyway.
+                </p>
+              )}
             </CardContent>
           </Card>
         )}
@@ -902,6 +962,7 @@ export default function ModelsPage() {
                   main={aux?.main ?? null}
                   aux={aux?.tasks ?? []}
                   onAssigned={onAssigned}
+                  showTokens={showTokens}
                 />
               ))}
             </div>