feat(dashboard): hide token/cost analytics behind config flag (default off) (#25438)

The Analytics page and the token/cost surfaces on the Models page show local debug estimates only. They count input+output (and a bar viz adds cache_read+reasoning, missing cache_write entirely) from successful main-agent responses that returned a usable usage block.

Excluded silently:
- All auxiliary calls — context compression, title generation, vision, session search, web extract, smart approvals, MCP routing, plugin LLM access (13 production call sites bypass update_token_counts)
- Provider-side retries, fallback attempts
- Any call whose usage block didn't come back
- cache_write_tokens (column exists in sessions table but not returned by /api/analytics/models)

Real-world impact: a user on Kimi K2.6 saw 150K local vs 27M on the OpenRouter side over the same window. Precise-looking numbers next to provider billing create false confidence and support load.

This change adds dashboard.show_token_analytics (default False) to gate:
- The Analytics nav item (hidden from sidebar when off)
- The Analytics page (renders an explanation card instead of charts)
- Token bars, totals, cost figures, avg/api_calls on the Models page

The Models page keeps capability metadata (context window, vision, tools, reasoning), the use-as-main/aux menu, sessions count, and last-used timestamps when the flag is off.

Set dashboard.show_token_analytics: true in config.yaml to opt back in to the local debug estimate. Fixing the underlying accounting (issue #23270) is a separate, larger workstream.

Refs: #23270, #21705
2026-05-18 04:41:56 +00:00 · 2026-05-13 22:20:25 -07:00 · 2026-05-13 22:20:25 -07:00 · f7ad2f1115
commit f7ad2f1115
parent e90508103c
4 changed files with 249 additions and 100 deletions
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@ -971,6 +971,21 @@ DEFAULT_CONFIG = {
    # Web dashboard settings
    "dashboard": {
        "theme": "default",  # Dashboard visual theme: "default", "midnight", "ember", "mono", "cyberpunk", "rose"
        # Hide the token/cost analytics surfaces (Analytics page, token bars and
        # cost figures on the Models page) by default.  The numbers shown there
        # are a local debug estimate: they only count successful main-agent
        # responses with a usable ``response.usage``, and silently exclude every
        # auxiliary call (context compression, title generation, vision,
        # session search, web extract, smart approval, MCP routing, plugin LLM
        # access) plus provider-side retries, fallback attempts, and any call
        # whose usage block didn't come back.  Cache writes are also missing
        # from the API response.  On models with heavy auxiliary traffic
        # (Kimi K2.6, MiniMax M2.7) the local total can be 10x-100x lower than
        # the provider bill, which is worse than hiding the numbers entirely
        # because they look precise enough to compare against the provider.
        # Set this to True to re-enable the surfaces with the understanding
        # that the numbers are a local lower-bound estimate, not billing.
        "show_token_analytics": False,
    },
    # Privacy settings
--- a/web/src/App.tsx
+++ b/web/src/App.tsx
@ -75,6 +75,7 @@ import { PluginPage, PluginSlot, usePlugins } from "@/plugins";
 import type { PluginManifest } from "@/plugins";
 import { useTheme } from "@/themes";
 import { isDashboardEmbeddedChatEnabled } from "@/lib/dashboard-flags";
 import { api } from "@/lib/api";
 function RootRedirect() {
  return <Navigate to="/sessions" replace />;
@ -316,6 +317,21 @@ export default function App() {
  const isChatRoute = normalizedPath === "/chat";
  const embeddedChat = isDashboardEmbeddedChatEnabled();
  // `dashboard.show_token_analytics` gates the Analytics nav item.  The
  // page itself remains reachable by URL (it renders an explanation when
  // the flag is off — see AnalyticsPage), but hiding the nav entry avoids
  // steering users toward misleading token/cost numbers.  Default off.
  const [showTokenAnalytics, setShowTokenAnalytics] = useState(false);
  useEffect(() => {
    api
      .getConfig()
      .then((cfg) => {
        const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
        setShowTokenAnalytics(dash.show_token_analytics === true);
      })
      .catch(() => setShowTokenAnalytics(false));
  }, []);
  // A plugin can replace the built-in /chat page via `tab.override: "/chat"`
  // in its manifest.  When one does, `buildRoutes` already swaps the route
  // element for <PluginPage /> — but we also have to suppress the
@ -346,11 +362,12 @@ export default function App() {
    [embeddedChat],
  );
-  const builtinNav = useMemo(
+  const builtinNav = useMemo(() => {
-    () =>
+    const base = embeddedChat
-      embeddedChat ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST] : BUILTIN_NAV_REST,
+      ? [CHAT_NAV_ITEM, ...BUILTIN_NAV_REST]
-    [embeddedChat],
+      : BUILTIN_NAV_REST;
-  );
+    return showTokenAnalytics ? base : base.filter((n) => n.path !== "/analytics");
  }, [embeddedChat, showTokenAnalytics]);
  const sidebarNav = useMemo(
    () => partitionSidebarNav(builtinNav, manifests),
--- a/web/src/pages/AnalyticsPage.tsx
+++ b/web/src/pages/AnalyticsPage.tsx
@ -397,10 +397,26 @@ export default function AnalyticsPage() {
  const [data, setData] = useState<AnalyticsResponse | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  // Gated on `dashboard.show_token_analytics` (default off).  When off the
  // page renders an explanation card instead of fetching analytics — the
  // local token counts exclude auxiliary calls and provider retries, so
  // they diverge from provider billing in ways that mislead users.
  const [showTokens, setShowTokens] = useState<boolean | null>(null);
  const { t } = useI18n();
  const { setAfterTitle, setEnd } = usePageHeader();
  useEffect(() => {
    api
      .getConfig()
      .then((cfg) => {
        const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
        setShowTokens(dash.show_token_analytics === true);
      })
      .catch(() => setShowTokens(false));
  }, []);
  const load = useCallback(() => {
    if (!showTokens) return;
    setLoading(true);
    setError(null);
    api
@ -408,7 +424,7 @@ export default function AnalyticsPage() {
      .then(setData)
      .catch((err) => setError(String(err)))
      .finally(() => setLoading(false));
-  }, [days]);
+  }, [days, showTokens]);
  useLayoutEffect(() => {
    const periodLabel =
@ -422,37 +438,39 @@ export default function AnalyticsPage() {
      </span>,
    );
    setEnd(
-      <div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2">
+      showTokens === false ? null : (
-        <div className="flex flex-wrap items-center gap-1.5">
+        <div className="flex w-full min-w-0 flex-wrap items-center justify-end gap-2 sm:gap-2">
-          {PERIODS.map((p) => (
+          <div className="flex flex-wrap items-center gap-1.5">
-            <Button
+            {PERIODS.map((p) => (
-              key={p.label}
+              <Button
-              type="button"
+                key={p.label}
-              size="sm"
+                type="button"
-              outlined={days !== p.days}
+                size="sm"
-              onClick={() => setDays(p.days)}
+                outlined={days !== p.days}
-            >
+                onClick={() => setDays(p.days)}
-              {p.label}
+              >
-            </Button>
+                {p.label}
-          ))}
+              </Button>
            ))}
          </div>
          <Button
            type="button"
            size="sm"
            outlined
            onClick={load}
            disabled={loading}
            prefix={loading ? <Spinner /> : <RefreshCw />}
          >
            {t.common.refresh}
          </Button>
        </div>
-        <Button
+      ),
          type="button"
          size="sm"
          outlined
          onClick={load}
          disabled={loading}
          prefix={loading ? <Spinner /> : <RefreshCw />}
        >
          {t.common.refresh}
        </Button>
      </div>,
    );
    return () => {
      setAfterTitle(null);
      setEnd(null);
    };
-  }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh]);
+  }, [days, loading, load, setAfterTitle, setEnd, t.common.refresh, showTokens]);
  useEffect(() => {
    load();
@ -461,13 +479,51 @@ export default function AnalyticsPage() {
  return (
    <div className="flex flex-col gap-6">
      <PluginSlot name="analytics:top" />
-      {loading && !data && (
+
      {showTokens === false && (
        <Card>
          <CardContent className="py-12">
            <div className="mx-auto flex max-w-2xl flex-col gap-3 text-sm text-muted-foreground">
              <h2 className="font-display text-base tracking-wider uppercase text-foreground">
                Token analytics hidden
              </h2>
              <p>
                The token, cost, and per-day analytics on this page are a
                local debug estimate. They only count successful main-agent
                responses with a usable <span className="font-mono">usage</span>{" "}
                block, and silently exclude auxiliary calls (context
                compression, title generation, vision, session search, web
                extract, smart approvals, MCP routing, plugin LLM access)
                plus provider-side retries and fallback attempts. Cache
                writes are missing entirely.
              </p>
              <p>
                On models with heavy auxiliary traffic (Kimi K2.6, MiniMax
                M2.7) the local total can be 10x–100x lower than what your
                provider bills. Hiding these numbers is safer than letting
                them look authoritative.
              </p>
              <p>
                Check your provider dashboard (OpenRouter, Anthropic, etc.)
                for actual usage and billing. To re-enable the local debug
                estimate anyway, set{" "}
                <span className="font-mono">
                  dashboard.show_token_analytics: true
                </span>{" "}
                in <a href="/config" className="underline">Config</a>.
              </p>
            </div>
          </CardContent>
        </Card>
      )}
      {showTokens && loading && !data && (
        <div className="flex items-center justify-center py-24">
          <Spinner className="text-2xl text-primary" />
        </div>
      )}
-      {error && (
+      {showTokens && error && (
        <Card>
          <CardContent className="py-6">
            <p className="text-sm text-destructive text-center">{error}</p>
@ -475,7 +531,7 @@ export default function AnalyticsPage() {
        </Card>
      )}
-      {data && (
+      {showTokens && data && (
        <>
          <div className="grid gap-6 lg:grid-cols-2">
            <Card>
--- a/web/src/pages/ModelsPage.tsx
+++ b/web/src/pages/ModelsPage.tsx
@ -310,12 +310,14 @@ function ModelCard({
  main,
  aux,
  onAssigned,
  showTokens,
 }: {
  entry: ModelsAnalyticsModelEntry;
  rank: number;
  main: { provider: string; model: string } | null;
  aux: AuxiliaryTaskAssignment[];
  onAssigned(): void;
  showTokens: boolean;
 }) {
  const { t } = useI18n();
  const provider = entry.provider || modelVendor(entry.model);
@ -375,14 +377,27 @@ function ModelCard({
            </div>
          </div>
          <div className="flex flex-col items-end gap-1 shrink-0">
-            <div className="text-right">
+            {showTokens ? (
-              <div className="text-xs font-mono font-semibold">
+              <div className="text-right">
-                {formatTokens(totalTokens)}
+                <div className="text-xs font-mono font-semibold">
                  {formatTokens(totalTokens)}
                </div>
                <div className="text-[10px] text-muted-foreground">
                  {t.models.tokens}
                </div>
              </div>
-              <div className="text-[10px] text-muted-foreground">
+            ) : (
-                {t.models.tokens}
+              entry.sessions > 0 && (
-              </div>
+                <div className="text-right">
-            </div>
+                  <div className="text-xs font-mono font-semibold">
                    {entry.sessions}
                  </div>
                  <div className="text-[10px] text-muted-foreground">
                    {t.models.sessions}
                  </div>
                </div>
              )
            )}
            <UseAsMenu
              provider={provider}
              model={entry.model}
@ -394,47 +409,51 @@ function ModelCard({
        </div>
      </CardHeader>
      <CardContent className="space-y-3 pt-3">
-        <TokenBar
+        {showTokens && (
-          input={entry.input_tokens}
+          <>
-          output={entry.output_tokens}
+            <TokenBar
-          cacheRead={entry.cache_read_tokens}
+              input={entry.input_tokens}
-          reasoning={entry.reasoning_tokens}
+              output={entry.output_tokens}
-        />
+              cacheRead={entry.cache_read_tokens}
              reasoning={entry.reasoning_tokens}
            />
-        <div className="grid grid-cols-3 gap-2 text-xs">
+            <div className="grid grid-cols-3 gap-2 text-xs">
-          <div className="text-center">
+              <div className="text-center">
-            <div className="font-mono font-semibold">{entry.sessions}</div>
+                <div className="font-mono font-semibold">{entry.sessions}</div>
-            <div className="text-[10px] text-muted-foreground">
+                <div className="text-[10px] text-muted-foreground">
-              {t.models.sessions}
+                  {t.models.sessions}
                </div>
              </div>
              <div className="text-center">
                <div className="font-mono font-semibold">
                  {formatTokens(entry.avg_tokens_per_session)}
                </div>
                <div className="text-[10px] text-muted-foreground">
                  {t.models.avgPerSession}
                </div>
              </div>
              <div className="text-center">
                <div className="font-mono font-semibold">
                  {entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"}
                </div>
                <div className="text-[10px] text-muted-foreground">
                  {t.models.apiCalls}
                </div>
              </div>
            </div>
-          </div>
+          </>
-          <div className="text-center">
+        )}
            <div className="font-mono font-semibold">
              {formatTokens(entry.avg_tokens_per_session)}
            </div>
            <div className="text-[10px] text-muted-foreground">
              {t.models.avgPerSession}
            </div>
          </div>
          <div className="text-center">
            <div className="font-mono font-semibold">
              {entry.api_calls > 0 ? formatTokens(entry.api_calls) : "—"}
            </div>
            <div className="text-[10px] text-muted-foreground">
              {t.models.apiCalls}
            </div>
          </div>
        </div>
        <div className="flex items-center justify-between text-[10px] text-muted-foreground border-t border-border/30 pt-2">
          <div className="flex items-center gap-3">
-            {entry.estimated_cost > 0 && (
+            {showTokens && entry.estimated_cost > 0 && (
              <span className="flex items-center gap-0.5">
                <DollarSign className="h-2.5 w-2.5" />
                {formatCost(entry.estimated_cost)}
              </span>
            )}
-            {entry.tool_calls > 0 && (
+            {showTokens && entry.tool_calls > 0 && (
              <span className="flex items-center gap-0.5">
                <Zap className="h-2.5 w-2.5" />
                {entry.tool_calls} {t.models.toolCalls}
@ -752,9 +771,26 @@ export default function ModelsPage() {
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [saveKey, setSaveKey] = useState(0);
  // Gate the token/cost UI on `dashboard.show_token_analytics`.  See
  // hermes_cli/config.py for the rationale: the numbers exclude auxiliary
  // calls and retries, so they're misleading next to provider billing.
  const [showTokens, setShowTokens] = useState(false);
  const { t } = useI18n();
  const { setAfterTitle, setEnd } = usePageHeader();
  useEffect(() => {
    api
      .getConfig()
      .then((cfg) => {
        const dash = (cfg?.dashboard ?? {}) as { show_token_analytics?: unknown };
        setShowTokens(dash.show_token_analytics === true);
      })
      .catch(() => {
        // Default to hidden on any failure — safer than showing wrong numbers.
        setShowTokens(false);
      });
  }, []);
  const load = useCallback(() => {
    setLoading(true);
    setError(null);
@ -842,35 +878,59 @@ export default function ModelsPage() {
          <Card>
            <CardContent className="py-6">
              <Stats
-                items={[
+                items={
-                  {
+                  showTokens
-                    label: t.models.modelsUsed,
+                    ? [
-                    value: String(data.totals.distinct_models),
+                        {
-                  },
+                          label: t.models.modelsUsed,
-                  {
+                          value: String(data.totals.distinct_models),
-                    label: t.analytics.totalTokens,
+                        },
-                    value: formatTokens(
+                        {
-                      data.totals.total_input + data.totals.total_output,
+                          label: t.analytics.totalTokens,
-                    ),
+                          value: formatTokens(
-                  },
+                            data.totals.total_input + data.totals.total_output,
-                  {
+                          ),
-                    label: t.analytics.input,
+                        },
-                    value: formatTokens(data.totals.total_input),
+                        {
-                  },
+                          label: t.analytics.input,
-                  {
+                          value: formatTokens(data.totals.total_input),
-                    label: t.analytics.output,
+                        },
-                    value: formatTokens(data.totals.total_output),
+                        {
-                  },
+                          label: t.analytics.output,
-                  {
+                          value: formatTokens(data.totals.total_output),
-                    label: t.models.estimatedCost,
+                        },
-                    value: formatCost(data.totals.total_estimated_cost),
+                        {
-                  },
+                          label: t.models.estimatedCost,
-                  {
+                          value: formatCost(data.totals.total_estimated_cost),
-                    label: t.analytics.totalSessions,
+                        },
-                    value: String(data.totals.total_sessions),
+                        {
-                  },
+                          label: t.analytics.totalSessions,
-                ]}
+                          value: String(data.totals.total_sessions),
                        },
                      ]
                    : [
                        {
                          label: t.models.modelsUsed,
                          value: String(data.totals.distinct_models),
                        },
                        {
                          label: t.analytics.totalSessions,
                          value: String(data.totals.total_sessions),
                        },
                      ]
                }
              />
              {!showTokens && (
                <p className="mt-4 text-[10px] text-muted-foreground/70 leading-relaxed">
                  Token & cost analytics are hidden because the local counts
                  exclude auxiliary calls (compression, vision, web extract,
                  …) and provider retries, so they diverge from your provider
                  bill. Enable{" "}
                  <span className="font-mono">dashboard.show_token_analytics</span>{" "}
                  in <a href="/config" className="underline">Config</a> to
                  show the local debug estimate anyway.
                </p>
              )}
            </CardContent>
          </Card>
        )}
@ -902,6 +962,7 @@ export default function ModelsPage() {
                  main={aux?.main ?? null}
                  aux={aux?.tasks ?? []}
                  onAssigned={onAssigned}
                  showTokens={showTokens}
                />
              ))}
            </div>