From fd2a35b1691138b79b606e7961d3c78f7019722b Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Thu, 25 Jun 2026 15:21:22 -0700
Subject: [PATCH] fix: stop reporting cache-hit rate and cost across all UI
 surfaces (#52717)

* fix: stop reporting cache-hit rate and cost across all UI surfaces

Cost estimates and cache read/write token reporting are unreliable on
providers that don't surface cached_tokens (e.g. ollama-cloud, which doesn't
implement prompt_tokens_details.cached_tokens), producing misleading
near-zero 'cache hit' readouts and cost figures. Remove cost + cache-hit
reporting from every user-facing surface; keep input/output/total token
counts (provider-agnostic and accurate) and the Nous account billing UI
(real account money, separate from per-conversation estimates).

Surfaces:
- CLI /usage + model-info: drop cost lines + cache read/write token lines
- Gateway /usage + /model: drop cost + cache lines
- tui_gateway/server.py: stop emitting cost_usd / cost_status / cache_read /
  cache_write in usage payloads and cost_usd in subagent.complete payloads
- TUI (Ink): drop cost from status bar (+ showCost plumbing), /usage panel
  (also its cache read/write rows), thinking rollup, agents overlay (incl.
  compare view); keep token counts
- Desktop Command Center: drop cost stat, per-model cost, actual-cost hint

Underlying estimate_usage_cost / format_cost / insights cost columns are
left intact but no longer surfaced (display-only change, reversible).

* test: update TUI + gateway + CLI tests for removed cost/cache-hit reporting

- CLI /usage test asserts cost/cache lines are absent, tokens present;
  removes the unknown-pricing and zero-priced-model tests
- gateway /usage test drops cost + cache asserts; removes cost-included test
- TUI subagentTree summary expectation drops the cost segment
- useConfigSync + appChrome status-rule tests drop showCost prop/state
---
 apps/desktop/src/app/command-center/index.tsx | 23 +-------
 cli.py                                        | 30 ----------
 gateway/slash_commands.py                     | 32 -----------
 tests/cli/test_cli_status_bar.py              | 55 ++++---------------
 tests/gateway/test_usage_command.py           | 26 ++-------
 tui_gateway/server.py                         | 26 ---------
 .../__tests__/appChromeStatusRule.test.tsx    |  8 +--
 .../appChromeStatusRuleDevCredits.test.tsx    |  1 -
 ui-tui/src/__tests__/subagentTree.test.ts     |  6 +-
 ui-tui/src/__tests__/useConfigSync.test.ts    |  3 -
 ui-tui/src/app/interfaces.ts                  |  1 -
 ui-tui/src/app/slash/commands/session.ts      |  7 ---
 ui-tui/src/app/uiStore.ts                     |  1 -
 ui-tui/src/app/useConfigSync.ts               |  1 -
 ui-tui/src/components/agentsOverlay.tsx       | 20 +------
 ui-tui/src/components/appChrome.tsx           | 16 +-----
 ui-tui/src/components/appLayout.tsx           |  1 -
 ui-tui/src/components/thinking.tsx            | 13 -----
 ui-tui/src/lib/subagentTree.ts                |  4 --
 19 files changed, 27 insertions(+), 247 deletions(-)
diff --git a/apps/desktop/src/app/command-center/index.tsx b/apps/desktop/src/app/command-center/index.tsx
index 57358186a03..bade25a6549 100644
--- a/apps/desktop/src/app/command-center/index.tsx
+++ b/apps/desktop/src/app/command-center/index.tsx
@@ -455,20 +455,6 @@ function formatTokens(value: null | number | undefined): string {
   return num.toLocaleString()
 }
 
-function formatCost(value: null | number | undefined): string {
-  const num = Number(value || 0)
-
-  if (num === 0) {
-    return '$0.00'
-  }
-
-  if (num < 0.01) {
-    return '<$0.01'
-  }
-
-  return `$${num.toFixed(2)}`
-}
-
 function formatInteger(value: null | number | undefined): string {
   return Number(value ?? 0).toLocaleString()
 }
@@ -525,18 +511,13 @@ function UsagePanel({ error, loading, onRefresh, period, usage }: UsagePanelProp
         </span>
       )}
 
-      <div className="grid grid-cols-2 gap-x-4 gap-y-4 border-b border-(--ui-stroke-tertiary) pb-5 sm:grid-cols-4">
+      <div className="grid grid-cols-2 gap-x-4 gap-y-4 border-b border-(--ui-stroke-tertiary) pb-5 sm:grid-cols-3">
         <UsageStat label={cc.statSessions} value={formatInteger(totals.total_sessions)} />
         <UsageStat label={cc.statApiCalls} value={formatInteger(totals.total_api_calls)} />
         <UsageStat
           label={cc.statTokens}
           value={`${formatTokens(totals.total_input)} / ${formatTokens(totals.total_output)}`}
         />
-        <UsageStat
-          hint={totals.total_actual_cost > 0 ? cc.actualCost(formatCost(totals.total_actual_cost)) : undefined}
-          label={cc.statCost}
-          value={formatCost(totals.total_estimated_cost)}
-        />
       </div>
 
       <section>
@@ -596,7 +577,7 @@ function UsagePanel({ error, loading, onRefresh, period, usage }: UsagePanelProp
           rows={byModel.slice(0, 6).map(entry => ({
             key: entry.model,
             label: entry.model,
-            value: `${formatTokens((entry.input_tokens || 0) + (entry.output_tokens || 0))} · ${formatCost(entry.estimated_cost)}`
+            value: `${formatTokens((entry.input_tokens || 0) + (entry.output_tokens || 0))}`
           }))}
           title={cc.topModels}
         />
diff --git a/cli.py b/cli.py
index 8cf2e089a2b..7442bfe5ca2 100644
--- a/cli.py
+++ b/cli.py
@@ -7488,8 +7488,6 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
         if mi:
             if mi.max_output:
                 _cprint(f"    Max output: {mi.max_output:,} tokens")
-            if mi.has_cost_data():
-                _cprint(f"    Cost: {mi.format_cost()}")
             _cprint(f"    Capabilities: {mi.format_capabilities()}")
 
         cache_enabled = (
@@ -7796,8 +7794,6 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
         if mi:
             if mi.max_output:
                 _cprint(f"    Max output: {mi.max_output:,} tokens")
-            if mi.has_cost_data():
-                _cprint(f"    Cost: {mi.format_cost()}")
             _cprint(f"    Capabilities: {mi.format_capabilities()}")
 
         # Cache notice
@@ -9113,8 +9109,6 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
         # ── Session token usage ─────────────────────────────────────
         input_tokens = getattr(agent, "session_input_tokens", 0) or 0
         output_tokens = getattr(agent, "session_output_tokens", 0) or 0
-        cache_read_tokens = getattr(agent, "session_cache_read_tokens", 0) or 0
-        cache_write_tokens = getattr(agent, "session_cache_write_tokens", 0) or 0
         reasoning_tokens = getattr(agent, "session_reasoning_tokens", 0) or 0
         prompt = agent.session_prompt_tokens
         completion = agent.session_completion_tokens
@@ -9127,25 +9121,12 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
         compressions = compressor.compression_count
 
         msg_count = len(self.conversation_history)
-        cost_result = estimate_usage_cost(
-            agent.model,
-            CanonicalUsage(
-                input_tokens=input_tokens,
-                output_tokens=output_tokens,
-                cache_read_tokens=cache_read_tokens,
-                cache_write_tokens=cache_write_tokens,
-            ),
-            provider=getattr(agent, "provider", None),
-            base_url=getattr(agent, "base_url", None),
-        )
         elapsed = format_duration_compact((datetime.now() - self.session_start).total_seconds())
 
         print("  📊 Session Token Usage")
         print(f"  {'─' * 40}")
         print(f"  Model:                     {agent.model}")
         print(f"  Input tokens:              {input_tokens:>10,}")
-        print(f"  Cache read tokens:         {cache_read_tokens:>10,}")
-        print(f"  Cache write tokens:        {cache_write_tokens:>10,}")
         print(f"  Output tokens:             {output_tokens:>10,}")
         if reasoning_tokens:
             print(f"  ↳ Reasoning (subset):      {reasoning_tokens:>10,}")
@@ -9154,21 +9135,10 @@ class HermesCLI(CLIAgentSetupMixin, CLICommandsMixin):
         print(f"  Total tokens:              {total:>10,}")
         print(f"  API calls:                 {calls:>10,}")
         print(f"  Session duration:          {elapsed:>10}")
-        print(f"  Cost status:              {cost_result.status:>10}")
-        print(f"  Cost source:              {cost_result.source:>10}")
-        if cost_result.amount_usd is not None:
-            prefix = "~" if cost_result.status == "estimated" else ""
-            print(f"  Total cost:              {prefix}${float(cost_result.amount_usd):>10.4f}")
-        elif cost_result.status == "included":
-            print(f"  Total cost:              {'included':>10}")
-        else:
-            print(f"  Total cost:              {'n/a':>10}")
         print(f"  {'─' * 40}")
         print(f"  Current context:  {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)")
         print(f"  Messages:         {msg_count}")
         print(f"  Compressions:     {compressions}")
-        if cost_result.status == "unknown":
-            print(f"  Note:             Pricing unknown for {agent.model}")
 
         # Account limits -- fetched off-thread with a hard timeout so slow
         # provider APIs don't hang the prompt.
diff --git a/gateway/slash_commands.py b/gateway/slash_commands.py
index 24754336b36..b2b8089b51b 100644
--- a/gateway/slash_commands.py
+++ b/gateway/slash_commands.py
@@ -1391,8 +1391,6 @@ class GatewaySlashCommandsMixin:
                         if mi:
                             if mi.max_output:
                                 lines.append(t("gateway.model.max_output_label", tokens=f"{mi.max_output:,}"))
-                            if mi.has_cost_data():
-                                lines.append(t("gateway.model.cost_label", cost=mi.format_cost()))
                             lines.append(t("gateway.model.capabilities_label", capabilities=mi.format_capabilities()))
                         if result.warning_message:
                             lines.append(t("gateway.model.warning_prefix", warning=result.warning_message))
@@ -1628,8 +1626,6 @@ class GatewaySlashCommandsMixin:
             if mi:
                 if mi.max_output:
                     lines.append(t("gateway.model.max_output_label", tokens=f"{mi.max_output:,}"))
-                if mi.has_cost_data():
-                    lines.append(t("gateway.model.cost_label", cost=mi.format_cost()))
                 lines.append(t("gateway.model.capabilities_label", capabilities=mi.format_capabilities()))
 
             # Cache notice
@@ -3514,42 +3510,14 @@ class GatewaySlashCommandsMixin:
             # Session token usage — detailed breakdown matching CLI
             input_tokens = getattr(agent, "session_input_tokens", 0) or 0
             output_tokens = getattr(agent, "session_output_tokens", 0) or 0
-            cache_read = getattr(agent, "session_cache_read_tokens", 0) or 0
-            cache_write = getattr(agent, "session_cache_write_tokens", 0) or 0
 
             lines.append(t("gateway.usage.header_session"))
             lines.append(t("gateway.usage.label_model", model=agent.model))
             lines.append(t("gateway.usage.label_input_tokens", count=f"{input_tokens:,}"))
-            if cache_read:
-                lines.append(t("gateway.usage.label_cache_read", count=f"{cache_read:,}"))
-            if cache_write:
-                lines.append(t("gateway.usage.label_cache_write", count=f"{cache_write:,}"))
             lines.append(t("gateway.usage.label_output_tokens", count=f"{output_tokens:,}"))
             lines.append(t("gateway.usage.label_total", count=f"{agent.session_total_tokens:,}"))
             lines.append(t("gateway.usage.label_api_calls", count=agent.session_api_calls))
 
-            # Cost estimation
-            try:
-                from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
-                cost_result = estimate_usage_cost(
-                    agent.model,
-                    CanonicalUsage(
-                        input_tokens=input_tokens,
-                        output_tokens=output_tokens,
-                        cache_read_tokens=cache_read,
-                        cache_write_tokens=cache_write,
-                    ),
-                    provider=getattr(agent, "provider", None),
-                    base_url=getattr(agent, "base_url", None),
-                )
-                if cost_result.amount_usd is not None:
-                    prefix = "~" if cost_result.status == "estimated" else ""
-                    lines.append(t("gateway.usage.label_cost", prefix=prefix, amount=f"{float(cost_result.amount_usd):.4f}"))
-                elif cost_result.status == "included":
-                    lines.append(t("gateway.usage.label_cost_included"))
-            except Exception:
-                pass
-
             # Context window and compressions
             ctx = agent.context_compressor
             if ctx.last_prompt_tokens:
diff --git a/tests/cli/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py
index e27ade6af7d..3edc1e94c80 100644
--- a/tests/cli/test_cli_status_bar.py
+++ b/tests/cli/test_cli_status_bar.py
@@ -500,7 +500,7 @@ class TestCLIStatusBar:
 
 
 class TestCLIUsageReport:
-    def test_show_usage_includes_estimated_cost(self, capsys):
+    def test_show_usage_omits_cost_reporting(self, capsys):
         cli_obj = _attach_agent(
             _make_cli(),
             prompt_tokens=10_230,
@@ -516,52 +516,19 @@ class TestCLIUsageReport:
         cli_obj._show_usage()
         output = capsys.readouterr().out
 
+        # Token counts and session metadata still shown.
         assert "Model:" in output
-        assert "Cost status:" in output
-        assert "Cost source:" in output
-        assert "Total cost:" in output
-        assert "$" in output
-        assert "0.064" in output
+        assert "Input tokens:" in output
+        assert "Output tokens:" in output
+        assert "Total tokens:" in output
         assert "Session duration:" in output
         assert "Compressions:" in output
-
-    def test_show_usage_marks_unknown_pricing(self, capsys):
-        cli_obj = _attach_agent(
-            _make_cli(model="local/my-custom-model"),
-            prompt_tokens=1_000,
-            completion_tokens=500,
-            total_tokens=1_500,
-            api_calls=1,
-            context_tokens=1_000,
-            context_length=32_000,
-        )
-        cli_obj.verbose = False
-
-        cli_obj._show_usage()
-        output = capsys.readouterr().out
-
-        assert "Total cost:" in output
-        assert "n/a" in output
-        assert "Pricing unknown for local/my-custom-model" in output
-
-    def test_zero_priced_provider_models_stay_unknown(self, capsys):
-        cli_obj = _attach_agent(
-            _make_cli(model="glm-5"),
-            prompt_tokens=1_000,
-            completion_tokens=500,
-            total_tokens=1_500,
-            api_calls=1,
-            context_tokens=1_000,
-            context_length=32_000,
-        )
-        cli_obj.verbose = False
-
-        cli_obj._show_usage()
-        output = capsys.readouterr().out
-
-        assert "Total cost:" in output
-        assert "n/a" in output
-        assert "Pricing unknown for glm-5" in output
+        # Cost and cache-hit reporting is removed everywhere.
+        assert "Total cost:" not in output
+        assert "Cost status:" not in output
+        assert "Cost source:" not in output
+        assert "Cache read tokens:" not in output
+        assert "Cache write tokens:" not in output
 
 
 class TestStatusBarWidthSource:
diff --git a/tests/gateway/test_usage_command.py b/tests/gateway/test_usage_command.py
index 9fbb80e3123..d58c57613dd 100644
--- a/tests/gateway/test_usage_command.py
+++ b/tests/gateway/test_usage_command.py
@@ -76,20 +76,20 @@ class TestUsageCachedAgent:
         runner = _make_runner(SK, cached_agent=agent)
         event = MagicMock()
 
-        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
-             patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
-            mock_cost.return_value = MagicMock(amount_usd=0.1234, status="estimated")
+        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
             result = await runner._handle_usage_command(event)
 
         assert "claude-sonnet-4.6" in result
         assert "35,000" in result  # input tokens
         assert "10,000" in result  # output tokens
-        assert "5,000" in result   # cache read
-        assert "2,000" in result   # cache write
         assert "50,000" in result  # total
-        assert "$0.1234" in result
         assert "30,000" in result  # context
         assert "Compressions: 1" in result
+        # Cost and cache-hit reporting is removed everywhere.
+        assert "$" not in result
+        assert "Cache read" not in result
+        assert "Cache write" not in result
+        assert "Cost" not in result
 
     @pytest.mark.asyncio
     async def test_running_agent_preferred_over_cache(self):
@@ -161,20 +161,6 @@ class TestUsageCachedAgent:
         assert "Cache read" not in result
         assert "Cache write" not in result
 
-    @pytest.mark.asyncio
-    async def test_cost_included_status(self):
-        """Subscription-included providers show 'included' instead of dollar amount."""
-        agent = _make_mock_agent(provider="openai-codex")
-        runner = _make_runner(SK, cached_agent=agent)
-        event = MagicMock()
-
-        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
-             patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
-            mock_cost.return_value = MagicMock(amount_usd=None, status="included")
-            result = await runner._handle_usage_command(event)
-
-        assert "Cost: included" in result
-
 
 class TestUsageAccountSection:
     """Account-limits section appended to /usage output (PR #2486)."""
diff --git a/tui_gateway/server.py b/tui_gateway/server.py
index 24299a82ceb..e94692ddc93 100644
--- a/tui_gateway/server.py
+++ b/tui_gateway/server.py
@@ -2790,8 +2790,6 @@ def _get_usage(agent) -> dict:
         "model": getattr(agent, "model", "") or "",
         "input": g("session_input_tokens", "session_prompt_tokens"),
         "output": g("session_output_tokens", "session_completion_tokens"),
-        "cache_read": g("session_cache_read_tokens"),
-        "cache_write": g("session_cache_write_tokens"),
         "reasoning": g("session_reasoning_tokens"),
         "prompt": g("session_prompt_tokens"),
         "completion": g("session_completion_tokens"),
@@ -2815,25 +2813,6 @@ def _get_usage(agent) -> dict:
         usage["active_subagents"] = _async_active_count()
     except Exception:
         pass
-    try:
-        from agent.usage_pricing import CanonicalUsage, estimate_usage_cost
-
-        cost = estimate_usage_cost(
-            usage["model"],
-            CanonicalUsage(
-                input_tokens=usage["input"],
-                output_tokens=usage["output"],
-                cache_read_tokens=usage["cache_read"],
-                cache_write_tokens=usage["cache_write"],
-            ),
-            provider=getattr(agent, "provider", None),
-            base_url=getattr(agent, "base_url", None),
-        )
-        usage["cost_status"] = cost.status
-        if cost.amount_usd is not None:
-            usage["cost_usd"] = float(cost.amount_usd)
-    except Exception:
-        pass
     # Dev-only live credits-spent readout (L0 usage-aware-credits). Gated on
     # HERMES_DEV_CREDITS so the payload stays clean when the flag is off.
     if is_truthy_value(os.environ.get("HERMES_DEV_CREDITS")):
@@ -3289,11 +3268,6 @@ def _on_tool_progress(
                     payload[int_key] = int(val)
                 except (TypeError, ValueError):
                     pass
-        if _kwargs.get("cost_usd") is not None:
-            try:
-                payload["cost_usd"] = float(_kwargs["cost_usd"])
-            except (TypeError, ValueError):
-                pass
         if _kwargs.get("files_read"):
             payload["files_read"] = [str(p) for p in _kwargs["files_read"]]
         if _kwargs.get("files_written"):
diff --git a/ui-tui/src/__tests__/appChromeStatusRule.test.tsx b/ui-tui/src/__tests__/appChromeStatusRule.test.tsx
index c7f2a00eefc..de823162df2 100644
--- a/ui-tui/src/__tests__/appChromeStatusRule.test.tsx
+++ b/ui-tui/src/__tests__/appChromeStatusRule.test.tsx
@@ -96,7 +96,6 @@ const baseProps = {
   liveSessionCount: 0,
   model: 'opus-4.8',
   sessionStartedAt: null,
-  showCost: false,
   status: 'ready',
   statusColor: DEFAULT_THEME.color.ok,
   t: DEFAULT_THEME,
@@ -157,7 +156,6 @@ describe('StatusRule session count click target', () => {
       model: 'kimi-k2.6',
       onSessionCountClick: openSwitcher,
       sessionStartedAt: null,
-      showCost: false,
       status: 'ready',
       statusColor: DEFAULT_THEME.color.ok,
       t: DEFAULT_THEME,
@@ -183,12 +181,11 @@ describe('StatusRule session count click target', () => {
       model: 'opus-4.8',
       onSessionCountClick: vi.fn(),
       sessionStartedAt: Date.now() - 60_000,
-      showCost: true,
       status: 'ready',
       statusColor: DEFAULT_THEME.color.ok,
       t: DEFAULT_THEME,
       turnStartedAt: null,
-      usage: { context_max: 200_000, context_percent: 25, context_used: 50_000, cost_usd: 0.5, total: 50_000 },
+      usage: { calls: 0, context_max: 200_000, context_percent: 25, context_used: 50_000, input: 0, output: 0, total: 50_000 },
       voiceLabel: 'voice off'
     })
 
@@ -197,9 +194,8 @@ describe('StatusRule session count click target', () => {
     // Must-keep essentials survive intact …
     expect(rendered).toContain('ready')
     expect(rendered).toContain('opus 4.8')
-    // … while the low-value tail (session count, cost) is dropped, not truncated.
+    // … while the low-value tail (session count) is dropped, not truncated.
     expect(rendered).not.toContain('3 sessions')
-    expect(rendered).not.toContain('$0.5000')
   })
 })
 
diff --git a/ui-tui/src/__tests__/appChromeStatusRuleDevCredits.test.tsx b/ui-tui/src/__tests__/appChromeStatusRuleDevCredits.test.tsx
index 514ff5f5c7c..7d04cfe2758 100644
--- a/ui-tui/src/__tests__/appChromeStatusRuleDevCredits.test.tsx
+++ b/ui-tui/src/__tests__/appChromeStatusRuleDevCredits.test.tsx
@@ -45,7 +45,6 @@ const baseProps = {
   liveSessionCount: 0,
   model: 'opus-4.8',
   sessionStartedAt: null,
-  showCost: false,
   status: 'ready',
   statusColor: DEFAULT_THEME.color.ok,
   t: DEFAULT_THEME,
diff --git a/ui-tui/src/__tests__/subagentTree.test.ts b/ui-tui/src/__tests__/subagentTree.test.ts
index bd892d7ac07..863646a8e52 100644
--- a/ui-tui/src/__tests__/subagentTree.test.ts
+++ b/ui-tui/src/__tests__/subagentTree.test.ts
@@ -139,8 +139,8 @@ describe('fmtCost + fmtTokens', () => {
   })
 })
 
-describe('formatSummary with tokens + cost', () => {
-  it('includes token + cost when present', () => {
+describe('formatSummary with tokens', () => {
+  it('includes tokens but not cost', () => {
     expect(
       formatSummary({
         activeCount: 0,
@@ -154,7 +154,7 @@ describe('formatSummary with tokens + cost', () => {
         totalDuration: 30,
         totalTools: 14
       })
-    ).toBe('d2 · 3 agents · 14 tools · 30s · 10k tok · $0.42')
+    ).toBe('d2 · 3 agents · 14 tools · 30s · 10k tok')
   })
 })
 
diff --git a/ui-tui/src/__tests__/useConfigSync.test.ts b/ui-tui/src/__tests__/useConfigSync.test.ts
index 2a6f7262456..c82984bac4d 100644
--- a/ui-tui/src/__tests__/useConfigSync.test.ts
+++ b/ui-tui/src/__tests__/useConfigSync.test.ts
@@ -26,7 +26,6 @@ describe('applyDisplay', () => {
             bell_on_complete: true,
             details_mode: 'expanded',
             inline_diffs: false,
-            show_cost: true,
             show_reasoning: true,
             streaming: false,
             tui_compact: true,
@@ -42,7 +41,6 @@ describe('applyDisplay', () => {
     expect(s.compact).toBe(true)
     expect(s.detailsMode).toBe('expanded')
     expect(s.inlineDiffs).toBe(false)
-    expect(s.showCost).toBe(true)
     expect(s.showReasoning).toBe(true)
     expect(s.statusBar).toBe('off')
     expect(s.streaming).toBe(false)
@@ -66,7 +64,6 @@ describe('applyDisplay', () => {
     const s = $uiState.get()
     expect(setBell).toHaveBeenCalledWith(false)
     expect(s.inlineDiffs).toBe(true)
-    expect(s.showCost).toBe(false)
     expect(s.showReasoning).toBe(false)
     expect(s.statusBar).toBe('top')
     expect(s.streaming).toBe(true)
diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts
index 463372a3522..cd8789d44ec 100644
--- a/ui-tui/src/app/interfaces.ts
+++ b/ui-tui/src/app/interfaces.ts
@@ -173,7 +173,6 @@ export interface UiState {
 
   sections: SectionVisibility
   sessionTitle: string
-  showCost: boolean
   showReasoning: boolean
   indicatorStyle: IndicatorStyle
   sid: null | string
diff --git a/ui-tui/src/app/slash/commands/session.ts b/ui-tui/src/app/slash/commands/session.ts
index a8b4b3ca4e1..27848eaf69d 100644
--- a/ui-tui/src/app/slash/commands/session.ts
+++ b/ui-tui/src/app/slash/commands/session.ts
@@ -593,22 +593,15 @@ export const sessionCommands: SlashCommand[] = [
         }
 
         const f = (v: number | undefined) => (v ?? 0).toLocaleString()
-        const cost = r.cost_usd != null ? `${r.cost_status === 'estimated' ? '~' : ''}$${r.cost_usd.toFixed(4)}` : null
 
         const rows: [string, string][] = [
           ['Model', r.model ?? ''],
           ['Input tokens', f(r.input)],
-          ['Cache read tokens', f(r.cache_read)],
-          ['Cache write tokens', f(r.cache_write)],
           ['Output tokens', f(r.output)],
           ['Total tokens', f(r.total)],
           ['API calls', f(r.calls)]
         ]
 
-        if (cost) {
-          rows.push(['Cost', cost])
-        }
-
         const sections: PanelSection[] = [{ rows }]
 
         if (r.context_max) {
diff --git a/ui-tui/src/app/uiStore.ts b/ui-tui/src/app/uiStore.ts
index 470f4264b94..b9d62fbe14c 100644
--- a/ui-tui/src/app/uiStore.ts
+++ b/ui-tui/src/app/uiStore.ts
@@ -23,7 +23,6 @@ const buildUiState = (): UiState => ({
   pasteCollapseChars: 2000,
   sections: {},
   sessionTitle: '',
-  showCost: false,
   showReasoning: false,
   sid: null,
   status: 'summoning hermes…',
diff --git a/ui-tui/src/app/useConfigSync.ts b/ui-tui/src/app/useConfigSync.ts
index f159bbbd17b..6d964213f25 100644
--- a/ui-tui/src/app/useConfigSync.ts
+++ b/ui-tui/src/app/useConfigSync.ts
@@ -213,7 +213,6 @@ export const applyDisplay = (
     pasteCollapseLines: _pasteCollapseLinesFromConfig(cfg),
     pasteCollapseChars: _pasteCollapseCharsFromConfig(cfg),
     sections: resolveSections(d.sections),
-    showCost: !!d.show_cost,
     showReasoning: !!d.show_reasoning,
     statusBar: normalizeStatusBar(d.tui_statusbar),
     streaming: d.streaming !== false
diff --git a/ui-tui/src/components/agentsOverlay.tsx b/ui-tui/src/components/agentsOverlay.tsx
index 497230c3934..b04c20551d8 100644
--- a/ui-tui/src/components/agentsOverlay.tsx
+++ b/ui-tui/src/components/agentsOverlay.tsx
@@ -18,7 +18,6 @@ import {
   buildSubagentTree,
   descendantIds,
   flattenTree,
-  fmtCost,
   fmtDuration,
   fmtTokens,
   formatSummary,
@@ -407,8 +406,6 @@ function Detail({ id, node, t }: { id?: string; node: SubagentNode; t: Theme })
   const outputTokens = item.outputTokens ?? 0
   const localTokens = inputTokens + outputTokens
   const subtreeTokens = agg.inputTokens + agg.outputTokens - localTokens
-  const localCost = item.costUsd ?? 0
-  const subtreeCost = agg.costUsd - localCost
 
   const filesRead = item.filesRead ?? []
   const filesWritten = item.filesWritten ?? []
@@ -442,7 +439,7 @@ function Detail({ id, node, t }: { id?: string; node: SubagentNode; t: Theme })
         {item.apiCalls ? <Field name="api calls" t={t} value={String(item.apiCalls)} /> : null}
       </Box>
 
-      {localTokens > 0 || localCost > 0 ? (
+      {localTokens > 0 ? (
         <OverlaySection defaultOpen t={t} title="Budget">
           {localTokens > 0 ? (
             <Field
@@ -457,19 +454,6 @@ function Detail({ id, node, t }: { id?: string; node: SubagentNode; t: Theme })
             />
           ) : null}
 
-          {localCost > 0 ? (
-            <Field
-              name="cost"
-              t={t}
-              value={
-                <>
-                  {fmtCost(localCost)}
-                  {subtreeCost >= 0.01 ? ` · subtree +${fmtCost(subtreeCost)}` : ''}
-                </>
-              }
-            />
-          ) : null}
-
           {subtreeTokens > 0 ? <Field name="subtree tokens" t={t} value={`+${fmtTokens(subtreeTokens)}`} /> : null}
         </OverlaySection>
       ) : null}
@@ -650,7 +634,6 @@ function DiffView({
 
   const round = (n: number) => String(Math.round(n))
   const sumTokens = (x: typeof aTotals) => x.inputTokens + x.outputTokens
-  const dollars = (n: number) => fmtCost(n) || '$0.00'
 
   return (
     <Box flexDirection="column" flexGrow={1} paddingX={1} paddingY={1}>
@@ -683,7 +666,6 @@ function DiffView({
           {diffMetricLine('duration', aTotals.totalDuration, bTotals.totalDuration, n => `${n.toFixed(1)}s`)}
         </Text>
         <Text color={t.color.text}>{diffMetricLine('tokens', sumTokens(aTotals), sumTokens(bTotals), fmtTokens)}</Text>
-        <Text color={t.color.text}>{diffMetricLine('cost', aTotals.costUsd, bTotals.costUsd, dollars)}</Text>
       </Box>
     </Box>
   )
diff --git a/ui-tui/src/components/appChrome.tsx b/ui-tui/src/components/appChrome.tsx
index b3ec8bff21b..ed0588f5420 100644
--- a/ui-tui/src/components/appChrome.tsx
+++ b/ui-tui/src/components/appChrome.tsx
@@ -248,7 +248,6 @@ export interface StatusBarSegments {
   bg: boolean
   compactCtx: boolean
   compressions: boolean
-  cost: boolean
   duration: boolean
   subagents: boolean
   voice: boolean
@@ -264,8 +263,7 @@ export function statusBarSegments(cols: number): StatusBarSegments {
     compressions: w >= 80,
     voice: w >= 84,
     bg: w >= 88,
-    subagents: w >= 92,
-    cost: w >= 96
+    subagents: w >= 92
   }
 }
 
@@ -420,7 +418,6 @@ export function StatusRule({
   lastTurnEndedAt,
   liveSessionCount,
   sessionStartedAt,
-  showCost,
   turnStartedAt,
   voiceLabel,
   onSessionCountClick,
@@ -494,7 +491,6 @@ export function StatusRule({
 
   const sessionCountText = liveSessionCount > 0 ? statusSessionCountLabel(liveSessionCount) : ''
   const compressions = typeof usage.compressions === 'number' ? usage.compressions : 0
-  const costText = typeof usage.cost_usd === 'number' ? `$${usage.cost_usd.toFixed(4)}` : ''
   // Dev-only readout (HERMES_DEV_CREDITS). The server omits the key entirely unless the
   // flag is on, so this segment self-hides for normal users. micros→cents is allowed money
   // math (display formatting) — never parseFloat a *_usd. Signed: a mid-session top-up that
@@ -516,8 +512,7 @@ export function StatusRule({
   const showBg = segs.bg && bgCount > 0 && fits(SEP + stringWidth(`${bgCount} bg`))
   const subagentCount = typeof usage.active_subagents === 'number' ? usage.active_subagents : 0
   const showSubagents = segs.subagents && subagentCount > 0 && fits(SEP + stringWidth(`⛓ ${subagentCount}`))
-  const showCostSeg = segs.cost && showCost && !!costText && fits(SEP + stringWidth(costText))
-  // No segs flag / no showCost coupling — it's a server-gated dev readout, lowest priority,
+  // Dev-gated readout (HERMES_DEV_CREDITS), lowest priority,
   // so it consumes tail budget LAST and drops first on a narrow terminal.
   const showDevCredits = !!devCreditsText && fits(SEP + stringWidth(devCreditsText))
 
@@ -629,12 +624,6 @@ export function StatusRule({
             ⛓ {subagentCount}
           </Text>
         ) : null}
-        {showCostSeg ? (
-          <Text color={t.color.muted} wrap="truncate-end">
-            {' │ '}
-            {costText}
-          </Text>
-        ) : null}
         {showDevCredits ? (
           <Text color={t.color.accent} wrap="truncate-end">
             {' │ '}
@@ -772,7 +761,6 @@ interface StatusRuleProps {
   indicatorStyle?: IndicatorStyle
   notice?: Notice | null
   sessionStartedAt?: null | number
-  showCost: boolean
   status: string
   statusColor: string
   t: Theme
diff --git a/ui-tui/src/components/appLayout.tsx b/ui-tui/src/components/appLayout.tsx
index 7fa5dec1886..d3cd8383d52 100644
--- a/ui-tui/src/components/appLayout.tsx
+++ b/ui-tui/src/components/appLayout.tsx
@@ -394,7 +394,6 @@ const StatusRulePane = memo(function StatusRulePane({
         notice={ui.notice}
         onSessionCountClick={() => patchOverlayState({ sessions: true })}
         sessionStartedAt={status.sessionStartedAt}
-        showCost={ui.showCost}
         status={ui.status}
         statusColor={status.statusColor}
         t={ui.theme}
diff --git a/ui-tui/src/components/thinking.tsx b/ui-tui/src/components/thinking.tsx
index ce90cca2138..016c99138af 100644
--- a/ui-tui/src/components/thinking.tsx
+++ b/ui-tui/src/components/thinking.tsx
@@ -6,7 +6,6 @@ import { THINKING_COT_MAX } from '../config/limits.js'
 import { sectionMode } from '../domain/details.js'
 import {
   buildSubagentTree,
-  fmtCost,
   fmtTokens,
   formatSummary as formatSpawnSummary,
   hotnessBucket,
@@ -361,12 +360,6 @@ function SubagentAccordion({
     rollupBits.push(`${fmtTokens(localTokens)} tok`)
   }
 
-  const localCost = item.costUsd ?? 0
-
-  if (localCost > 0) {
-    rollupBits.push(fmtCost(localCost))
-  }
-
   const filesLocal = (item.filesWritten?.length ?? 0) + (item.filesRead?.length ?? 0)
 
   if (filesLocal > 0) {
@@ -380,12 +373,6 @@ function SubagentAccordion({
       rollupBits.push(`+${subtreeTools}t sub`)
     }
 
-    const subCost = aggregate.costUsd - localCost
-
-    if (subCost >= 0.01) {
-      rollupBits.push(`+${fmtCost(subCost)} sub`)
-    }
-
     if (aggregate.activeCount > 0 && item.status !== 'running') {
       rollupBits.push(`⚡${aggregate.activeCount}`)
     }
diff --git a/ui-tui/src/lib/subagentTree.ts b/ui-tui/src/lib/subagentTree.ts
index 513559b8076..3770bd2003f 100644
--- a/ui-tui/src/lib/subagentTree.ts
+++ b/ui-tui/src/lib/subagentTree.ts
@@ -252,10 +252,6 @@ export function formatSummary(totals: SubagentAggregate): string {
     pieces.push(`${fmtTokens(tokens)} tok`)
   }
 
-  if (totals.costUsd > 0) {
-    pieces.push(fmtCost(totals.costUsd))
-  }
-
   if (totals.activeCount > 0) {
     pieces.push(`⚡${totals.activeCount}`)
   }