hermes-agent/tests/gateway/test_usage_command.py
Teknium fd2a35b169
fix: stop reporting cache-hit rate and cost across all UI surfaces (#52717)
* fix: stop reporting cache-hit rate and cost across all UI surfaces

Cost estimates and cache read/write token reporting are unreliable on
providers that don't surface cached_tokens (e.g. ollama-cloud, which doesn't
implement prompt_tokens_details.cached_tokens), producing misleading
near-zero 'cache hit' readouts and cost figures. Remove cost + cache-hit
reporting from every user-facing surface; keep input/output/total token
counts (provider-agnostic and accurate) and the Nous account billing UI
(real account money, separate from per-conversation estimates).

Surfaces:
- CLI /usage + model-info: drop cost lines + cache read/write token lines
- Gateway /usage + /model: drop cost + cache lines
- tui_gateway/server.py: stop emitting cost_usd / cache_read in usage and
  subagent.complete payloads
- TUI (Ink): drop cost from status bar (+ showCost plumbing), /usage panel,
  thinking rollup, agents overlay (incl. compare view); keep token counts
- Desktop Command Center: drop cost stat, per-model cost, actual-cost hint

Underlying estimate_usage_cost / format_cost / insights cost columns are
left intact but no longer surfaced (display-only change, reversible).

* test: update TUI + gateway + CLI tests for removed cost/cache-hit reporting

- CLI /usage test asserts cost/cache lines are absent, tokens present
- gateway /usage test drops cost + cache asserts; removes cost-included test
- TUI subagentTree summary expectation drops the cost segment
- useConfigSync + appChrome status-rule tests drop showCost prop/state
2026-06-25 15:21:22 -07:00

244 lines
9.6 KiB
Python

"""Tests for gateway /usage command — agent cache lookup and output fields."""
import threading
from unittest.mock import MagicMock, patch
import pytest
def _make_mock_agent(**overrides):
"""Create a mock AIAgent with realistic session counters."""
agent = MagicMock()
defaults = {
"model": "anthropic/claude-sonnet-4.6",
"provider": "openrouter",
"base_url": None,
"session_total_tokens": 50_000,
"session_api_calls": 5,
"session_prompt_tokens": 40_000,
"session_completion_tokens": 10_000,
"session_input_tokens": 35_000,
"session_output_tokens": 10_000,
"session_cache_read_tokens": 5_000,
"session_cache_write_tokens": 2_000,
}
defaults.update(overrides)
for k, v in defaults.items():
setattr(agent, k, v)
# Rate limit state
rl = MagicMock()
rl.has_data = True
agent.get_rate_limit_state.return_value = rl
# Context compressor
ctx = MagicMock()
ctx.last_prompt_tokens = 30_000
ctx.context_length = 200_000
ctx.compression_count = 1
agent.context_compressor = ctx
return agent
def _make_runner(session_key, agent=None, cached_agent=None):
    """Return a GatewayRunner shell holding only what _handle_usage_command needs.

    The instance is allocated with ``object.__new__`` so ``__init__`` never
    runs.  ``agent`` (when given) is registered as the running agent for
    ``session_key``; ``cached_agent`` (when given) is stored in the agent
    cache as an ``(agent, "sig")`` pair under the same key.
    """
    from gateway.run import GatewayRunner

    running_agents = {} if agent is None else {session_key: agent}
    agent_cache = {} if cached_agent is None else {session_key: (cached_agent, "sig")}

    runner = object.__new__(GatewayRunner)
    runner._running_agents = running_agents
    runner._running_agents_ts = {}
    runner._agent_cache = agent_cache
    runner._agent_cache_lock = threading.Lock()
    runner.session_store = MagicMock()
    # Wire helper: every source resolves to the session key under test.
    runner._session_key_for_source = MagicMock(return_value=session_key)
    return runner
# Session key shared by the tests in this module: it is passed to _make_runner,
# which keys the runner's agent dicts on it and makes the mocked
# _session_key_for_source return it.
SK = "agent:main:telegram:private:12345"
class TestUsageCachedAgent:
    """The main fix: /usage must locate agents parked in _agent_cache between turns."""

    # Patch targets shared by the tests below.
    _RATE_LIMIT_FMT = "agent.rate_limit_tracker.format_rate_limit_compact"
    # NOTE(review): cost output is no longer asserted anywhere in this class;
    # confirm whether the handler still calls the estimator before dropping
    # these stubs.
    _COST_ESTIMATOR = "agent.usage_pricing.estimate_usage_cost"

    @pytest.mark.asyncio
    async def test_cached_agent_shows_detailed_usage(self):
        """A cache-only agent produces the detailed token report without cost/cache lines."""
        runner = _make_runner(SK, cached_agent=_make_mock_agent())
        usage_event = MagicMock()

        with patch(self._RATE_LIMIT_FMT, return_value="RPM: 50/60"):
            output = await runner._handle_usage_command(usage_event)

        assert "claude-sonnet-4.6" in output
        expected_figures = (
            "35,000",  # input tokens
            "10,000",  # output tokens
            "50,000",  # total
            "30,000",  # context
        )
        for figure in expected_figures:
            assert figure in output
        assert "Compressions: 1" in output
        # Cost and cache-hit reporting is removed everywhere.
        for banned in ("$", "Cache read", "Cache write", "Cost"):
            assert banned not in output

    @pytest.mark.asyncio
    async def test_running_agent_preferred_over_cache(self):
        """When agent is in both dicts, the running one wins."""
        live = _make_mock_agent(session_api_calls=10, session_total_tokens=80_000)
        parked = _make_mock_agent(session_api_calls=5, session_total_tokens=50_000)
        runner = _make_runner(SK, agent=live, cached_agent=parked)
        usage_event = MagicMock()
        unknown_cost = MagicMock(amount_usd=None, status="unknown")

        with patch(self._RATE_LIMIT_FMT, return_value="RPM: 50/60"):
            with patch(self._COST_ESTIMATOR, return_value=unknown_cost):
                output = await runner._handle_usage_command(usage_event)

        assert "80,000" in output  # running agent's total
        assert "API calls: 10" in output

    @pytest.mark.asyncio
    async def test_sentinel_skipped_uses_cache(self):
        """PENDING sentinel in _running_agents should fall through to cache."""
        from gateway.run import _AGENT_PENDING_SENTINEL

        runner = _make_runner(SK, cached_agent=_make_mock_agent())
        runner._running_agents[SK] = _AGENT_PENDING_SENTINEL
        usage_event = MagicMock()
        unknown_cost = MagicMock(amount_usd=None, status="unknown")

        with patch(self._RATE_LIMIT_FMT, return_value="RPM: 50/60"):
            with patch(self._COST_ESTIMATOR, return_value=unknown_cost):
                output = await runner._handle_usage_command(usage_event)

        assert "claude-sonnet-4.6" in output
        assert "Session Token Usage" in output

    @pytest.mark.asyncio
    async def test_no_agent_anywhere_falls_to_history(self):
        """No running or cached agent → rough estimate from transcript."""
        runner = _make_runner(SK)
        store = runner.session_store
        store.get_or_create_session.return_value = MagicMock(session_id="sess123")
        store.load_transcript.return_value = [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi there"},
        ]
        usage_event = MagicMock()

        with patch("agent.model_metadata.estimate_messages_tokens_rough", return_value=500):
            output = await runner._handle_usage_command(usage_event)

        assert "Session Info" in output
        assert "Messages: 2" in output
        assert "~500" in output

    @pytest.mark.asyncio
    async def test_cache_read_write_hidden_when_zero(self):
        """Cache token lines are absent when both cache counters are zero."""
        zero_cache_agent = _make_mock_agent(
            session_cache_read_tokens=0,
            session_cache_write_tokens=0,
        )
        runner = _make_runner(SK, cached_agent=zero_cache_agent)
        usage_event = MagicMock()
        unknown_cost = MagicMock(amount_usd=None, status="unknown")

        with patch(self._RATE_LIMIT_FMT, return_value="RPM: 50/60"):
            with patch(self._COST_ESTIMATOR, return_value=unknown_cost):
                output = await runner._handle_usage_command(usage_event)

        for banned in ("Cache read", "Cache write"):
            assert banned not in output
class TestUsageAccountSection:
    """Account-limits section appended to /usage output (PR #2486)."""

    @pytest.mark.asyncio
    async def test_usage_command_includes_account_section(self, monkeypatch):
        """A cached openai-codex agent gets both the token block and the account block."""
        agent = _make_mock_agent(provider="openai-codex")
        agent.base_url = "https://chatgpt.com/backend-api/codex"
        agent.api_key = "unused"
        runner = _make_runner(SK, cached_agent=agent)
        event = MagicMock()

        # Stub the account lookup and its renderer so no real provider is hit.
        monkeypatch.setattr(
            "gateway.slash_commands.fetch_account_usage",
            lambda provider, base_url=None, api_key=None: object(),
        )
        monkeypatch.setattr(
            "gateway.slash_commands.render_account_usage_lines",
            lambda snapshot, markdown=False: [
                "📈 **Account limits**",
                "Provider: openai-codex (Pro)",
                "Session: 85% remaining (15% used)",
            ],
        )

        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
                patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
            mock_cost.return_value = MagicMock(amount_usd=None, status="included")
            result = await runner._handle_usage_command(event)

        assert "📊 **Session Token Usage**" in result
        assert "📈 **Account limits**" in result
        assert "Provider: openai-codex (Pro)" in result

    @pytest.mark.asyncio
    async def test_usage_command_uses_persisted_provider_when_agent_not_running(self, monkeypatch):
        """With no agent available, the provider/base URL come from the session DB row."""
        runner = _make_runner(SK)
        runner._session_db = MagicMock()
        runner._session_db.get_session.return_value = {
            "billing_provider": "openai-codex",
            "billing_base_url": "https://chatgpt.com/backend-api/codex",
        }
        session_entry = MagicMock()
        session_entry.session_id = "sess-1"
        runner.session_store.get_or_create_session.return_value = session_entry
        runner.session_store.load_transcript.return_value = [
            {"role": "user", "content": "earlier"},
        ]

        calls = []

        async def _fake_to_thread(fn, *args, **kwargs):
            # /usage dispatches BOTH the account fetch (fetch_account_usage, called
            # with the provider positionally) and the Nous credits fetch
            # (nous_credits_lines, markdown-only) through to_thread — record every
            # call rather than last-wins so we can pick out the account fetch.
            calls.append({"args": args, "kwargs": kwargs})
            return fn(*args, **kwargs)

        monkeypatch.setattr("gateway.run.asyncio.to_thread", _fake_to_thread)
        monkeypatch.setattr(
            "gateway.slash_commands.fetch_account_usage",
            lambda provider, base_url=None, api_key=None: object(),
        )
        monkeypatch.setattr(
            "gateway.slash_commands.render_account_usage_lines",
            lambda snapshot, markdown=False: [
                "📈 **Account limits**",
                "Provider: openai-codex (Pro)",
            ],
        )
        # The credits block routes through the shared nous_credits_lines() helper;
        # stub it so this account-section test stays hermetic (no portal/auth lookup).
        monkeypatch.setattr("agent.account_usage.nous_credits_lines", lambda markdown=False: [])

        event = MagicMock()
        result = await runner._handle_usage_command(event)

        # Use a default with next(): a bare next() that finds nothing raises
        # StopIteration, which a coroutine converts into an opaque RuntimeError
        # instead of a readable test failure.
        account_call = next(
            (c for c in calls if c["args"] == ("openai-codex",)),
            None,
        )
        assert account_call is not None, (
            f"no account-usage fetch for 'openai-codex' among to_thread calls: {calls!r}"
        )
        assert account_call["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
        assert "📊 **Session Info**" in result
        assert "📈 **Account limits**" in result