diff --git a/agent/insights.py b/agent/insights.py index df3b9e85c..8fc55e043 100644 --- a/agent/insights.py +++ b/agent/insights.py @@ -20,65 +20,16 @@ import json import time from collections import Counter, defaultdict from datetime import datetime -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List -# ========================================================================= -# Model pricing (USD per million tokens) — approximate as of early 2026 -# ========================================================================= -MODEL_PRICING = { - # OpenAI - "gpt-4o": {"input": 2.50, "output": 10.00}, - "gpt-4o-mini": {"input": 0.15, "output": 0.60}, - "gpt-4.1": {"input": 2.00, "output": 8.00}, - "gpt-4.1-mini": {"input": 0.40, "output": 1.60}, - "gpt-4.1-nano": {"input": 0.10, "output": 0.40}, - "gpt-4.5-preview": {"input": 75.00, "output": 150.00}, - "gpt-5": {"input": 10.00, "output": 30.00}, - "gpt-5.4": {"input": 10.00, "output": 30.00}, - "o3": {"input": 10.00, "output": 40.00}, - "o3-mini": {"input": 1.10, "output": 4.40}, - "o4-mini": {"input": 1.10, "output": 4.40}, - # Anthropic - "claude-opus-4-20250514": {"input": 15.00, "output": 75.00}, - "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00}, - "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00}, - "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00}, - "claude-3-opus-20240229": {"input": 15.00, "output": 75.00}, - "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25}, - # DeepSeek - "deepseek-chat": {"input": 0.14, "output": 0.28}, - "deepseek-reasoner": {"input": 0.55, "output": 2.19}, - # Google - "gemini-2.5-pro": {"input": 1.25, "output": 10.00}, - "gemini-2.5-flash": {"input": 0.15, "output": 0.60}, - "gemini-2.0-flash": {"input": 0.10, "output": 0.40}, - # Meta (via providers) - "llama-4-maverick": {"input": 0.50, "output": 0.70}, - "llama-4-scout": {"input": 0.20, "output": 0.30}, - # Z.AI / GLM (direct provider — pricing not 
published externally, treat as local) - "glm-5": {"input": 0.0, "output": 0.0}, - "glm-4.7": {"input": 0.0, "output": 0.0}, - "glm-4.5": {"input": 0.0, "output": 0.0}, - "glm-4.5-flash": {"input": 0.0, "output": 0.0}, - # Kimi / Moonshot (direct provider — pricing not published externally, treat as local) - "kimi-k2.5": {"input": 0.0, "output": 0.0}, - "kimi-k2-thinking": {"input": 0.0, "output": 0.0}, - "kimi-k2-turbo-preview": {"input": 0.0, "output": 0.0}, - "kimi-k2-0905-preview": {"input": 0.0, "output": 0.0}, - # MiniMax (direct provider — pricing not published externally, treat as local) - "MiniMax-M2.5": {"input": 0.0, "output": 0.0}, - "MiniMax-M2.5-highspeed": {"input": 0.0, "output": 0.0}, - "MiniMax-M2.1": {"input": 0.0, "output": 0.0}, -} +from agent.usage_pricing import DEFAULT_PRICING, estimate_cost_usd, format_duration_compact, get_pricing, has_known_pricing -# Fallback: unknown/custom models get zero cost (we can't assume pricing -# for self-hosted models, custom OAI endpoints, local inference, etc.) -_DEFAULT_PRICING = {"input": 0.0, "output": 0.0} +_DEFAULT_PRICING = DEFAULT_PRICING def _has_known_pricing(model_name: str) -> bool: """Check if a model has known pricing (vs unknown/custom endpoint).""" - return _get_pricing(model_name) is not _DEFAULT_PRICING + return has_known_pricing(model_name) def _get_pricing(model_name: str) -> Dict[str, float]: @@ -87,67 +38,17 @@ def _get_pricing(model_name: str) -> Dict[str, float]: Returns _DEFAULT_PRICING (zero cost) for unknown/custom models — we can't assume costs for self-hosted endpoints, local inference, etc. """ - if not model_name: - return _DEFAULT_PRICING - - # Strip provider prefix (e.g., "anthropic/claude-..." -> "claude-...") - bare = model_name.split("/")[-1].lower() - - # Exact match first - if bare in MODEL_PRICING: - return MODEL_PRICING[bare] - - # Fuzzy prefix match — prefer the LONGEST matching key to avoid - # e.g. 
"gpt-4o" matching before "gpt-4o-mini" for "gpt-4o-mini-2024-07-18" - best_match = None - best_len = 0 - for key, price in MODEL_PRICING.items(): - if bare.startswith(key) and len(key) > best_len: - best_match = price - best_len = len(key) - if best_match: - return best_match - - # Keyword heuristics (checked in most-specific-first order) - if "opus" in bare: - return {"input": 15.00, "output": 75.00} - if "sonnet" in bare: - return {"input": 3.00, "output": 15.00} - if "haiku" in bare: - return {"input": 0.80, "output": 4.00} - if "gpt-4o-mini" in bare: - return {"input": 0.15, "output": 0.60} - if "gpt-4o" in bare: - return {"input": 2.50, "output": 10.00} - if "gpt-5" in bare: - return {"input": 10.00, "output": 30.00} - if "deepseek" in bare: - return {"input": 0.14, "output": 0.28} - if "gemini" in bare: - return {"input": 0.15, "output": 0.60} - - return _DEFAULT_PRICING + return get_pricing(model_name) def _estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float: """Estimate the USD cost for a given model and token counts.""" - pricing = _get_pricing(model) - return (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000 + return estimate_cost_usd(model, input_tokens, output_tokens) def _format_duration(seconds: float) -> str: """Format seconds into a human-readable duration string.""" - if seconds < 60: - return f"{seconds:.0f}s" - minutes = seconds / 60 - if minutes < 60: - return f"{minutes:.0f}m" - hours = minutes / 60 - if hours < 24: - remaining_min = int(minutes % 60) - return f"{int(hours)}h {remaining_min}m" if remaining_min else f"{int(hours)}h" - days = hours / 24 - return f"{days:.1f}d" + return format_duration_compact(seconds) def _bar_chart(values: List[int], max_width: int = 20) -> List[str]: diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py new file mode 100644 index 000000000..5bfba25d4 --- /dev/null +++ b/agent/usage_pricing.py @@ -0,0 +1,134 @@ +from __future__ import annotations + 
from decimal import Decimal
from typing import Dict, Optional, Tuple


# Model pricing in USD per million tokens (approximate as of early 2026).
# Keys are bare model names, without a "provider/" prefix.
MODEL_PRICING: Dict[str, Dict[str, float]] = {
    # OpenAI
    "gpt-4o": {"input": 2.50, "output": 10.00},
    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "gpt-4.1": {"input": 2.00, "output": 8.00},
    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
    "gpt-4.5-preview": {"input": 75.00, "output": 150.00},
    "gpt-5": {"input": 10.00, "output": 30.00},
    "gpt-5.4": {"input": 10.00, "output": 30.00},
    "o3": {"input": 10.00, "output": 40.00},
    "o3-mini": {"input": 1.10, "output": 4.40},
    "o4-mini": {"input": 1.10, "output": 4.40},
    # Anthropic
    "claude-opus-4-20250514": {"input": 15.00, "output": 75.00},
    "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00},
    "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
    "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
    "claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
    "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
    # DeepSeek
    "deepseek-chat": {"input": 0.14, "output": 0.28},
    "deepseek-reasoner": {"input": 0.55, "output": 2.19},
    # Google
    "gemini-2.5-pro": {"input": 1.25, "output": 10.00},
    "gemini-2.5-flash": {"input": 0.15, "output": 0.60},
    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
    # Meta (via providers)
    "llama-4-maverick": {"input": 0.50, "output": 0.70},
    "llama-4-scout": {"input": 0.20, "output": 0.30},
    # Z.AI / GLM (direct provider; pricing not published externally, treat as local)
    "glm-5": {"input": 0.0, "output": 0.0},
    "glm-4.7": {"input": 0.0, "output": 0.0},
    "glm-4.5": {"input": 0.0, "output": 0.0},
    "glm-4.5-flash": {"input": 0.0, "output": 0.0},
    # Kimi / Moonshot (direct provider; pricing not published externally, treat as local)
    "kimi-k2.5": {"input": 0.0, "output": 0.0},
    "kimi-k2-thinking": {"input": 0.0, "output": 0.0},
    "kimi-k2-turbo-preview": {"input": 0.0, "output": 0.0},
    "kimi-k2-0905-preview": {"input": 0.0, "output": 0.0},
    # MiniMax (direct provider; pricing not published externally, treat as local)
    "MiniMax-M2.5": {"input": 0.0, "output": 0.0},
    "MiniMax-M2.5-highspeed": {"input": 0.0, "output": 0.0},
    "MiniMax-M2.1": {"input": 0.0, "output": 0.0},
}

# Fallback for unknown/custom models: zero cost, because no price can be
# assumed for self-hosted models, custom endpoints or local inference.
DEFAULT_PRICING: Dict[str, float] = {"input": 0.0, "output": 0.0}

# Keyword heuristics used when no table key matches. Order matters: entries are
# checked most-specific-first (e.g. "gpt-4o-mini" before "gpt-4o").
_KEYWORD_PRICING: Tuple[Tuple[str, Dict[str, float]], ...] = (
    ("opus", {"input": 15.00, "output": 75.00}),
    ("sonnet", {"input": 3.00, "output": 15.00}),
    ("haiku", {"input": 0.80, "output": 4.00}),
    ("gpt-4o-mini", {"input": 0.15, "output": 0.60}),
    ("gpt-4o", {"input": 2.50, "output": 10.00}),
    ("gpt-5", {"input": 10.00, "output": 30.00}),
    ("deepseek", {"input": 0.14, "output": 0.28}),
    ("gemini", {"input": 0.15, "output": 0.60}),
)

# Display units for compact token counts, largest first.
_TOKEN_UNITS: Tuple[Tuple[int, str], ...] = (
    (1_000_000_000, "B"),
    (1_000_000, "M"),
    (1_000, "K"),
)


def get_pricing(model_name: Optional[str]) -> Dict[str, float]:
    """Return the per-million-token pricing entry for *model_name*.

    Lookup order: exact table match, longest table key that prefixes the name,
    keyword heuristics, then ``DEFAULT_PRICING``. Any ``provider/`` prefix is
    stripped and matching is case-insensitive.

    Args:
        model_name: Model identifier, optionally ``provider/model``. Empty or
            ``None`` yields ``DEFAULT_PRICING``.

    Returns:
        A dict with ``"input"`` and ``"output"`` prices in USD per million
        tokens. The returned dict is shared; callers must not mutate it.
    """
    if not model_name:
        return DEFAULT_PRICING

    # Strip the provider prefix: "anthropic/claude-..." -> "claude-...".
    bare = model_name.split("/")[-1].lower()
    if bare in MODEL_PRICING:
        return MODEL_PRICING[bare]

    # Prefer the LONGEST matching key so "gpt-4o-mini-2024-07-18" resolves to
    # "gpt-4o-mini" rather than "gpt-4o". Keys are lower-cased for the
    # comparison because `bare` is lower-case; without this the mixed-case
    # "MiniMax-*" keys could never match.
    best_match = None
    best_len = 0
    for key, price in MODEL_PRICING.items():
        if len(key) > best_len and bare.startswith(key.lower()):
            best_match = price
            best_len = len(key)
    if best_match is not None:
        return best_match

    for keyword, price in _KEYWORD_PRICING:
        if keyword in bare:
            return price

    return DEFAULT_PRICING


def has_known_pricing(model_name: Optional[str]) -> bool:
    """Return True when *model_name* resolves to a non-zero price.

    Zero-priced table entries (providers whose pricing is not published) count
    as unknown, exactly like models that fall through to ``DEFAULT_PRICING``.
    """
    pricing = get_pricing(model_name)
    return pricing is not DEFAULT_PRICING and any(
        float(value) > 0 for value in pricing.values()
    )


def estimate_cost_usd(model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimate the USD cost of a request from its token counts.

    Args:
        model: Model identifier passed to :func:`get_pricing`.
        input_tokens: Prompt token count; ``None`` is treated as 0.
        output_tokens: Completion token count; ``None`` is treated as 0.

    Returns:
        The estimated cost in USD (0.0 for models without known pricing).
    """
    pricing = get_pricing(model)
    # Decimal arithmetic avoids accumulating binary floating-point error before
    # the single final conversion back to float.
    total = (
        Decimal(input_tokens or 0) * Decimal(str(pricing["input"]))
        + Decimal(output_tokens or 0) * Decimal(str(pricing["output"]))
    ) / Decimal("1000000")
    return float(total)


def format_duration_compact(seconds: float) -> str:
    """Format a duration in seconds as a short human-readable string.

    Produces ``"45s"``, ``"15m"``, ``"1h 30m"``, ``"2h"`` or ``"1.5d"``.
    Seconds and minutes are rounded to the nearest whole unit; a value that
    rounds up to a full 60 is promoted to the next unit (``"1m"``/``"1h"``)
    instead of being shown as ``"60s"``/``"60m"``.
    """
    if seconds < 60:
        whole_seconds = round(seconds)
        if whole_seconds < 60:
            return f"{whole_seconds}s"
        seconds = 60.0  # rounded up to a full minute

    minutes = seconds / 60
    if minutes < 60:
        whole_minutes = round(minutes)
        if whole_minutes < 60:
            return f"{whole_minutes}m"
        minutes = 60.0  # rounded up to a full hour

    hours = minutes / 60
    if hours < 24:
        remaining_min = int(minutes % 60)
        return f"{int(hours)}h {remaining_min}m" if remaining_min else f"{int(hours)}h"

    days = hours / 24
    return f"{days:.1f}d"


def format_token_count_compact(value: int) -> str:
    """Format a token count compactly, e.g. ``12450`` -> ``"12.4K"``.

    Values below 1000 are returned verbatim. Larger values are scaled to K/M/B
    with about three significant digits, and trailing fractional zeros are
    dropped (``"1.50"`` -> ``"1.5"``, ``"10.0"`` -> ``"10"``).
    """
    count = int(value)
    magnitude = abs(count)
    if magnitude < 1_000:
        return str(count)

    sign = "-" if value < 0 else ""
    # The K threshold always matches here because magnitude >= 1000.
    index = next(
        i for i, (threshold, _) in enumerate(_TOKEN_UNITS) if magnitude >= threshold
    )
    threshold, suffix = _TOKEN_UNITS[index]

    scaled = magnitude / threshold
    if scaled < 10:
        text = f"{scaled:.2f}"
    elif scaled < 100:
        text = f"{scaled:.1f}"
    else:
        text = f"{scaled:.0f}"

    # Only strip zeros after a decimal point: stripping them from an integer
    # rendering would turn "100" into "1" (100K displayed as 1K).
    if "." in text:
        text = text.rstrip("0").rstrip(".")

    # Rounding can reach 1000 of the current unit (999_950 -> "1000K");
    # promote to the next larger unit when one exists.
    if text == "1000" and index > 0:
        text = "1"
        suffix = _TOKEN_UNITS[index - 1][1]

    return f"{sign}{text}{suffix}"
units = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K")) + for threshold, suffix in units: + if abs_value >= threshold: + scaled = abs_value / threshold + if scaled < 10: + text = f"{scaled:.2f}" + elif scaled < 100: + text = f"{scaled:.1f}" + else: + text = f"{scaled:.0f}" + text = text.rstrip("0").rstrip(".") + return f"{sign}{text}{suffix}" + + return f"{value:,}" diff --git a/cli.py b/cli.py index 94433722f..2e17060af 100755 --- a/cli.py +++ b/cli.py @@ -58,6 +58,9 @@ except (ImportError, AttributeError): import threading import queue +from agent.usage_pricing import estimate_cost_usd, format_duration_compact, format_token_count_compact, has_known_pricing +from hermes_cli.banner import _format_context_length + _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏") @@ -1194,6 +1197,153 @@ class HermesCLI: self._last_invalidate = now self._app.invalidate() + def _status_bar_context_style(self, percent_used: Optional[int]) -> str: + if percent_used is None: + return "class:status-bar-dim" + if percent_used >= 95: + return "class:status-bar-critical" + if percent_used > 80: + return "class:status-bar-bad" + if percent_used >= 50: + return "class:status-bar-warn" + return "class:status-bar-good" + + def _build_context_bar(self, percent_used: Optional[int], width: int = 10) -> str: + safe_percent = max(0, min(100, percent_used or 0)) + filled = round((safe_percent / 100) * width) + return f"[{('█' * filled) + ('░' * max(0, width - filled))}]" + + def _get_status_bar_snapshot(self) -> Dict[str, Any]: + model_name = self.model or "unknown" + model_short = model_name.split("/")[-1] if "/" in model_name else model_name + if len(model_short) > 26: + model_short = f"{model_short[:23]}..." 
+ + elapsed_seconds = max(0.0, (datetime.now() - self.session_start).total_seconds()) + snapshot = { + "model_name": model_name, + "model_short": model_short, + "duration": format_duration_compact(elapsed_seconds), + "context_tokens": 0, + "context_length": None, + "context_percent": None, + "session_prompt_tokens": 0, + "session_completion_tokens": 0, + "session_total_tokens": 0, + "session_api_calls": 0, + "session_cost": 0.0, + "pricing_known": has_known_pricing(model_name), + "compressions": 0, + } + + agent = getattr(self, "agent", None) + if not agent: + return snapshot + + snapshot["session_prompt_tokens"] = getattr(agent, "session_prompt_tokens", 0) or 0 + snapshot["session_completion_tokens"] = getattr(agent, "session_completion_tokens", 0) or 0 + snapshot["session_total_tokens"] = getattr(agent, "session_total_tokens", 0) or 0 + snapshot["session_api_calls"] = getattr(agent, "session_api_calls", 0) or 0 + snapshot["session_cost"] = estimate_cost_usd( + model_name, + snapshot["session_prompt_tokens"], + snapshot["session_completion_tokens"], + ) + + compressor = getattr(agent, "context_compressor", None) + if compressor: + context_tokens = getattr(compressor, "last_prompt_tokens", 0) or 0 + context_length = getattr(compressor, "context_length", 0) or 0 + snapshot["context_tokens"] = context_tokens + snapshot["context_length"] = context_length or None + snapshot["compressions"] = getattr(compressor, "compression_count", 0) or 0 + if context_length: + snapshot["context_percent"] = max(0, min(100, round((context_tokens / context_length) * 100))) + + return snapshot + + def _build_status_bar_text(self, width: Optional[int] = None) -> str: + try: + snapshot = self._get_status_bar_snapshot() + width = width or shutil.get_terminal_size((80, 24)).columns + percent = snapshot["context_percent"] + percent_label = f"{percent}%" if percent is not None else "--" + cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" + 
duration_label = snapshot["duration"] + + if width < 52: + return f"⚕ {snapshot['model_short']} · {duration_label}" + if width < 76: + return f"⚕ {snapshot['model_short']} · {percent_label} · {cost_label} · {duration_label}" + + if snapshot["context_length"]: + ctx_total = _format_context_length(snapshot["context_length"]) + ctx_used = format_token_count_compact(snapshot["context_tokens"]) + context_label = f"{ctx_used}/{ctx_total}" + else: + context_label = "ctx --" + + return f"⚕ {snapshot['model_short']} │ {context_label} │ {percent_label} │ {cost_label} │ {duration_label}" + except Exception: + return f"⚕ {self.model if getattr(self, 'model', None) else 'Hermes'}" + + def _get_status_bar_fragments(self): + try: + snapshot = self._get_status_bar_snapshot() + width = shutil.get_terminal_size((80, 24)).columns + cost_label = f"${snapshot['session_cost']:.2f}" if snapshot["pricing_known"] else "cost n/a" + duration_label = snapshot["duration"] + + if width < 52: + return [ + ("class:status-bar", " ⚕ "), + ("class:status-bar-strong", snapshot["model_short"]), + ("class:status-bar-dim", " · "), + ("class:status-bar-dim", duration_label), + ("class:status-bar", " "), + ] + + percent = snapshot["context_percent"] + percent_label = f"{percent}%" if percent is not None else "--" + if width < 76: + return [ + ("class:status-bar", " ⚕ "), + ("class:status-bar-strong", snapshot["model_short"]), + ("class:status-bar-dim", " · "), + (self._status_bar_context_style(percent), percent_label), + ("class:status-bar-dim", " · "), + ("class:status-bar-dim", cost_label), + ("class:status-bar-dim", " · "), + ("class:status-bar-dim", duration_label), + ("class:status-bar", " "), + ] + + if snapshot["context_length"]: + ctx_total = _format_context_length(snapshot["context_length"]) + ctx_used = format_token_count_compact(snapshot["context_tokens"]) + context_label = f"{ctx_used}/{ctx_total}" + else: + context_label = "ctx --" + + bar_style = self._status_bar_context_style(percent) + 
return [ + ("class:status-bar", " ⚕ "), + ("class:status-bar-strong", snapshot["model_short"]), + ("class:status-bar-dim", " │ "), + ("class:status-bar-dim", context_label), + ("class:status-bar-dim", " │ "), + (bar_style, self._build_context_bar(percent)), + ("class:status-bar-dim", " "), + (bar_style, percent_label), + ("class:status-bar-dim", " │ "), + ("class:status-bar-dim", cost_label), + ("class:status-bar-dim", " │ "), + ("class:status-bar-dim", duration_label), + ("class:status-bar", " "), + ] + except Exception: + return [("class:status-bar", f" {self._build_status_bar_text()} ")] + def _normalize_model_for_provider(self, resolved_provider: str) -> bool: """Strip provider prefixes and swap the default model for Codex. @@ -3447,17 +3597,34 @@ class HermesCLI: compressions = compressor.compression_count msg_count = len(self.conversation_history) + cost = estimate_cost_usd(agent.model, prompt, completion) + prompt_cost = estimate_cost_usd(agent.model, prompt, 0) + completion_cost = estimate_cost_usd(agent.model, 0, completion) + pricing_known = has_known_pricing(agent.model) + elapsed = format_duration_compact((datetime.now() - self.session_start).total_seconds()) print(f" 📊 Session Token Usage") print(f" {'─' * 40}") + print(f" Model: {agent.model}") print(f" Prompt tokens (input): {prompt:>10,}") print(f" Completion tokens (output): {completion:>9,}") print(f" Total tokens: {total:>10,}") print(f" API calls: {calls:>10,}") + print(f" Session duration: {elapsed:>10}") + if pricing_known: + print(f" Input cost: ${prompt_cost:>10.4f}") + print(f" Output cost: ${completion_cost:>10.4f}") + print(f" Total cost: ${cost:>10.4f}") + else: + print(f" Input cost: {'n/a':>10}") + print(f" Output cost: {'n/a':>10}") + print(f" Total cost: {'n/a':>10}") print(f" {'─' * 40}") print(f" Current context: {last_prompt:,} / {ctx_len:,} ({pct:.0f}%)") print(f" Messages: {msg_count}") print(f" Compressions: {compressions}") + if not pricing_known: + print(f" Note: Pricing 
unknown for {agent.model}") if self.verbose: logging.getLogger().setLevel(logging.DEBUG) @@ -5657,6 +5824,11 @@ class HermesCLI: filter=Condition(lambda: cli_ref._voice_mode), ) + status_bar = Window( + content=FormattedTextControl(lambda: cli_ref._get_status_bar_fragments()), + height=1, + ) + # Layout: interactive prompt widgets + ruled input at bottom. # The sudo, approval, and clarify widgets appear above the input when # the corresponding interactive prompt is active. @@ -5669,6 +5841,7 @@ class HermesCLI: clarify_widget, spinner_widget, spacer, + status_bar, input_rule_top, image_bar, input_area, @@ -5685,6 +5858,13 @@ class HermesCLI: 'prompt': '#FFF8DC', 'prompt-working': '#888888 italic', 'hint': '#555555 italic', + 'status-bar': 'bg:#1a1a2e #C0C0C0', + 'status-bar-strong': 'bg:#1a1a2e #FFD700 bold', + 'status-bar-dim': 'bg:#1a1a2e #8B8682', + 'status-bar-good': 'bg:#1a1a2e #8FBC8F bold', + 'status-bar-warn': 'bg:#1a1a2e #FFD700 bold', + 'status-bar-bad': 'bg:#1a1a2e #FF8C00 bold', + 'status-bar-critical': 'bg:#1a1a2e #FF6B6B bold', # Bronze horizontal rules around the input area 'input-rule': '#CD7F32', # Clipboard image attachment badges @@ -5737,12 +5917,20 @@ class HermesCLI: def spinner_loop(): import time as _time + last_idle_refresh = 0.0 while not self._should_exit: - if self._command_running and self._app: + if not self._app: + _time.sleep(0.1) + continue + if self._command_running: self._invalidate(min_interval=0.1) _time.sleep(0.1) else: - _time.sleep(0.05) + now = _time.monotonic() + if now - last_idle_refresh >= 1.0: + last_idle_refresh = now + self._invalidate(min_interval=1.0) + _time.sleep(0.2) spinner_thread = threading.Thread(target=spinner_loop, daemon=True) spinner_thread.start() diff --git a/tests/test_cli_status_bar.py b/tests/test_cli_status_bar.py new file mode 100644 index 000000000..c5225ce91 --- /dev/null +++ b/tests/test_cli_status_bar.py @@ -0,0 +1,160 @@ +from datetime import datetime, timedelta +from types import 
SimpleNamespace + +from cli import HermesCLI + + +def _make_cli(model: str = "anthropic/claude-sonnet-4-20250514"): + cli_obj = HermesCLI.__new__(HermesCLI) + cli_obj.model = model + cli_obj.session_start = datetime.now() - timedelta(minutes=14, seconds=32) + cli_obj.conversation_history = [{"role": "user", "content": "hi"}] + cli_obj.agent = None + return cli_obj + + +def _attach_agent( + cli_obj, + *, + prompt_tokens: int, + completion_tokens: int, + total_tokens: int, + api_calls: int, + context_tokens: int, + context_length: int, + compressions: int = 0, +): + cli_obj.agent = SimpleNamespace( + model=cli_obj.model, + session_prompt_tokens=prompt_tokens, + session_completion_tokens=completion_tokens, + session_total_tokens=total_tokens, + session_api_calls=api_calls, + context_compressor=SimpleNamespace( + last_prompt_tokens=context_tokens, + context_length=context_length, + compression_count=compressions, + ), + ) + return cli_obj + + +class TestCLIStatusBar: + def test_context_style_thresholds(self): + cli_obj = _make_cli() + + assert cli_obj._status_bar_context_style(None) == "class:status-bar-dim" + assert cli_obj._status_bar_context_style(10) == "class:status-bar-good" + assert cli_obj._status_bar_context_style(50) == "class:status-bar-warn" + assert cli_obj._status_bar_context_style(81) == "class:status-bar-bad" + assert cli_obj._status_bar_context_style(95) == "class:status-bar-critical" + + def test_build_status_bar_text_for_wide_terminal(self): + cli_obj = _attach_agent( + _make_cli(), + prompt_tokens=10_230, + completion_tokens=2_220, + total_tokens=12_450, + api_calls=7, + context_tokens=12_450, + context_length=200_000, + ) + + text = cli_obj._build_status_bar_text(width=120) + + assert "claude-sonnet-4-20250514" in text + assert "12.4K/200K" in text + assert "6%" in text + assert "$0.06" in text + assert "15m" in text + + def test_build_status_bar_text_collapses_for_narrow_terminal(self): + cli_obj = _attach_agent( + _make_cli(), + 
prompt_tokens=10_230, + completion_tokens=2_220, + total_tokens=12_450, + api_calls=7, + context_tokens=12_450, + context_length=200_000, + ) + + text = cli_obj._build_status_bar_text(width=60) + + assert "⚕" in text + assert "$0.06" in text + assert "15m" in text + assert "200K" not in text + + def test_build_status_bar_text_handles_missing_agent(self): + cli_obj = _make_cli() + + text = cli_obj._build_status_bar_text(width=100) + + assert "⚕" in text + assert "claude-sonnet-4-20250514" in text + + +class TestCLIUsageReport: + def test_show_usage_includes_estimated_cost(self, capsys): + cli_obj = _attach_agent( + _make_cli(), + prompt_tokens=10_230, + completion_tokens=2_220, + total_tokens=12_450, + api_calls=7, + context_tokens=12_450, + context_length=200_000, + compressions=1, + ) + cli_obj.verbose = False + + cli_obj._show_usage() + output = capsys.readouterr().out + + assert "Model:" in output + assert "Input cost:" in output + assert "Output cost:" in output + assert "Total cost:" in output + assert "$" in output + assert "0.064" in output + assert "Session duration:" in output + assert "Compressions:" in output + + def test_show_usage_marks_unknown_pricing(self, capsys): + cli_obj = _attach_agent( + _make_cli(model="local/my-custom-model"), + prompt_tokens=1_000, + completion_tokens=500, + total_tokens=1_500, + api_calls=1, + context_tokens=1_000, + context_length=32_000, + ) + cli_obj.verbose = False + + cli_obj._show_usage() + output = capsys.readouterr().out + + assert "Total cost:" in output + assert "n/a" in output + assert "Pricing unknown for local/my-custom-model" in output + + def test_zero_priced_provider_models_stay_unknown(self, capsys): + cli_obj = _attach_agent( + _make_cli(model="glm-5"), + prompt_tokens=1_000, + completion_tokens=500, + total_tokens=1_500, + api_calls=1, + context_tokens=1_000, + context_length=32_000, + ) + cli_obj.verbose = False + + cli_obj._show_usage() + output = capsys.readouterr().out + + assert "Total cost:" in 
output + assert "n/a" in output + assert "Pricing unknown for glm-5" in output diff --git a/tests/test_insights.py b/tests/test_insights.py index 0f598f9a6..6f6280a1d 100644 --- a/tests/test_insights.py +++ b/tests/test_insights.py @@ -206,6 +206,7 @@ class TestHasKnownPricing: def test_unknown_custom_model(self): assert _has_known_pricing("FP16_Hermes_4.5") is False assert _has_known_pricing("my-custom-model") is False + assert _has_known_pricing("glm-5") is False assert _has_known_pricing("") is False assert _has_known_pricing(None) is False