hermes-agent/tests/agent/test_usage_pricing.py
Teknium febc4cfec0
remove Vercel AI Gateway and Vercel Sandbox (#33067)
* remove Vercel AI Gateway provider and Vercel Sandbox terminal backend

Both Vercel-hosted integrations are removed end-to-end. Users on the AI
Gateway should switch to OpenRouter or one of the other aggregators
(Nous Portal, Kilo Code). Users on the Vercel Sandbox backend should
switch to Docker, Modal, Daytona, or SSH.

What's removed:
- `plugins/model-providers/ai-gateway/` provider plugin
- `hermes_cli/vercel_auth.py` Vercel-Sandbox auth helper
- `tools/environments/vercel_sandbox.py` terminal backend
- `ai-gateway` provider wiring across auth, doctor, setup, models,
  config, status, providers, main, web_server, model_normalize, dump
- `vercel_sandbox` backend wiring across terminal_tool, file_tools,
  code_execution_tool, file_operations, approval, skills_tool,
  environments/local, credential_files, lazy_deps, prompt_builder,
  cli, gateway/run
- `AI_GATEWAY_BASE_URL` constant, `_AI_GATEWAY_HEADERS` auxiliary-client
  header set, run_agent base-URL header/reasoning special-cases
- `[vercel]` pyproject extra and `vercel`/`vercel-workers` from uv.lock
- env vars: `AI_GATEWAY_API_KEY`, `AI_GATEWAY_BASE_URL`, `VERCEL_TOKEN`,
  `VERCEL_PROJECT_ID`, `VERCEL_TEAM_ID`, `VERCEL_OIDC_TOKEN`,
  `TERMINAL_VERCEL_RUNTIME`
- Tests: deletes test_ai_gateway_models.py and
  test_vercel_sandbox_environment.py; scrubs references across 23
  surviving test files (no entire tests deleted unless they were
  dedicated to AI Gateway / Sandbox)
- Docs: provider tables, env-var reference, setup guides, security
  notes, tool config, terminal-backend tables — English plus zh-Hans
  i18n parity
- `hermes-agent` skill: provider table entry and remote-backend list

What stays (intentional):
- `popular-web-designs/templates/vercel.md` — CSS design reference,
  unrelated to Vercel-the-AI-product
- `x-vercel-id` in `stream_diag.py` headers — generic Vercel CDN
  response header, useful diag signal on any Vercel-hosted endpoint
- `vercel-labs/agent-browser` URL in browser config — lightpanda
  browser project, different OSS effort
- `userStories.json` historical contributor entry mentioning Vercel
  Sandbox — archive, not active docs

Validation:
- 1153 tests in the 22 targeted files pass (`scripts/run_tests.sh`)
- Full repo `py_compile` clean
- Live import of every touched module + invariant check (no
  `ai-gateway` in `PROVIDER_REGISTRY`, no `_AI_GATEWAY_HEADERS`, no
  `vercel_sandbox` in `_REMOTE_TERMINAL_BACKENDS`)

* test: convert profile-count check from change-detector to invariant

The hardcoded "== 34" assertion broke when ai-gateway was removed.
Per AGENTS.md change-detector-test guidance, assert the relationship
(registry count >= number of plugin dirs) instead of a literal count.
Counts shift when providers are added/removed; that's expected.
2026-05-27 00:43:32 -07:00

226 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from types import SimpleNamespace
from agent.usage_pricing import (
CanonicalUsage,
estimate_usage_cost,
get_pricing_entry,
normalize_usage,
)
def test_normalize_usage_anthropic_keeps_cache_buckets_separate():
usage = SimpleNamespace(
input_tokens=1000,
output_tokens=500,
cache_read_input_tokens=2000,
cache_creation_input_tokens=400,
)
normalized = normalize_usage(usage, provider="anthropic", api_mode="anthropic_messages")
assert normalized.input_tokens == 1000
assert normalized.output_tokens == 500
assert normalized.cache_read_tokens == 2000
assert normalized.cache_write_tokens == 400
assert normalized.prompt_tokens == 3400
def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
usage = SimpleNamespace(
prompt_tokens=3000,
completion_tokens=700,
prompt_tokens_details=SimpleNamespace(cached_tokens=1800),
)
normalized = normalize_usage(usage, provider="openai", api_mode="chat_completions")
assert normalized.input_tokens == 1200
assert normalized.cache_read_tokens == 1800
assert normalized.output_tokens == 700
def test_normalize_usage_openai_reads_top_level_anthropic_cache_fields():
"""Some OpenAI-compatible proxies (OpenRouter, Cline) expose
Anthropic-style cache token counts at the top level of the usage object when
routing Claude models, instead of nesting them in prompt_tokens_details.
Regression guard for the bug fixed in cline/cline#10266 — before this fix,
the chat-completions branch of normalize_usage() only read
prompt_tokens_details.cache_write_tokens and completely missed the
cache_creation_input_tokens case, so cache writes showed as 0 and reflected
inputTokens were overstated by the cache-write amount.
"""
usage = SimpleNamespace(
prompt_tokens=1000,
completion_tokens=200,
prompt_tokens_details=SimpleNamespace(cached_tokens=500),
cache_creation_input_tokens=300,
)
normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")
# Expected: cache read from prompt_tokens_details.cached_tokens (preferred),
# cache write from top-level cache_creation_input_tokens (fallback).
assert normalized.cache_read_tokens == 500
assert normalized.cache_write_tokens == 300
# input_tokens = prompt_total - cache_read - cache_write = 1000 - 500 - 300 = 200
assert normalized.input_tokens == 200
assert normalized.output_tokens == 200
def test_normalize_usage_openai_reads_top_level_cache_read_when_details_missing():
"""Some proxies expose only top-level Anthropic-style fields with no
prompt_tokens_details object. Regression guard for cline/cline#10266.
"""
usage = SimpleNamespace(
prompt_tokens=1000,
completion_tokens=200,
cache_read_input_tokens=500,
cache_creation_input_tokens=300,
)
normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")
assert normalized.cache_read_tokens == 500
assert normalized.cache_write_tokens == 300
assert normalized.input_tokens == 200
def test_normalize_usage_openai_prefers_prompt_tokens_details_over_top_level():
"""When both prompt_tokens_details and top-level Anthropic fields are
present, we prefer the OpenAI-standard nested fields. Top-level Anthropic
fields are only a fallback when the nested ones are absent/zero.
"""
usage = SimpleNamespace(
prompt_tokens=1000,
completion_tokens=200,
prompt_tokens_details=SimpleNamespace(cached_tokens=600, cache_write_tokens=150),
# Intentionally different values — proving we ignore these when details exist.
cache_read_input_tokens=999,
cache_creation_input_tokens=999,
)
normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")
assert normalized.cache_read_tokens == 600
assert normalized.cache_write_tokens == 150
def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
monkeypatch.setattr(
"agent.usage_pricing.fetch_model_metadata",
lambda: {
"anthropic/claude-opus-4.6": {
"pricing": {
"prompt": "0.000005",
"completion": "0.000025",
"input_cache_read": "0.0000005",
"input_cache_write": "0.00000625",
}
}
},
)
entry = get_pricing_entry(
"anthropic/claude-opus-4.6",
provider="openrouter",
base_url="https://openrouter.ai/api/v1",
)
assert float(entry.input_cost_per_million) == 5.0
assert float(entry.output_cost_per_million) == 25.0
assert float(entry.cache_read_cost_per_million) == 0.5
assert float(entry.cache_write_cost_per_million) == 6.25
def test_estimate_usage_cost_marks_subscription_routes_included():
result = estimate_usage_cost(
"gpt-5.3-codex",
CanonicalUsage(input_tokens=1000, output_tokens=500),
provider="openai-codex",
base_url="https://chatgpt.com/backend-api/codex",
)
assert result.status == "included"
assert float(result.amount_usd) == 0.0
def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(monkeypatch):
monkeypatch.setattr(
"agent.usage_pricing.fetch_model_metadata",
lambda: {
"google/gemini-2.5-pro": {
"pricing": {
"prompt": "0.00000125",
"completion": "0.00001",
}
}
},
)
result = estimate_usage_cost(
"google/gemini-2.5-pro",
CanonicalUsage(input_tokens=1000, output_tokens=500, cache_read_tokens=100),
provider="openrouter",
base_url="https://openrouter.ai/api/v1",
)
assert result.status == "unknown"
def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
monkeypatch.setattr(
"agent.usage_pricing.fetch_endpoint_model_metadata",
lambda base_url, api_key=None: {
"zai-org/GLM-5-TEE": {
"pricing": {
"prompt": "0.0000005",
"completion": "0.000002",
}
}
},
)
entry = get_pricing_entry(
"zai-org/GLM-5-TEE",
provider="custom",
base_url="https://llm.chutes.ai/v1",
api_key="test-key",
)
assert float(entry.input_cost_per_million) == 0.5
assert float(entry.output_cost_per_million) == 2.0
def test_deepseek_v4_pro_pricing_entry_exists():
"""Regression test: deepseek-v4-pro must have a pricing entry.
Before this fix, deepseek-v4-pro sessions showed as unknown cost
in hermes insights because the _OFFICIAL_DOCS_PRICING table had no
entry for that model. See #24218.
"""
entry = get_pricing_entry(
"deepseek-v4-pro",
provider="deepseek",
)
assert entry is not None
assert entry.input_cost_per_million is not None
assert entry.output_cost_per_million is not None
assert float(entry.input_cost_per_million) == 1.74
assert float(entry.output_cost_per_million) == 3.48
assert float(entry.cache_read_cost_per_million) == 0.0145
def test_deepseek_v4_pro_estimate_usage_cost():
"""Ensure deepseek-v4-pro sessions get a dollar estimate, not unknown."""
result = estimate_usage_cost(
"deepseek-v4-pro",
CanonicalUsage(input_tokens=1000000, output_tokens=500000),
provider="deepseek",
)
assert result.status == "estimated"
assert result.amount_usd is not None
# 1M input × $1.74/M + 500K output × $3.48/M = $1.74 + $1.74 = $3.48
assert float(result.amount_usd) == 3.48