mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
* remove Vercel AI Gateway provider and Vercel Sandbox terminal backend

  Both Vercel-hosted integrations are removed end-to-end. Users on the AI Gateway should switch to OpenRouter or one of the other aggregators (Nous Portal, Kilo Code). Users on the Vercel Sandbox backend should switch to Docker, Modal, Daytona, or SSH.

  What's removed:
  - `plugins/model-providers/ai-gateway/` provider plugin
  - `hermes_cli/vercel_auth.py` Vercel-Sandbox auth helper
  - `tools/environments/vercel_sandbox.py` terminal backend
  - `ai-gateway` provider wiring across auth, doctor, setup, models, config, status, providers, main, web_server, model_normalize, dump
  - `vercel_sandbox` backend wiring across terminal_tool, file_tools, code_execution_tool, file_operations, approval, skills_tool, environments/local, credential_files, lazy_deps, prompt_builder, cli, gateway/run
  - `AI_GATEWAY_BASE_URL` constant, `_AI_GATEWAY_HEADERS` auxiliary-client header set, run_agent base-URL header/reasoning special-cases
  - `[vercel]` pyproject extra and `vercel`/`vercel-workers` from uv.lock
  - env vars: `AI_GATEWAY_API_KEY`, `AI_GATEWAY_BASE_URL`, `VERCEL_TOKEN`, `VERCEL_PROJECT_ID`, `VERCEL_TEAM_ID`, `VERCEL_OIDC_TOKEN`, `TERMINAL_VERCEL_RUNTIME`
  - Tests: deletes test_ai_gateway_models.py and test_vercel_sandbox_environment.py; scrubs references across 23 surviving test files (no entire tests deleted unless they were dedicated to AI Gateway / Sandbox)
  - Docs: provider tables, env-var reference, setup guides, security notes, tool config, terminal-backend tables — English plus zh-Hans i18n parity
  - `hermes-agent` skill: provider table entry and remote-backend list

  What stays (intentional):
  - `popular-web-designs/templates/vercel.md` — CSS design reference, unrelated to Vercel-the-AI-product
  - `x-vercel-id` in `stream_diag.py` headers — generic Vercel CDN response header, useful diag signal on any Vercel-hosted endpoint
  - `vercel-labs/agent-browser` URL in browser config — lightpanda browser project, different OSS effort
  - `userStories.json` historical contributor entry mentioning Vercel Sandbox — archive, not active docs

  Validation:
  - 1153 tests in the 22 targeted files pass (`scripts/run_tests.sh`)
  - Full repo `py_compile` clean
  - Live import of every touched module + invariant check (no `ai-gateway` in `PROVIDER_REGISTRY`, no `_AI_GATEWAY_HEADERS`, no `vercel_sandbox` in `_REMOTE_TERMINAL_BACKENDS`)

* test: convert profile-count check from change-detector to invariant

  The hardcoded "== 34" assertion broke when ai-gateway was removed. Per AGENTS.md change-detector-test guidance, assert the relationship (registry count >= number of plugin dirs) instead of a literal count. Counts shift when providers are added/removed; that's expected.
226 lines
7.7 KiB
Python
226 lines
7.7 KiB
Python
from types import SimpleNamespace
|
||
|
||
from agent.usage_pricing import (
|
||
CanonicalUsage,
|
||
estimate_usage_cost,
|
||
get_pricing_entry,
|
||
normalize_usage,
|
||
)
|
||
|
||
|
||
def test_normalize_usage_anthropic_keeps_cache_buckets_separate():
    """Anthropic-style usage keeps input, cache-read and cache-write apart.

    The asserted ``prompt_tokens`` of 3400 equals the three input-side
    counts added together (1000 + 2000 + 400).
    """
    raw_fields = {
        "input_tokens": 1000,
        "output_tokens": 500,
        "cache_read_input_tokens": 2000,
        "cache_creation_input_tokens": 400,
    }

    result = normalize_usage(
        SimpleNamespace(**raw_fields),
        provider="anthropic",
        api_mode="anthropic_messages",
    )

    # Each bucket must come through unchanged ...
    assert result.input_tokens == 1000
    assert result.output_tokens == 500
    assert result.cache_read_tokens == 2000
    assert result.cache_write_tokens == 400
    # ... and the prompt total covers all three input-side buckets.
    assert result.prompt_tokens == 3400
|
||
|
||
|
||
def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
    """Chat-completions usage: cached tokens are carved out of the prompt count.

    With 3000 prompt tokens, 1800 of them reported as cached under
    ``prompt_tokens_details``, the test expects 1200 plain input tokens.
    """
    details = SimpleNamespace(cached_tokens=1800)
    raw_usage = SimpleNamespace(
        prompt_tokens=3000,
        completion_tokens=700,
        prompt_tokens_details=details,
    )

    result = normalize_usage(
        raw_usage,
        provider="openai",
        api_mode="chat_completions",
    )

    assert result.input_tokens == 1200  # 3000 - 1800
    assert result.cache_read_tokens == 1800
    assert result.output_tokens == 700
|
||
|
||
|
||
def test_normalize_usage_openai_reads_top_level_anthropic_cache_fields():
    """Top-level Anthropic cache-write counts are honoured on chat-completions.

    OpenAI-compatible proxies such as OpenRouter or Cline may, when routing
    Claude models, report Anthropic-style cache token counts directly on the
    usage object rather than inside ``prompt_tokens_details``.

    Regression guard for the bug fixed in cline/cline#10266: previously the
    chat-completions branch of normalize_usage() looked only at
    ``prompt_tokens_details.cache_write_tokens`` and never at
    ``cache_creation_input_tokens``, so cache writes were reported as 0 and
    the input token count was too high by the cache-write amount.
    """
    nested_details = SimpleNamespace(cached_tokens=500)
    raw_usage = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=nested_details,
        cache_creation_input_tokens=300,
    )

    result = normalize_usage(
        raw_usage,
        provider="openrouter",
        api_mode="chat_completions",
    )

    # Cache reads come from the nested (preferred) cached_tokens field;
    # cache writes fall back to the top-level cache_creation_input_tokens.
    assert result.cache_read_tokens == 500
    assert result.cache_write_tokens == 300
    # Remaining input = prompt total - cache read - cache write
    #                 = 1000 - 500 - 300 = 200
    assert result.input_tokens == 200
    assert result.output_tokens == 200
|
||
|
||
|
||
def test_normalize_usage_openai_reads_top_level_cache_read_when_details_missing():
    """Top-level cache fields are used when ``prompt_tokens_details`` is absent.

    Covers proxies that send only the Anthropic-style top-level counters.
    Regression guard for cline/cline#10266.
    """
    raw_fields = {
        "prompt_tokens": 1000,
        "completion_tokens": 200,
        "cache_read_input_tokens": 500,
        "cache_creation_input_tokens": 300,
    }

    result = normalize_usage(
        SimpleNamespace(**raw_fields),
        provider="openrouter",
        api_mode="chat_completions",
    )

    assert result.cache_read_tokens == 500
    assert result.cache_write_tokens == 300
    assert result.input_tokens == 200  # 1000 - 500 - 300
|
||
|
||
|
||
def test_normalize_usage_openai_prefers_prompt_tokens_details_over_top_level():
    """Nested OpenAI-standard cache fields win over top-level Anthropic ones.

    The top-level counters serve purely as a fallback for when the nested
    values are missing or zero; if both are supplied, the nested ones are
    expected in the normalized result.
    """
    nested_details = SimpleNamespace(cached_tokens=600, cache_write_tokens=150)
    raw_usage = SimpleNamespace(
        prompt_tokens=1000,
        completion_tokens=200,
        prompt_tokens_details=nested_details,
        # Deliberately mismatched sentinels: if either 999 shows up in the
        # result, the top-level fields were not ignored.
        cache_read_input_tokens=999,
        cache_creation_input_tokens=999,
    )

    result = normalize_usage(
        raw_usage,
        provider="openrouter",
        api_mode="chat_completions",
    )

    assert result.cache_read_tokens == 600
    assert result.cache_write_tokens == 150
|
||
|
||
|
||
def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
    """Per-token prices from the models metadata surface as per-million rates.

    ``fetch_model_metadata`` is stubbed so the lookup sees a single model
    whose prices are given per token as strings; the resulting pricing entry
    is expected to expose each of them multiplied by one million.
    """

    def _stub_fetch_model_metadata():
        # Build a fresh mapping on every call, exactly like the metadata
        # fetcher it stands in for would hand back.
        return {
            "anthropic/claude-opus-4.6": {
                "pricing": {
                    "prompt": "0.000005",
                    "completion": "0.000025",
                    "input_cache_read": "0.0000005",
                    "input_cache_write": "0.00000625",
                }
            }
        }

    monkeypatch.setattr(
        "agent.usage_pricing.fetch_model_metadata",
        _stub_fetch_model_metadata,
    )

    pricing = get_pricing_entry(
        "anthropic/claude-opus-4.6",
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    assert float(pricing.input_cost_per_million) == 5.0
    assert float(pricing.output_cost_per_million) == 25.0
    assert float(pricing.cache_read_cost_per_million) == 0.5
    assert float(pricing.cache_write_cost_per_million) == 6.25
|
||
|
||
|
||
def test_estimate_usage_cost_marks_subscription_routes_included():
    """The openai-codex route is reported as "included" at zero dollars."""
    token_usage = CanonicalUsage(input_tokens=1000, output_tokens=500)

    estimate = estimate_usage_cost(
        "gpt-5.3-codex",
        token_usage,
        provider="openai-codex",
        base_url="https://chatgpt.com/backend-api/codex",
    )

    assert estimate.status == "included"
    assert float(estimate.amount_usd) == 0.0
|
||
|
||
|
||
def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(monkeypatch):
    """Cache-read tokens without a cache rate leave the cost status "unknown".

    The stubbed metadata prices only ``prompt`` and ``completion``, while the
    usage being costed includes 100 cache-read tokens.
    """

    def _stub_fetch_model_metadata():
        # No input_cache_read / input_cache_write keys on purpose.
        return {
            "google/gemini-2.5-pro": {
                "pricing": {
                    "prompt": "0.00000125",
                    "completion": "0.00001",
                }
            }
        }

    monkeypatch.setattr(
        "agent.usage_pricing.fetch_model_metadata",
        _stub_fetch_model_metadata,
    )
    token_usage = CanonicalUsage(
        input_tokens=1000,
        output_tokens=500,
        cache_read_tokens=100,
    )

    estimate = estimate_usage_cost(
        "google/gemini-2.5-pro",
        token_usage,
        provider="openrouter",
        base_url="https://openrouter.ai/api/v1",
    )

    assert estimate.status == "unknown"
|
||
|
||
|
||
def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
    """Pricing is resolved for a ``custom`` provider from endpoint metadata.

    ``fetch_endpoint_model_metadata`` is stubbed to return per-token prices
    for one model; the entry is expected to carry them as per-million rates.
    """

    def _stub_fetch_endpoint_model_metadata(base_url, api_key=None):
        # Arguments are accepted but ignored; the payload is fixed.
        return {
            "zai-org/GLM-5-TEE": {
                "pricing": {
                    "prompt": "0.0000005",
                    "completion": "0.000002",
                }
            }
        }

    monkeypatch.setattr(
        "agent.usage_pricing.fetch_endpoint_model_metadata",
        _stub_fetch_endpoint_model_metadata,
    )

    pricing = get_pricing_entry(
        "zai-org/GLM-5-TEE",
        provider="custom",
        base_url="https://llm.chutes.ai/v1",
        api_key="test-key",
    )

    assert float(pricing.input_cost_per_million) == 0.5
    assert float(pricing.output_cost_per_million) == 2.0
|
||
|
||
|
||
def test_deepseek_v4_pro_pricing_entry_exists():
    """Regression test: a pricing entry must exist for deepseek-v4-pro.

    Sessions on deepseek-v4-pro used to show up with unknown cost in hermes
    insights, because the _OFFICIAL_DOCS_PRICING table lacked an entry for
    the model. See #24218.
    """
    pricing = get_pricing_entry("deepseek-v4-pro", provider="deepseek")

    # Presence checks first, so a missing entry or rate fails with a clear
    # message before any float() conversion is attempted.
    assert pricing is not None
    assert pricing.input_cost_per_million is not None
    assert pricing.output_cost_per_million is not None

    # Exact per-million rates expected for the model.
    assert float(pricing.input_cost_per_million) == 1.74
    assert float(pricing.output_cost_per_million) == 3.48
    assert float(pricing.cache_read_cost_per_million) == 0.0145
|
||
|
||
|
||
def test_deepseek_v4_pro_estimate_usage_cost():
    """A deepseek-v4-pro session yields a dollar estimate rather than unknown."""
    token_usage = CanonicalUsage(input_tokens=1_000_000, output_tokens=500_000)

    estimate = estimate_usage_cost(
        "deepseek-v4-pro",
        token_usage,
        provider="deepseek",
    )

    assert estimate.status == "estimated"
    assert estimate.amount_usd is not None
    # 1M input tokens at $1.74/M plus 500K output tokens at $3.48/M:
    # $1.74 + $1.74 = $3.48
    assert float(estimate.amount_usd) == 3.48
|