mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
feat: add claude-opus-4.8 and claude-opus-4.8-fast (#34003)
Anthropic released Claude Opus 4.8 on 2026-05-27, available on OpenRouter, Anthropic, Amazon Bedrock, and Claude Platform on AWS: - https://openrouter.ai/anthropic/claude-opus-4.8 - https://openrouter.ai/anthropic/claude-opus-4.8-fast The fast-mode variant is a separate model ID (anthropic/claude-opus-4.8-fast) priced at 2x of the base model — a notable improvement over the 6x premium on older Opus generations (4.6/4.7). It is NOT a `speed: "fast"` request parameter like Opus 4.6; Anthropic's native fast-mode beta still only covers Opus 4.6. Changes: hermes_cli/models.py - Add anthropic/claude-opus-4.8 + anthropic/claude-opus-4.8-fast to the OpenRouter fallback snapshot and the Nous Portal curated list (live catalogs surface them automatically when reachable; the fallback list matters when the manifest fetch fails). - Add claude-opus-4-8 to the Anthropic-native picker list. agent/model_metadata.py - Register claude-opus-4-8 / claude-opus-4.8 in DEFAULT_CONTEXT_LENGTHS with 1M tokens (matches 4.6/4.7). agent/anthropic_adapter.py - Extend _XHIGH_EFFORT_SUBSTRINGS, _ADAPTIVE_THINKING_SUBSTRINGS, and _NO_SAMPLING_PARAMS_SUBSTRINGS with "4-8"/"4.8". 4.8 inherits the Opus 4.7 API contract: adaptive thinking only, xhigh effort level supported, sampling parameters (temperature/top_p/top_k) return 400. - Add claude-opus-4-8 to _ANTHROPIC_OUTPUT_LIMITS (128k max output, same as 4.7). Matches by substring so claude-opus-4-8-fast and date-stamped variants resolve correctly. agent/usage_pricing.py - Add anthropic/claude-opus-4-8: $5/$25 per MTok input/output, $0.50 cache read, $6.25 cache write (same as 4.6/4.7). - Add anthropic/claude-opus-4-8-fast: $10/$50 per MTok (2x), $1.00 cache read, $12.50 cache write. Per OpenRouter, the 2x premium is the only differentiator from regular Opus 4.8. - OpenRouter routes still pull pricing from the live /models API, so no static OpenRouter entry is needed. tests/agent/test_model_metadata.py - Extend the Claude 4.6+ context-length tag list with 4.8/4-8. website/static/api/model-catalog.json - Regenerated via `python scripts/build_model_catalog.py` to pick up the new entries in the OpenRouter and Nous Portal fallback lists. E2E verification (isolated sys.path import against the worktree): - _supports_adaptive_thinking, _supports_xhigh_effort, _forbids_sampling_params all return True for claude-opus-4.8 and claude-opus-4.8-fast. - _supports_fast_mode (the `speed: "fast"` request-parameter gate) stays False for 4.8 — fast mode is a separate model ID on OpenRouter, not a parameter Anthropic accepts on the base model. - DEFAULT_CONTEXT_LENGTHS resolves 1M for both notations. - resolve_billing_route + _lookup_official_docs_pricing resolve the correct $5/$25 (regular) and $10/$50 (fast) pricing for both dot-notation and dash-notation inputs. - 4.7 and 4.6 regression: behavior unchanged. Unit tests: 305 passed across tests/agent/test_usage_pricing.py, test_model_metadata.py, tests/hermes_cli/test_model_catalog.py, test_models.py, test_model_validation.py, test_models_dev_preferred_merge.py.
This commit is contained in:
parent
e8b9369a9d
commit
1a74795735
7 changed files with 65 additions and 7 deletions
|
|
@ -77,16 +77,16 @@ ADAPTIVE_EFFORT_MAP = {
|
|||
# xhigh as a distinct level between high and max; older adaptive-thinking
|
||||
# models (4.6) reject it with a 400. Keep this substring list in sync with
|
||||
# the Anthropic migration guide as new model families ship.
|
||||
_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
|
||||
_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
|
||||
|
||||
# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
|
||||
# is the only supported mode; 4.7 additionally forbids manual thinking entirely
|
||||
# and drops temperature/top_p/top_k).
|
||||
_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
|
||||
_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7", "4-8", "4.8")
|
||||
|
||||
# Models where temperature/top_p/top_k return 400 if set to non-default values.
|
||||
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
|
||||
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
|
||||
_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
|
||||
_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
|
||||
|
||||
# ── Max output token limits per Anthropic model ───────────────────────
|
||||
|
|
@ -94,6 +94,8 @@ _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
|
|||
# max_tokens as a mandatory field. Previously we hardcoded 16384, which
|
||||
# starves thinking-enabled models (thinking tokens count toward the limit).
|
||||
_ANTHROPIC_OUTPUT_LIMITS = {
|
||||
# Claude 4.8
|
||||
"claude-opus-4-8": 128_000,
|
||||
# Claude 4.7
|
||||
"claude-opus-4-7": 128_000,
|
||||
# Claude 4.6
|
||||
|
|
|
|||
|
|
@ -141,6 +141,8 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
# fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
|
||||
# substring of "anthropic/claude-sonnet-4.6").
|
||||
# OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
|
||||
"claude-opus-4-8": 1000000,
|
||||
"claude-opus-4.8": 1000000,
|
||||
"claude-opus-4-7": 1000000,
|
||||
"claude-opus-4.7": 1000000,
|
||||
"claude-opus-4-6": 1000000,
|
||||
|
|
|
|||
|
|
@ -83,6 +83,34 @@ _UTC_NOW = lambda: datetime.now(timezone.utc)
|
|||
# Official docs snapshot entries. Models whose published pricing and cache
|
||||
# semantics are stable enough to encode exactly.
|
||||
_OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
|
||||
# ── Anthropic Claude 4.8 ─────────────────────────────────────────────
|
||||
# Same $5/$25 base pricing as 4.6/4.7. Fast-mode variant is a separate
|
||||
# model ID with 2x premium (vs the 6x premium on older Opus generations).
|
||||
# Source: https://openrouter.ai/anthropic/claude-opus-4.8
|
||||
(
|
||||
"anthropic",
|
||||
"claude-opus-4-8",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("5.00"),
|
||||
output_cost_per_million=Decimal("25.00"),
|
||||
cache_read_cost_per_million=Decimal("0.50"),
|
||||
cache_write_cost_per_million=Decimal("6.25"),
|
||||
source="official_docs_snapshot",
|
||||
source_url="https://platform.claude.com/docs/en/about-claude/pricing",
|
||||
pricing_version="anthropic-pricing-2026-05",
|
||||
),
|
||||
(
|
||||
"anthropic",
|
||||
"claude-opus-4-8-fast",
|
||||
): PricingEntry(
|
||||
input_cost_per_million=Decimal("10.00"),
|
||||
output_cost_per_million=Decimal("50.00"),
|
||||
cache_read_cost_per_million=Decimal("1.00"),
|
||||
cache_write_cost_per_million=Decimal("12.50"),
|
||||
source="official_docs_snapshot",
|
||||
source_url="https://openrouter.ai/anthropic/claude-opus-4.8-fast",
|
||||
pricing_version="anthropic-pricing-2026-05",
|
||||
),
|
||||
# ── Anthropic Claude 4.7 ─────────────────────────────────────────────
|
||||
# Opus 4.5/4.6/4.7 share $5/$25 pricing (new tokenizer, up to 35% more
|
||||
# tokens for the same text).
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
|
|||
# Fallback OpenRouter snapshot used when the live catalog is unavailable.
|
||||
# (model_id, display description shown in menus)
|
||||
OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("anthropic/claude-opus-4.8", ""),
|
||||
("anthropic/claude-opus-4.8-fast", "2x price, higher output speed"),
|
||||
("anthropic/claude-opus-4.7", ""),
|
||||
("anthropic/claude-opus-4.6", ""),
|
||||
("anthropic/claude-sonnet-4.6", ""),
|
||||
|
|
@ -139,6 +141,7 @@ def _xai_curated_models() -> list[str]:
|
|||
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"anthropic/claude-opus-4.8",
|
||||
"anthropic/claude-opus-4.7",
|
||||
"anthropic/claude-opus-4.6",
|
||||
"anthropic/claude-sonnet-4.6",
|
||||
|
|
@ -290,6 +293,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"MiniMax-M2",
|
||||
],
|
||||
"anthropic": [
|
||||
"claude-opus-4-8",
|
||||
"claude-opus-4-7",
|
||||
"claude-opus-4-6",
|
||||
"claude-sonnet-4-6",
|
||||
|
|
|
|||
|
|
@ -1188,16 +1188,27 @@ class TestBuildAnthropicKwargs:
|
|||
# params through its signature, we exercise the strip behavior by
|
||||
# calling the internal predicate directly.
|
||||
from agent.anthropic_adapter import _forbids_sampling_params
|
||||
assert _forbids_sampling_params("claude-opus-4-8") is True
|
||||
assert _forbids_sampling_params("claude-opus-4-8-fast") is True
|
||||
assert _forbids_sampling_params("claude-opus-4-7") is True
|
||||
assert _forbids_sampling_params("claude-opus-4-6") is False
|
||||
assert _forbids_sampling_params("claude-sonnet-4-5") is False
|
||||
|
||||
def test_supports_fast_mode_predicate(self):
|
||||
"""Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded."""
|
||||
"""Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded.
|
||||
|
||||
For Opus 4.8 the fast variant is a separate model ID
|
||||
(anthropic/claude-opus-4.8-fast) routed through the normal model
|
||||
field, NOT via the ``speed: "fast"`` request parameter. So
|
||||
``_supports_fast_mode`` (which gates the parameter) must stay
|
||||
False for both opus-4-8 and opus-4-8-fast.
|
||||
"""
|
||||
from agent.anthropic_adapter import _supports_fast_mode
|
||||
assert _supports_fast_mode("claude-opus-4-6") is True
|
||||
assert _supports_fast_mode("anthropic/claude-opus-4-6") is True
|
||||
assert _supports_fast_mode("claude-opus-4-7") is False
|
||||
assert _supports_fast_mode("claude-opus-4-8") is False
|
||||
assert _supports_fast_mode("claude-opus-4-8-fast") is False
|
||||
assert _supports_fast_mode("claude-sonnet-4-6") is False
|
||||
assert _supports_fast_mode("claude-haiku-4-5") is False
|
||||
assert _supports_fast_mode("") is False
|
||||
|
|
|
|||
|
|
@ -131,10 +131,10 @@ class TestDefaultContextLengths:
|
|||
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
|
||||
if "claude" not in key:
|
||||
continue
|
||||
# Claude 4.6+ models (4.6 and 4.7) have 1M context at standard
|
||||
# Claude 4.6+ models (4.6, 4.7, 4.8) have 1M context at standard
|
||||
# API pricing (no long-context premium). Older Claude 4.x and
|
||||
# 3.x models cap at 200k.
|
||||
if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7")):
|
||||
if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7", "4.8", "4-8")):
|
||||
assert value == 1000000, f"{key} should be 1000000"
|
||||
else:
|
||||
assert value == 200000, f"{key} should be 200000"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-05-26T20:49:36Z",
|
||||
"updated_at": "2026-05-28T17:19:08Z",
|
||||
"metadata": {
|
||||
"source": "hermes-agent repo",
|
||||
"docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog"
|
||||
|
|
@ -12,6 +12,14 @@
|
|||
"note": "Descriptions drive picker badges. Live /api/v1/models filters curated ids by tool-calling support and free pricing."
|
||||
},
|
||||
"models": [
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.8",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.8-fast",
|
||||
"description": "2x price, higher output speed"
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.7",
|
||||
"description": ""
|
||||
|
|
@ -144,6 +152,9 @@
|
|||
"note": "Free-tier gating is determined live via Portal pricing (partition_nous_models_by_tier), not this manifest."
|
||||
},
|
||||
"models": [
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.8"
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.7"
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue