From 1a747957352ca33cb2d113d3c7d552aafbf62b22 Mon Sep 17 00:00:00 2001
From: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
Date: Thu, 28 May 2026 10:31:59 -0700
Subject: [PATCH] feat: add claude-opus-4.8 and claude-opus-4.8-fast (#34003)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Anthropic released Claude Opus 4.8 on 2026-05-27, available on
OpenRouter, Anthropic, Amazon Bedrock, and Claude Platform on AWS:
  - https://openrouter.ai/anthropic/claude-opus-4.8
  - https://openrouter.ai/anthropic/claude-opus-4.8-fast

The fast-mode variant is a separate model ID (anthropic/claude-opus-4.8-fast)
priced at 2x the base model — a notable improvement over the 6x premium
on older Opus generations (4.6/4.7). Unlike Opus 4.6, it is NOT selected via
a `speed: "fast"` request parameter; Anthropic's native fast-mode beta still
only covers Opus 4.6.

Changes:

  hermes_cli/models.py
    - Add anthropic/claude-opus-4.8 + anthropic/claude-opus-4.8-fast to
      the OpenRouter fallback snapshot, and anthropic/claude-opus-4.8
      (base model only) to the Nous Portal curated list (live catalogs
      surface them automatically when reachable; the fallback lists
      matter when the manifest fetch fails).
    - Add claude-opus-4-8 to the Anthropic-native picker list.

  agent/model_metadata.py
    - Register claude-opus-4-8 / claude-opus-4.8 in DEFAULT_CONTEXT_LENGTHS
      with 1M tokens (matches 4.6/4.7).

  agent/anthropic_adapter.py
    - Extend _XHIGH_EFFORT_SUBSTRINGS, _ADAPTIVE_THINKING_SUBSTRINGS, and
      _NO_SAMPLING_PARAMS_SUBSTRINGS with "4-8"/"4.8". 4.8 inherits the
      Opus 4.7 API contract: adaptive thinking only, xhigh effort level
      supported, sampling parameters (temperature/top_p/top_k) return 400.
    - Add claude-opus-4-8 to _ANTHROPIC_OUTPUT_LIMITS (128k max output,
      same as 4.7). Matches by substring so claude-opus-4-8-fast and
      date-stamped variants resolve correctly.

  agent/usage_pricing.py
    - Add anthropic/claude-opus-4-8: $5/$25 per MTok input/output, $0.50
      cache read, $6.25 cache write (same as 4.6/4.7).
    - Add anthropic/claude-opus-4-8-fast: $10/$50 per MTok (2x), $1.00
      cache read, $12.50 cache write. Per OpenRouter, the 2x premium is
      the only differentiator from regular Opus 4.8.
    - OpenRouter routes still pull pricing from the live /models API, so
      no static OpenRouter entry is needed.

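  tests/agent/test_anthropic_adapter.py
    - Assert _forbids_sampling_params is True for claude-opus-4-8 and
      claude-opus-4-8-fast, and that _supports_fast_mode stays False for
      both (fast mode is a separate model ID, not a request parameter).

  tests/agent/test_model_metadata.py
    - Extend the Claude 4.6+ context-length tag list with 4.8/4-8.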

  website/static/api/model-catalog.json
    - Regenerated via `python scripts/build_model_catalog.py` to pick up
      the new entries in the OpenRouter and Nous Portal fallback lists.

E2E verification (isolated sys.path import against the worktree):
  - _supports_adaptive_thinking, _supports_xhigh_effort, _forbids_sampling_params
    all return True for claude-opus-4.8 and claude-opus-4.8-fast.
  - _supports_fast_mode (the `speed: "fast"` request-parameter gate) stays
    False for 4.8 — fast mode is a separate model ID on OpenRouter, not a
    parameter Anthropic accepts on the base model.
  - DEFAULT_CONTEXT_LENGTHS resolves 1M for both notations.
  - resolve_billing_route + _lookup_official_docs_pricing resolve the
    correct $5/$25 (regular) and $10/$50 (fast) pricing for both
    dot-notation and dash-notation inputs.
  - 4.7 and 4.6 regression: behavior unchanged.

Unit tests: 305 passed across tests/agent/test_usage_pricing.py,
test_model_metadata.py, tests/hermes_cli/test_model_catalog.py,
test_models.py, test_model_validation.py, test_models_dev_preferred_merge.py.
---
 agent/anthropic_adapter.py            |  8 +++++---
 agent/model_metadata.py               |  2 ++
 agent/usage_pricing.py                | 28 +++++++++++++++++++++++++++
 hermes_cli/models.py                  |  4 ++++
 tests/agent/test_anthropic_adapter.py | 13 ++++++++++++-
 tests/agent/test_model_metadata.py    |  4 ++--
 website/static/api/model-catalog.json | 13 ++++++++++++-
 7 files changed, 65 insertions(+), 7 deletions(-)

diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 898df7eb685..fbdb265b0f3 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -77,16 +77,16 @@ ADAPTIVE_EFFORT_MAP = {
 # xhigh as a distinct level between high and max; older adaptive-thinking
 # models (4.6) reject it with a 400.  Keep this substring list in sync with
 # the Anthropic migration guide as new model families ship.
-_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
+_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
 
 # Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
 # is the only supported mode; 4.7 additionally forbids manual thinking entirely
 # and drops temperature/top_p/top_k).
-_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7")
+_ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7", "4-8", "4.8")
 
 # Models where temperature/top_p/top_k return 400 if set to non-default values.
 # This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
-_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7")
+_NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7", "4-8", "4.8")
 _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
 
 # ── Max output token limits per Anthropic model ───────────────────────
@@ -94,6 +94,8 @@ _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6")
 # max_tokens as a mandatory field.  Previously we hardcoded 16384, which
 # starves thinking-enabled models (thinking tokens count toward the limit).
 _ANTHROPIC_OUTPUT_LIMITS = {
+    # Claude 4.8
+    "claude-opus-4-8":   128_000,
     # Claude 4.7
     "claude-opus-4-7":   128_000,
     # Claude 4.6
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index fa21c837123..c77dcff1ace 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -141,6 +141,8 @@ DEFAULT_CONTEXT_LENGTHS = {
     # fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
     # substring of "anthropic/claude-sonnet-4.6").
     # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
+    "claude-opus-4-8": 1000000,
+    "claude-opus-4.8": 1000000,
     "claude-opus-4-7": 1000000,
     "claude-opus-4.7": 1000000,
     "claude-opus-4-6": 1000000,
diff --git a/agent/usage_pricing.py b/agent/usage_pricing.py
index 93ced2e7d43..8d6b85cd0b8 100644
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -83,6 +83,34 @@ _UTC_NOW = lambda: datetime.now(timezone.utc)
 # Official docs snapshot entries. Models whose published pricing and cache
 # semantics are stable enough to encode exactly.
 _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
+    # ── Anthropic Claude 4.8 ─────────────────────────────────────────────
+    # Same $5/$25 base pricing as 4.6/4.7.  Fast-mode variant is a separate
+    # model ID with 2x premium (vs the 6x premium on older Opus generations).
+    # Source: https://openrouter.ai/anthropic/claude-opus-4.8
+    (
+        "anthropic",
+        "claude-opus-4-8",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("5.00"),
+        output_cost_per_million=Decimal("25.00"),
+        cache_read_cost_per_million=Decimal("0.50"),
+        cache_write_cost_per_million=Decimal("6.25"),
+        source="official_docs_snapshot",
+        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
+    (
+        "anthropic",
+        "claude-opus-4-8-fast",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("10.00"),
+        output_cost_per_million=Decimal("50.00"),
+        cache_read_cost_per_million=Decimal("1.00"),
+        cache_write_cost_per_million=Decimal("12.50"),
+        source="official_docs_snapshot",
+        source_url="https://openrouter.ai/anthropic/claude-opus-4.8-fast",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
     # ── Anthropic Claude 4.7 ─────────────────────────────────────────────
     # Opus 4.5/4.6/4.7 share $5/$25 pricing (new tokenizer, up to 35% more
     # tokens for the same text).
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 4b26a5e787f..b9b7574f892 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -32,6 +32,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
+    ("anthropic/claude-opus-4.8",              ""),
+    ("anthropic/claude-opus-4.8-fast",         "2x price, higher output speed"),
     ("anthropic/claude-opus-4.7",              ""),
     ("anthropic/claude-opus-4.6",              ""),
     ("anthropic/claude-sonnet-4.6",            ""),
@@ -139,6 +141,7 @@ def _xai_curated_models() -> list[str]:
 
 _PROVIDER_MODELS: dict[str, list[str]] = {
     "nous": [
+        "anthropic/claude-opus-4.8",
         "anthropic/claude-opus-4.7",
         "anthropic/claude-opus-4.6",
         "anthropic/claude-sonnet-4.6",
@@ -290,6 +293,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "MiniMax-M2",
     ],
     "anthropic": [
+        "claude-opus-4-8",
         "claude-opus-4-7",
         "claude-opus-4-6",
         "claude-sonnet-4-6",
diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
index cfd6edeca65..7c7e8e33373 100644
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@@ -1188,16 +1188,27 @@ class TestBuildAnthropicKwargs:
         # params through its signature, we exercise the strip behavior by
         # calling the internal predicate directly.
         from agent.anthropic_adapter import _forbids_sampling_params
+        assert _forbids_sampling_params("claude-opus-4-8") is True
+        assert _forbids_sampling_params("claude-opus-4-8-fast") is True
         assert _forbids_sampling_params("claude-opus-4-7") is True
         assert _forbids_sampling_params("claude-opus-4-6") is False
         assert _forbids_sampling_params("claude-sonnet-4-5") is False
 
     def test_supports_fast_mode_predicate(self):
-        """Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded."""
+        """Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded.
+
+        For Opus 4.8 the fast variant is a separate model ID
+        (anthropic/claude-opus-4.8-fast) routed through the normal model
+        field, NOT via the ``speed: "fast"`` request parameter. So
+        ``_supports_fast_mode`` (which gates the parameter) must stay
+        False for both opus-4-8 and opus-4-8-fast.
+        """
         from agent.anthropic_adapter import _supports_fast_mode
         assert _supports_fast_mode("claude-opus-4-6") is True
         assert _supports_fast_mode("anthropic/claude-opus-4-6") is True
         assert _supports_fast_mode("claude-opus-4-7") is False
+        assert _supports_fast_mode("claude-opus-4-8") is False
+        assert _supports_fast_mode("claude-opus-4-8-fast") is False
         assert _supports_fast_mode("claude-sonnet-4-6") is False
         assert _supports_fast_mode("claude-haiku-4-5") is False
         assert _supports_fast_mode("") is False
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index e889f2e67bd..20a4bacaad6 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -131,10 +131,10 @@ class TestDefaultContextLengths:
         for key, value in DEFAULT_CONTEXT_LENGTHS.items():
             if "claude" not in key:
                 continue
-            # Claude 4.6+ models (4.6 and 4.7) have 1M context at standard
+            # Claude 4.6+ models (4.6, 4.7, 4.8) have 1M context at standard
             # API pricing (no long-context premium).  Older Claude 4.x and
             # 3.x models cap at 200k.
-            if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7")):
+            if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7", "4.8", "4-8")):
                 assert value == 1000000, f"{key} should be 1000000"
             else:
                 assert value == 200000, f"{key} should be 200000"
diff --git a/website/static/api/model-catalog.json b/website/static/api/model-catalog.json
index 1a084917aab..13389a570ef 100644
--- a/website/static/api/model-catalog.json
+++ b/website/static/api/model-catalog.json
@@ -1,6 +1,6 @@
 {
   "version": 1,
-  "updated_at": "2026-05-26T20:49:36Z",
+  "updated_at": "2026-05-28T17:19:08Z",
   "metadata": {
     "source": "hermes-agent repo",
     "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog"
@@ -12,6 +12,14 @@
         "note": "Descriptions drive picker badges. Live /api/v1/models filters curated ids by tool-calling support and free pricing."
       },
       "models": [
+        {
+          "id": "anthropic/claude-opus-4.8",
+          "description": ""
+        },
+        {
+          "id": "anthropic/claude-opus-4.8-fast",
+          "description": "2x price, higher output speed"
+        },
         {
           "id": "anthropic/claude-opus-4.7",
           "description": ""
@@ -144,6 +152,9 @@
         "note": "Free-tier gating is determined live via Portal pricing (partition_nous_models_by_tier), not this manifest."
       },
       "models": [
+        {
+          "id": "anthropic/claude-opus-4.8"
+        },
         {
           "id": "anthropic/claude-opus-4.7"
         },