hermes-agent/plugins/model-providers/openrouter/__init__.py
Siddharth Balyan 1febb08240
fix(anthropic): default new Claude models to the modern thinking contract (#42991)
New Anthropic models without a recognized version substring (claude-fable-5
and future named/numbered releases) were classified as legacy and routed down
the manual-thinking path, which made OpenRouter emit thinking.type.disabled —
a form reasoning-mandatory Claude models reject with a non-retryable HTTP 400.

Invert the brittle version-substring allowlists to default-to-modern (mirroring
_get_anthropic_max_output): unknown Claude models get the adaptive/xhigh/
no-sampling contract, with an explicit legacy list for older families. Non-Claude
Anthropic-Messages models (minimax, qwen3, …) keep the manual path.

- anthropic_adapter: _supports_adaptive_thinking / _supports_xhigh_effort /
  _forbids_sampling_params now default unknown Claude models to modern; legacy
  families enumerated in _LEGACY_MANUAL_THINKING_CLAUDE_SUBSTRINGS.
- openrouter profile: omit reasoning entirely (→ adaptive default) instead of
  forwarding {enabled:false} for reasoning-mandatory Anthropic models; legacy
  Anthropic + all non-Anthropic models still pass the disable form through.
- model_metadata + output-limit table: register claude-fable-5 (1M ctx, 128K out).

Tests assert the invariant ("unknown Claude model -> modern contract; legacy
stays manual; non-Claude unaffected"), not specific model names.
2026-06-09 23:37:23 +05:30

161 lines
6.5 KiB
Python

"""OpenRouter provider profile."""
import logging
from typing import Any
from providers import register_provider
from providers.base import ProviderProfile
logger = logging.getLogger(__name__)
_CACHE: list[str] | None = None
# Anthropic model families that still accept an explicit "disable thinking"
# request (the manual ``thinking: {type: "disabled"}`` form OpenRouter emits
# for ``reasoning: {enabled: false}``). Everything Claude 4.6 and newer —
# including future date-stamped / named models (fable, mythos-class, …) —
# mandates reasoning and returns HTTP 400 on any disable form. We therefore
# default *unknown* Anthropic models to "cannot disable" (the modern contract)
# and keep only this explicit legacy allowlist of models that can. Mirrors the
# default-to-newest philosophy in agent/anthropic_adapter._get_anthropic_max_output.
_ANTHROPIC_REASONING_OPTIONAL_SUBSTRINGS = (
"claude-3", # 3, 3.5, 3.7
"claude-opus-4-0", "claude-opus-4.0", "claude-opus-4-1", "claude-opus-4.1",
"claude-sonnet-4-0", "claude-sonnet-4.0",
"claude-opus-4-2025", "claude-sonnet-4-2025", # date-stamped 4.0 IDs
"claude-opus-4-5", "claude-opus-4.5",
"claude-sonnet-4-5", "claude-sonnet-4.5",
"claude-haiku-4-5", "claude-haiku-4.5",
)
def _anthropic_reasoning_is_mandatory(model: str | None) -> bool:
"""Return True for Anthropic models that reject any disable-thinking form.
Claude 4.6+ (adaptive thinking) and newer named models have no "off"
switch — sending ``reasoning: {enabled: false}`` makes OpenRouter emit
``thinking: {type: "disabled"}``, which these models 400 on. Unknown /
new Anthropic model names default to mandatory so the next un-numbered
release doesn't reintroduce the 400.
"""
m = (model or "").lower()
if not m.startswith(("anthropic/", "claude")) and "claude" not in m:
return False
return not any(sub in m for sub in _ANTHROPIC_REASONING_OPTIONAL_SUBSTRINGS)
class OpenRouterProfile(ProviderProfile):
"""OpenRouter aggregator — provider preferences, reasoning config passthrough."""
def fetch_models(
self,
*,
api_key: str | None = None,
timeout: float = 8.0,
) -> list[str] | None:
"""Fetch from public OpenRouter catalog — no auth required.
Note: Tool-call capability filtering is applied by hermes_cli/models.py
via fetch_openrouter_models() → _openrouter_model_supports_tools(), not
here. The picker early-returns via the dedicated openrouter path before
reaching this method, so filtering here would be unreachable.
"""
global _CACHE # noqa: PLW0603
if _CACHE is not None:
return _CACHE
try:
result = super().fetch_models(api_key=None, timeout=timeout)
if result is not None:
_CACHE = result
return result
except Exception as exc:
logger.debug("fetch_models(openrouter): %s", exc)
return None
def build_extra_body(
self, *, session_id: str | None = None, **context: Any
) -> dict[str, Any]:
body: dict[str, Any] = {}
if session_id:
body["session_id"] = session_id
prefs = context.get("provider_preferences")
if prefs:
body["provider"] = prefs
# Pareto Code router — model-gated. The plugins block is only
# meaningful for openrouter/pareto-code; sending it on any other
# model has no documented effect and would be confusing in logs.
# See: https://openrouter.ai/docs/guides/routing/routers/pareto-router
model = (context.get("model") or "")
if model == "openrouter/pareto-code":
score = context.get("openrouter_min_coding_score")
if score is not None and score != "":
try:
score_f = float(score)
except (TypeError, ValueError):
score_f = None
if score_f is not None and 0.0 <= score_f <= 1.0:
body["plugins"] = [
{"id": "pareto-router", "min_coding_score": score_f}
]
return body
def build_api_kwargs_extras(
self,
*,
reasoning_config: dict | None = None,
supports_reasoning: bool = False,
model: str | None = None,
session_id: str | None = None,
**context: Any,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""OpenRouter passes the full reasoning_config dict as extra_body.reasoning.
For xAI Grok models routed through OpenRouter, attach the
``x-grok-conv-id`` header so that xAI's prompt cache stays pinned to
the same backend server across turns.
"""
extra_body: dict[str, Any] = {}
if supports_reasoning:
if reasoning_config is not None:
cfg = dict(reasoning_config)
# Reasoning-mandatory Anthropic models (Claude 4.6+ / fable /
# future named models) have no "off" switch. Forwarding
# ``{enabled: false}`` makes OpenRouter emit Anthropic's manual
# ``thinking: {type: "disabled"}``, which those models reject
# with a non-retryable HTTP 400. Omit reasoning entirely so the
# model falls back to its default (adaptive) thinking instead.
disabling = cfg.get("enabled") is False or cfg.get("effort") == "none"
if disabling and _anthropic_reasoning_is_mandatory(model):
pass # leave reasoning unset → adaptive default
else:
extra_body["reasoning"] = cfg
else:
extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
extra_headers: dict[str, Any] = {}
if session_id and model and model.startswith(("x-ai/grok-", "xai/grok-")):
extra_headers["x-grok-conv-id"] = session_id
return extra_body, {"extra_headers": extra_headers} if extra_headers else {}
openrouter = OpenRouterProfile(
name="openrouter",
aliases=("or",),
env_vars=("OPENROUTER_API_KEY",),
display_name="OpenRouter",
description="OpenRouter — unified API for 200+ models",
signup_url="https://openrouter.ai/keys",
base_url="https://openrouter.ai/api/v1",
models_url="https://openrouter.ai/api/v1/models",
fallback_models=(
"anthropic/claude-sonnet-4.6",
"openai/gpt-5.4",
"deepseek/deepseek-chat",
"google/gemini-3-flash-preview",
"qwen/qwen3-plus",
),
)
register_provider(openrouter)