diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 90a3a412e..64b952251 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -30,9 +30,11 @@ logger = logging.getLogger(__name__) THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000} # Hermes effort → Anthropic adaptive-thinking effort (output_config.effort). # Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max. -# We preserve xhigh as xhigh (the recommended default for coding/agentic on -# 4.7) and expose max as a distinct ceiling. "minimal" is a legacy alias that -# maps to low. See: +# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh. +# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/ +# agentic work) and downgrade it to max on pre-4.7 adaptive models (which +# is the strongest level they accept). "minimal" is a legacy alias that +# maps to low on every model. See: # https://platform.claude.com/docs/en/about-claude/models/migration-guide ADAPTIVE_EFFORT_MAP = { "max": "max", @@ -43,6 +45,12 @@ ADAPTIVE_EFFORT_MAP = { "minimal": "low", } +# Models that accept the "xhigh" output_config.effort level. Opus 4.7 added +# xhigh as a distinct level between high and max; older adaptive-thinking +# models (4.6) reject it with a 400. Keep this substring list in sync with +# the Anthropic migration guide as new model families ship. +_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7") + # Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive # is the only supported mode; 4.7 additionally forbids manual thinking entirely # and drops temperature/top_p/top_k). @@ -113,6 +121,17 @@ def _supports_adaptive_thinking(model: str) -> bool: return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS) +def _supports_xhigh_effort(model: str) -> bool: + """Return True for models that accept the 'xhigh' adaptive effort level. + + Opus 4.7 introduced xhigh as a distinct level between high and max. + Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max + and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max + when this returns False. + """ + return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS) + + def _forbids_sampling_params(model: str) -> bool: """Return True for models that 400 on any non-default temperature/top_p/top_k. @@ -1392,8 +1411,13 @@ def build_anthropic_kwargs( "type": "adaptive", "display": "summarized", } + adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium") + # Downgrade xhigh→max on models that don't list xhigh as a + # supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh. + if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model): + adaptive_effort = "max" kwargs["output_config"] = { - "effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium"), + "effort": adaptive_effort, } else: kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget} diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index 9d8f3deaa..737db01a3 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -959,11 +959,13 @@ class TestBuildAnthropicKwargs: assert "temperature" not in kwargs assert kwargs["max_tokens"] == 4096 - def test_reasoning_config_maps_xhigh_to_xhigh_effort_for_4_6_models(self): - # Opus 4.7 added "xhigh" as a distinct effort level (the recommended - # default for coding/agentic work). Earlier mapping aliased xhigh→max, - # which silently over-efforted every request. 2026-04-16 migration - # guide: xhigh and max are distinct levels. + def test_reasoning_config_downgrades_xhigh_to_max_for_4_6_models(self): + # Opus 4.7 added "xhigh" as a distinct effort level (low/medium/high/ + # xhigh/max). Opus 4.6 only supports low/medium/high/max — sending + # "xhigh" there returns an API 400. Preserve the pre-migration + # behavior of aliasing xhigh→max on pre-4.7 adaptive models so users + # who prefer xhigh as their default don't 400 every request when + # switching back to 4.6. kwargs = build_anthropic_kwargs( model="claude-sonnet-4-6", messages=[{"role": "user", "content": "think harder"}], @@ -972,6 +974,19 @@ class TestBuildAnthropicKwargs: reasoning_config={"enabled": True, "effort": "xhigh"}, ) assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"} + assert kwargs["output_config"] == {"effort": "max"} + + def test_reasoning_config_preserves_xhigh_for_4_7_models(self): + # On 4.7+ xhigh is a real level and the recommended default for + # coding/agentic work — keep it distinct from max. + kwargs = build_anthropic_kwargs( + model="claude-opus-4-7", + messages=[{"role": "user", "content": "think harder"}], + tools=None, + max_tokens=4096, + reasoning_config={"enabled": True, "effort": "xhigh"}, + ) + assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"} assert kwargs["output_config"] == {"effort": "xhigh"} def test_reasoning_config_maps_max_effort_for_4_7_models(self):