From 63d06dd93d6c19f22598d8ffc855788e1fe04714 Mon Sep 17 00:00:00 2001
From: Trev <trevthefoolish@users.noreply.github.com>
Date: Thu, 16 Apr 2026 13:51:42 -0500
Subject: [PATCH] =?UTF-8?q?fix(agent):=20downgrade=20xhigh=E2=86=92max=20o?=
 =?UTF-8?q?n=20Anthropic=20pre-4.7=20adaptive=20models?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Regression from #11161 (Claude Opus 4.7 migration, commit 0517ac3e).

The Opus 4.7 migration changed `ADAPTIVE_EFFORT_MAP["xhigh"]` from "max"
(the pre-migration alias) to "xhigh" to preserve the new 4.7 effort level
as distinct from max. This is correct for 4.7, but Opus/Sonnet 4.6 only
expose 4 levels (low/medium/high/max) — sending "xhigh" there now 400s:

    BadRequestError [HTTP 400]: This model does not support effort
    level 'xhigh'. Supported levels: high, low, max, medium.

Users who set reasoning_effort=xhigh as their default (xhigh is the
recommended default for coding/agentic on 4.7 per the Anthropic migration
guide) now 400 every request the moment they switch back to a 4.6 model
via `/model` or config. Verified live against the Anthropic API on
`anthropic==0.94.0`.

Fix: make the mapping model-aware. Add a `_supports_xhigh_effort()`
predicate (matches the 4-7/4.7 substrings, mirroring the existing
`_supports_adaptive_thinking` / `_forbids_sampling_params` pattern).
On pre-4.7 adaptive models, downgrade xhigh→max (the strongest effort
those models accept, restoring pre-migration behavior). On 4.7+, keep
xhigh as a distinct level.

Per Anthropic's migration guide, xhigh is 4.7-only:
https://platform.claude.com/docs/en/about-claude/models/migration-guide
> Opus 4.7 effort levels: max, xhigh (new), high, medium, low.
> Opus 4.6 effort levels: max, high, medium, low.
The SDK typing still lists only the four pre-4.7 levels:
`anthropic.types.OutputConfigParam.effort: Literal["low", "medium",
"high", "max"]` (v0.94.0 has not yet been updated for xhigh).

## Test plan

Verified live on macOS 15.5 / anthropic==0.94.0:

    claude-opus-4-6 + effort=xhigh → output_config.effort=max  → 200 OK
    claude-opus-4-7 + effort=xhigh → output_config.effort=xhigh → 200 OK
    claude-opus-4-6 + effort=max   → output_config.effort=max  → 200 OK
    claude-opus-4-7 + effort=max   → output_config.effort=max  → 200 OK

`tests/agent/test_anthropic_adapter.py` — 120 passed (replaced 1 buggy
test that asserted the broken behavior, added 1 for 4.7 preservation).

Full adapter suite: 120 passed in 1.05s.
Broader suite (agent + run_agent + cli/gateway reasoning): 2140 passed
(2 pre-existing failures on clean upstream/main, unrelated).

## Platforms

Tested on macOS 15.5. No platform-specific code paths touched.
---
 agent/anthropic_adapter.py            | 32 +++++++++++++++++++++++----
 tests/agent/test_anthropic_adapter.py | 25 ++++++++++++++++-----
 2 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 90a3a412e8..64b9522517 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -30,9 +30,11 @@ logger = logging.getLogger(__name__)
 THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
 # Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
 # Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
-# We preserve xhigh as xhigh (the recommended default for coding/agentic on
-# 4.7) and expose max as a distinct ceiling. "minimal" is a legacy alias that
-# maps to low.  See:
+# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh.
+# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/
+# agentic work) and downgrade it to max on pre-4.7 adaptive models (which
+# is the strongest level they accept).  "minimal" is a legacy alias that
+# maps to low on every model.  See:
 # https://platform.claude.com/docs/en/about-claude/models/migration-guide
 ADAPTIVE_EFFORT_MAP = {
     "max":     "max",
@@ -43,6 +45,12 @@ ADAPTIVE_EFFORT_MAP = {
     "minimal": "low",
 }
 
+# Models that accept the "xhigh" output_config.effort level.  Opus 4.7 added
+# xhigh as a distinct level between high and max; older adaptive-thinking
+# models (4.6) reject it with a 400.  Keep this substring list in sync with
+# the Anthropic migration guide as new model families ship.
+_XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7")
+
 # Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
 # is the only supported mode; 4.7 additionally forbids manual thinking entirely
 # and drops temperature/top_p/top_k).
@@ -113,6 +121,17 @@ def _supports_adaptive_thinking(model: str) -> bool:
     return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
 
 
+def _supports_xhigh_effort(model: str) -> bool:
+    """Return True for models that accept the 'xhigh' adaptive effort level.
+
+    Opus 4.7 introduced xhigh as a distinct level between high and max.
+    Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
+    and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
+    when this returns False.
+    """
+    return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)
+
+
 def _forbids_sampling_params(model: str) -> bool:
     """Return True for models that 400 on any non-default temperature/top_p/top_k.
 
@@ -1392,8 +1411,13 @@ def build_anthropic_kwargs(
                     "type": "adaptive",
                     "display": "summarized",
                 }
+                adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium")
+                # Downgrade xhigh→max on models that don't list xhigh as a
+                # supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh.
+                if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model):
+                    adaptive_effort = "max"
                 kwargs["output_config"] = {
-                    "effort": ADAPTIVE_EFFORT_MAP.get(effort, "medium"),
+                    "effort": adaptive_effort,
                 }
             else:
                 kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py
index 9d8f3deaaa..737db01a35 100644
--- a/tests/agent/test_anthropic_adapter.py
+++ b/tests/agent/test_anthropic_adapter.py
@@ -959,11 +959,13 @@ class TestBuildAnthropicKwargs:
         assert "temperature" not in kwargs
         assert kwargs["max_tokens"] == 4096
 
-    def test_reasoning_config_maps_xhigh_to_xhigh_effort_for_4_6_models(self):
-        # Opus 4.7 added "xhigh" as a distinct effort level (the recommended
-        # default for coding/agentic work). Earlier mapping aliased xhigh→max,
-        # which silently over-efforted every request. 2026-04-16 migration
-        # guide: xhigh and max are distinct levels.
+    def test_reasoning_config_downgrades_xhigh_to_max_for_4_6_models(self):
+        # Opus 4.7 added "xhigh" as a distinct effort level (low/medium/high/
+        # xhigh/max). Opus 4.6 only supports low/medium/high/max — sending
+        # "xhigh" there returns an API 400. Preserve the pre-migration
+        # behavior of aliasing xhigh→max on pre-4.7 adaptive models so users
+        # who prefer xhigh as their default don't 400 every request when
+        # switching back to 4.6.
         kwargs = build_anthropic_kwargs(
             model="claude-sonnet-4-6",
             messages=[{"role": "user", "content": "think harder"}],
@@ -972,6 +974,19 @@ class TestBuildAnthropicKwargs:
             reasoning_config={"enabled": True, "effort": "xhigh"},
         )
         assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"}
+        assert kwargs["output_config"] == {"effort": "max"}
+
+    def test_reasoning_config_preserves_xhigh_for_4_7_models(self):
+        # On 4.7+ xhigh is a real level and the recommended default for
+        # coding/agentic work — keep it distinct from max.
+        kwargs = build_anthropic_kwargs(
+            model="claude-opus-4-7",
+            messages=[{"role": "user", "content": "think harder"}],
+            tools=None,
+            max_tokens=4096,
+            reasoning_config={"enabled": True, "effort": "xhigh"},
+        )
+        assert kwargs["thinking"] == {"type": "adaptive", "display": "summarized"}
         assert kwargs["output_config"] == {"effort": "xhigh"}
 
     def test_reasoning_config_maps_max_effort_for_4_7_models(self):