From 529eb29b6a673bb2a67cbe2cd22e5f1dbaf75399 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Tue, 28 Apr 2026 05:37:35 -0700 Subject: [PATCH] fix(gemini): clamp Flash thinkingLevel to documented low/medium/high set Gemini 3 Flash documents low/medium/high as the accepted thinkingLevel values. The salvaged bridge was forwarding Hermes' "minimal" effort to Flash verbatim, which is not a documented Gemini level and risks a 400 from the native adapter. Clamp minimal->low on Flash (matching how Pro already clamps minimal+low down), keep the existing xhigh->high clamp, and let any other effort fall through to medium so the request stays valid by construction. No behaviour change for the documented effort levels. --- agent/transports/chat_completions.py | 12 +++++++++--- tests/agent/transports/test_chat_completions.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index ab7075aa18..480ba05d27 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -51,11 +51,17 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> if effort not in {"minimal", "low", "medium", "high", "xhigh"}: effort = "medium" - # Gemini 3 Flash supports the wider level set; Gemini 3 Pro is stricter, - # so collapse low-end efforts to ``low`` and high-end efforts to ``high``. + # Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro + # is stricter (low/high). Clamp Hermes' wider effort set to what each + # family accepts so we never forward an undocumented level verbatim. 
if normalized_model.startswith(("gemini-3", "gemini-3.1")): if "flash" in normalized_model: - thinking_config["thinkingLevel"] = "high" if effort == "xhigh" else effort + if effort in {"minimal", "low"}: + thinking_config["thinkingLevel"] = "low" + elif effort in {"high", "xhigh"}: + thinking_config["thinkingLevel"] = "high" + else: + thinking_config["thinkingLevel"] = "medium" elif "pro" in normalized_model: thinking_config["thinkingLevel"] = ( "high" if effort in {"high", "xhigh"} else "low" diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py index 0110f893ca..69326887af 100644 --- a/tests/agent/transports/test_chat_completions.py +++ b/tests/agent/transports/test_chat_completions.py @@ -191,6 +191,21 @@ class TestChatCompletionsBuildKwargs: ) assert kw["extra_body"]["thinking_config"]["thinkingLevel"] == "high" + def test_gemini_flash_minimal_clamps_to_low(self, transport): + # Gemini 3 Flash documents low/medium/high; "minimal" isn't accepted, + # so clamp it down to "low" rather than forwarding it verbatim. + msgs = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="gemini-3-flash-preview", + messages=msgs, + provider_name="gemini", + reasoning_config={"enabled": True, "effort": "minimal"}, + ) + assert kw["extra_body"]["thinking_config"] == { + "includeThoughts": True, + "thinkingLevel": "low", + } + def test_max_tokens_with_fn(self, transport): msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs(