From 529eb29b6a673bb2a67cbe2cd22e5f1dbaf75399 Mon Sep 17 00:00:00 2001
From: teknium1 <teknium@users.noreply.github.com>
Date: Tue, 28 Apr 2026 05:37:35 -0700
Subject: [PATCH] fix(gemini): clamp Flash thinkingLevel to documented
 low/medium/high set

Gemini 3 Flash documents low/medium/high as the accepted thinkingLevel
values. The salvaged bridge was forwarding Hermes' "minimal" effort to
Flash verbatim, which is not a documented Gemini level and risks a 400
from the native adapter.

Clamp minimal->low on Flash (mirroring how Pro already collapses every
non-high effort to low), keep the existing xhigh->high mapping, and send
anything else to medium so the request stays valid by construction. No
behaviour change for the documented low/medium/high effort levels.
---
 agent/transports/chat_completions.py            | 12 +++++++++---
 tests/agent/transports/test_chat_completions.py | 15 +++++++++++++++
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index ab7075aa18..480ba05d27 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -51,11 +51,17 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
     if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
         effort = "medium"
 
-    # Gemini 3 Flash supports the wider level set; Gemini 3 Pro is stricter,
-    # so collapse low-end efforts to ``low`` and high-end efforts to ``high``.
+    # Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
+    # is stricter (low/high). Clamp Hermes' wider effort set to what each
+    # family accepts so we never forward an undocumented level verbatim.
     if normalized_model.startswith(("gemini-3", "gemini-3.1")):
         if "flash" in normalized_model:
-            thinking_config["thinkingLevel"] = "high" if effort == "xhigh" else effort
+            if effort in {"minimal", "low"}:
+                thinking_config["thinkingLevel"] = "low"
+            elif effort in {"high", "xhigh"}:
+                thinking_config["thinkingLevel"] = "high"
+            else:
+                thinking_config["thinkingLevel"] = "medium"
         elif "pro" in normalized_model:
             thinking_config["thinkingLevel"] = (
                 "high" if effort in {"high", "xhigh"} else "low"
diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py
index 0110f893ca..69326887af 100644
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@@ -191,6 +191,21 @@ class TestChatCompletionsBuildKwargs:
         )
         assert kw["extra_body"]["thinking_config"]["thinkingLevel"] == "high"
 
+    def test_gemini_flash_minimal_clamps_to_low(self, transport):
+        # Gemini 3 Flash documents low/medium/high; "minimal" isn't accepted,
+        # so clamp it down to "low" rather than forwarding it verbatim.
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemini-3-flash-preview",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": True, "effort": "minimal"},
+        )
+        assert kw["extra_body"]["thinking_config"] == {
+            "includeThoughts": True,
+            "thinkingLevel": "low",
+        }
+
     def test_max_tokens_with_fn(self, transport):
         msgs = [{"role": "user", "content": "Hi"}]
         kw = transport.build_kwargs(