fix(transport): omit thinking_config for Gemma on the gemini provider (#17426)

The `gemini` provider also serves Gemma (e.g. `gemma-4-31b-it`) and historically other Google models like PaLM. Those reject `extra_body.thinking_config` with `HTTP 400: Unknown name "thinking_config": Cannot find field`. `_build_gemini_thinking_config()` was unconditionally producing a config dict for any model on the `gemini` / `google-gemini-cli` provider, which `ChatCompletionsTransport.build_kwargs` then dropped into `extra_body["thinking_config"]`. The result: every chat turn for Gemma users on the gemini provider blew up at the API edge. The fix is the same shape Hermes already uses for the Gemini-2.5 vs Gemini-3 family clamping: normalise the model id, strip an OpenRouter-style `google/` prefix, and short-circuit early when the result doesn't start with `gemini`. We return `None` rather than `{"includeThoughts": False}`, because the API rejects the field name itself — even the polite "off" form trips the same 400. Three regression tests cover Gemma with reasoning enabled, Gemma with reasoning disabled, and the `google/gemma-…` OpenRouter-style id; the existing Gemini-2.5 / Gemini-3 / `google/gemini-…` cases keep passing because the Gemini guard fires after the prefix strip. Fixes #17426. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 02:51:50 +00:00 · 2026-04-29 05:29:23 -07:00 · 2026-04-29 05:29:23 -07:00 · cc5b9fb581
commit cc5b9fb581
parent 3de8e21683
2 changed files with 52 additions and 3 deletions
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@@ -244,6 +244,46 @@ class TestChatCompletionsBuildKwargs:
            "thinking_level": "low",
        }

+    def test_gemma_does_not_receive_thinking_config(self, transport):
+        # Regression test for #17426: the `gemini` provider also serves Gemma
+        # (e.g. `gemma-4-31b-it`), which rejects `thinking_config` with HTTP
+        # 400. Even when Hermes has reasoning enabled, the field must be
+        # omitted from `extra_body` for non-Gemini models on this provider.
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemma-4-31b-it",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert "thinking_config" not in kw.get("extra_body", {})  # absent extra_body passes too
+
+    def test_gemma_disabled_reasoning_still_omits_thinking_config(self, transport):
+        # Regression test for #17426: the `Unknown name 'thinking_config':
+        # Cannot find field` rejection fires even on `{"includeThoughts": False}`,
+        # so with reasoning disabled the field must be absent, not just "off".
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemma-4-31b-it",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": False},
+        )
+        assert "thinking_config" not in kw.get("extra_body", {})  # absent extra_body passes too
+
+    def test_google_prefixed_gemma_also_omits_thinking_config(self, transport):
+        # OpenRouter-style `google/gemma-...` IDs hit the same `gemini` provider
+        # path and must also omit `thinking_config`: the existing `google/`
+        # prefix-stripping must not accidentally classify Gemma as Gemini. (#17426)
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="google/gemma-4-31b-it",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": True, "effort": "medium"},
+        )
+        assert "thinking_config" not in kw.get("extra_body", {})  # absent extra_body passes too
+
    def test_max_tokens_with_fn(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(