diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index c43611f85f..0da01fedf9 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -24,6 +24,18 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> if reasoning_config is None or not isinstance(reasoning_config, dict): return None + normalized_model = (model or "").strip().lower() + if normalized_model.startswith("google/"): + normalized_model = normalized_model.split("/", 1)[1] + + # ``thinking_config`` is a Gemini-only request parameter. The same + # ``gemini`` provider also serves Gemma (and historically PaLM/Bard); + # those reject the field with HTTP 400 "Unknown name 'thinking_config': + # Cannot find field" — including the polite ``{"includeThoughts": False}`` + # form. Omit the field entirely on non-Gemini models. (#17426) + if not normalized_model.startswith("gemini"): + return None + if reasoning_config.get("enabled") is False: # Gemini can hide thought parts even when internal thinking still # happens; omit thinkingLevel to avoid model-specific validation quirks. @@ -34,9 +46,6 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> return {"includeThoughts": False} thinking_config: Dict[str, Any] = {"includeThoughts": True} - normalized_model = (model or "").strip().lower() - if normalized_model.startswith("google/"): - normalized_model = normalized_model.split("/", 1)[1] # Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes' # coarse effort levels. ``includeThoughts`` alone is enough to surface diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py index bec7dc58a0..66aa7e9058 100644 --- a/tests/agent/transports/test_chat_completions.py +++ b/tests/agent/transports/test_chat_completions.py @@ -244,6 +244,46 @@ class TestChatCompletionsBuildKwargs: "thinking_level": "low", } + def test_gemma_does_not_receive_thinking_config(self, transport): + # The `gemini` provider also serves Gemma (e.g. `gemma-4-31b-it`), + # but Gemma rejects `thinking_config` with HTTP 400 (#17426). Even + # when Hermes has reasoning enabled, the field must be omitted for + # non-Gemini models on this provider. + msgs = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="gemma-4-31b-it", + messages=msgs, + provider_name="gemini", + reasoning_config={"enabled": True, "effort": "high"}, + ) + assert "thinking_config" not in kw.get("extra_body", {}) + + def test_gemma_disabled_reasoning_still_omits_thinking_config(self, transport): + # The `Unknown name 'thinking_config': Cannot find field` rejection + # fires even on `{"includeThoughts": False}` — the entire field must + # be absent, not just disabled. (#17426) + msgs = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="gemma-4-31b-it", + messages=msgs, + provider_name="gemini", + reasoning_config={"enabled": False}, + ) + assert "thinking_config" not in kw.get("extra_body", {}) + + def test_google_prefixed_gemma_also_omits_thinking_config(self, transport): + # OpenRouter-style `google/gemma-...` IDs hit the same provider path + # and must also omit `thinking_config`. The existing `google/` + # prefix-stripping must not accidentally classify Gemma as Gemini. + msgs = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="google/gemma-4-31b-it", + messages=msgs, + provider_name="gemini", + reasoning_config={"enabled": True, "effort": "medium"}, + ) + assert "thinking_config" not in kw.get("extra_body", {}) + def test_max_tokens_with_fn(self, transport): msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs(