mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix(transport): omit thinking_config for Gemma on the gemini provider (#17426)
The `gemini` provider also serves Gemma (e.g. `gemma-4-31b-it`) and
historically other Google models like PaLM. Those reject
`extra_body.thinking_config` with HTTP 400:
Unknown name "thinking_config": Cannot find field
`_build_gemini_thinking_config()` was unconditionally producing a
config dict for any model on the `gemini` / `google-gemini-cli`
provider, which `ChatCompletionsTransport.build_kwargs` then dropped
into `extra_body["thinking_config"]`. The result: every chat turn for
Gemma users on the gemini provider blew up at the API edge.
The fix is the same shape Hermes already uses for the Gemini-2.5 vs
Gemini-3 family clamping: normalise the model id, strip an
`OpenRouter`-style `google/` prefix, and short-circuit early when the
result doesn't start with `gemini`. We return `None` rather than
`{"includeThoughts": False}`, because the API rejects the field name
itself — even the polite "off" form trips the same 400.
Three regression tests cover Gemma with reasoning enabled, Gemma with
reasoning disabled, and the `google/gemma-…` OpenRouter-style id; the
existing Gemini-2.5 / Gemini-3 / `google/gemini-…` cases keep passing
because the Gemini guard fires after the prefix strip.
Fixes #17426
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3de8e21683
commit
cc5b9fb581
2 changed files with 52 additions and 3 deletions
|
|
@ -24,6 +24,18 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
|
|||
if reasoning_config is None or not isinstance(reasoning_config, dict):
|
||||
return None
|
||||
|
||||
normalized_model = (model or "").strip().lower()
|
||||
if normalized_model.startswith("google/"):
|
||||
normalized_model = normalized_model.split("/", 1)[1]
|
||||
|
||||
# ``thinking_config`` is a Gemini-only request parameter. The same
|
||||
# ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
|
||||
# those reject the field with HTTP 400 "Unknown name 'thinking_config':
|
||||
# Cannot find field" — including the polite ``{"includeThoughts": False}``
|
||||
# form. Omit the field entirely on non-Gemini models. (#17426)
|
||||
if not normalized_model.startswith("gemini"):
|
||||
return None
|
||||
|
||||
if reasoning_config.get("enabled") is False:
|
||||
# Gemini can hide thought parts even when internal thinking still
|
||||
# happens; omit thinkingLevel to avoid model-specific validation quirks.
|
||||
|
|
@ -34,9 +46,6 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
|
|||
return {"includeThoughts": False}
|
||||
|
||||
thinking_config: Dict[str, Any] = {"includeThoughts": True}
|
||||
normalized_model = (model or "").strip().lower()
|
||||
if normalized_model.startswith("google/"):
|
||||
normalized_model = normalized_model.split("/", 1)[1]
|
||||
|
||||
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
|
||||
# coarse effort levels. ``includeThoughts`` alone is enough to surface
|
||||
|
|
|
|||
|
|
@ -244,6 +244,46 @@ class TestChatCompletionsBuildKwargs:
|
|||
"thinking_level": "low",
|
||||
}
|
||||
|
||||
def test_gemma_does_not_receive_thinking_config(self, transport):
|
||||
# The `gemini` provider also serves Gemma (e.g. `gemma-4-31b-it`),
|
||||
# but Gemma rejects `thinking_config` with HTTP 400 (#17426). Even
|
||||
# when Hermes has reasoning enabled, the field must be omitted for
|
||||
# non-Gemini models on this provider.
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="gemma-4-31b-it",
|
||||
messages=msgs,
|
||||
provider_name="gemini",
|
||||
reasoning_config={"enabled": True, "effort": "high"},
|
||||
)
|
||||
assert "thinking_config" not in kw.get("extra_body", {})
|
||||
|
||||
def test_gemma_disabled_reasoning_still_omits_thinking_config(self, transport):
|
||||
# The `Unknown name 'thinking_config': Cannot find field` rejection
|
||||
# fires even on `{"includeThoughts": False}` — the entire field must
|
||||
# be absent, not just disabled. (#17426)
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="gemma-4-31b-it",
|
||||
messages=msgs,
|
||||
provider_name="gemini",
|
||||
reasoning_config={"enabled": False},
|
||||
)
|
||||
assert "thinking_config" not in kw.get("extra_body", {})
|
||||
|
||||
def test_google_prefixed_gemma_also_omits_thinking_config(self, transport):
|
||||
# OpenRouter-style `google/gemma-...` IDs hit the same provider path
|
||||
# and must also omit `thinking_config`. The existing `google/`
|
||||
# prefix-stripping must not accidentally classify Gemma as Gemini.
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
model="google/gemma-4-31b-it",
|
||||
messages=msgs,
|
||||
provider_name="gemini",
|
||||
reasoning_config={"enabled": True, "effort": "medium"},
|
||||
)
|
||||
assert "thinking_config" not in kw.get("extra_body", {})
|
||||
|
||||
def test_max_tokens_with_fn(self, transport):
|
||||
msgs = [{"role": "user", "content": "Hi"}]
|
||||
kw = transport.build_kwargs(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue