mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-08 03:01:47 +00:00
fix(gemini): clamp Flash thinkingLevel to documented low/medium/high set
Gemini 3 Flash documents low/medium/high as the accepted thinkingLevel
values. The salvaged bridge was forwarding Hermes' "minimal" effort to
Flash verbatim, which is not a documented Gemini level and risks a 400
from the native adapter.
Clamp minimal->low on Flash (matching how Pro already clamps minimal+low
down), keep clamping xhigh->high, and funnel any remaining effort into
medium so the request stays valid by construction. No behaviour change
for the documented effort levels (low/medium/high).
This commit is contained in:
parent
dbbe2d1973
commit
529eb29b6a
2 changed files with 24 additions and 3 deletions
|
|
@ -51,11 +51,17 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
|
||||||
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
|
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
|
||||||
effort = "medium"
|
effort = "medium"
|
||||||
|
|
||||||
# Gemini 3 Flash supports the wider level set; Gemini 3 Pro is stricter,
|
# Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
|
||||||
# so collapse low-end efforts to ``low`` and high-end efforts to ``high``.
|
# is stricter (low/high). Clamp Hermes' wider effort set to what each
|
||||||
|
# family accepts so we never forward an undocumented level verbatim.
|
||||||
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
|
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
|
||||||
if "flash" in normalized_model:
|
if "flash" in normalized_model:
|
||||||
thinking_config["thinkingLevel"] = "high" if effort == "xhigh" else effort
|
if effort in {"minimal", "low"}:
|
||||||
|
thinking_config["thinkingLevel"] = "low"
|
||||||
|
elif effort in {"high", "xhigh"}:
|
||||||
|
thinking_config["thinkingLevel"] = "high"
|
||||||
|
else:
|
||||||
|
thinking_config["thinkingLevel"] = "medium"
|
||||||
elif "pro" in normalized_model:
|
elif "pro" in normalized_model:
|
||||||
thinking_config["thinkingLevel"] = (
|
thinking_config["thinkingLevel"] = (
|
||||||
"high" if effort in {"high", "xhigh"} else "low"
|
"high" if effort in {"high", "xhigh"} else "low"
|
||||||
|
|
|
||||||
|
|
@ -191,6 +191,21 @@ class TestChatCompletionsBuildKwargs:
|
||||||
)
|
)
|
||||||
assert kw["extra_body"]["thinking_config"]["thinkingLevel"] == "high"
|
assert kw["extra_body"]["thinking_config"]["thinkingLevel"] == "high"
|
||||||
|
|
||||||
|
def test_gemini_flash_minimal_clamps_to_low(self, transport):
|
||||||
|
# Gemini 3 Flash documents low/medium/high; "minimal" isn't accepted,
|
||||||
|
# so clamp it down to "low" rather than forwarding it verbatim.
|
||||||
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
|
kw = transport.build_kwargs(
|
||||||
|
model="gemini-3-flash-preview",
|
||||||
|
messages=msgs,
|
||||||
|
provider_name="gemini",
|
||||||
|
reasoning_config={"enabled": True, "effort": "minimal"},
|
||||||
|
)
|
||||||
|
assert kw["extra_body"]["thinking_config"] == {
|
||||||
|
"includeThoughts": True,
|
||||||
|
"thinkingLevel": "low",
|
||||||
|
}
|
||||||
|
|
||||||
def test_max_tokens_with_fn(self, transport):
|
def test_max_tokens_with_fn(self, transport):
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue