fix(gemini): bridge reasoning_config into thinking_config for chat-completions routes

This commit is contained in:
Nanako0129 2026-04-28 17:47:27 +08:00 committed by Teknium
parent 315a11a76f
commit dbbe2d1973
3 changed files with 123 additions and 1 deletion

View file

@ -18,6 +18,52 @@ from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
then request thought parts with ``includeThoughts`` enabled.
"""
if reasoning_config is None or not isinstance(reasoning_config, dict):
return None
if reasoning_config.get("enabled") is False:
# Gemini can hide thought parts even when internal thinking still
# happens; omit thinkingLevel to avoid model-specific validation quirks.
return {"includeThoughts": False}
effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
if effort == "none":
return {"includeThoughts": False}
thinking_config: Dict[str, Any] = {"includeThoughts": True}
normalized_model = (model or "").strip().lower()
if normalized_model.startswith("google/"):
normalized_model = normalized_model.split("/", 1)[1]
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
# coarse effort levels. ``includeThoughts`` alone is enough to surface
# thought parts without risking request validation errors.
if normalized_model.startswith("gemini-2.5-"):
return thinking_config
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
effort = "medium"
# Gemini 3 Flash supports the wider level set; Gemini 3 Pro is stricter,
# so collapse low-end efforts to ``low`` and high-end efforts to ``high``.
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
if "flash" in normalized_model:
thinking_config["thinkingLevel"] = "high" if effort == "xhigh" else effort
elif "pro" in normalized_model:
thinking_config["thinkingLevel"] = (
"high" if effort in {"high", "xhigh"} else "low"
)
return thinking_config
class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.
@ -241,6 +287,7 @@ class ChatCompletionsTransport(ProviderTransport):
is_openrouter = params.get("is_openrouter", False) is_openrouter = params.get("is_openrouter", False)
is_nous = params.get("is_nous", False) is_nous = params.get("is_nous", False)
is_github_models = params.get("is_github_models", False) is_github_models = params.get("is_github_models", False)
provider_name = str(params.get("provider_name") or "").strip().lower()
provider_prefs = params.get("provider_preferences") provider_prefs = params.get("provider_preferences")
if provider_prefs and is_openrouter: if provider_prefs and is_openrouter:
@ -293,6 +340,11 @@ class ChatCompletionsTransport(ProviderTransport):
if is_qwen: if is_qwen:
extra_body["vl_high_resolution_images"] = True extra_body["vl_high_resolution_images"] = True
if provider_name in {"gemini", "google-gemini-cli"}:
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
if thinking_config:
extra_body["thinking_config"] = thinking_config
# Merge any pre-built extra_body additions # Merge any pre-built extra_body additions
additions = params.get("extra_body_additions") additions = params.get("extra_body_additions")
if additions: if additions:

View file

@ -8094,6 +8094,7 @@ class AIAgent:
supports_reasoning=self._supports_reasoning_extra_body(), supports_reasoning=self._supports_reasoning_extra_body(),
github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None, github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None,
anthropic_max_output=_ant_max, anthropic_max_output=_ant_max,
provider_name=self.provider,
) )
def _supports_reasoning_extra_body(self) -> bool: def _supports_reasoning_extra_body(self) -> bool:

View file

@ -4,7 +4,7 @@ import pytest
from types import SimpleNamespace from types import SimpleNamespace
from agent.transports import get_transport from agent.transports import get_transport
from agent.transports.types import NormalizedResponse, ToolCall from agent.transports.types import NormalizedResponse
@pytest.fixture @pytest.fixture
@ -122,6 +122,75 @@ class TestChatCompletionsBuildKwargs:
) )
assert kw["extra_body"]["think"] is False assert kw["extra_body"]["think"] is False
def test_gemini_without_explicit_reasoning_config_keeps_existing_behavior(self, transport):
    """No reasoning_config supplied -> no thinking_config is injected."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
    )
    extra_body = kwargs.get("extra_body", {})
    assert "thinking_config" not in extra_body
def test_gemini_flash_reasoning_maps_to_thinking_config(self, transport):
    """Flash models pass the requested effort straight through as thinkingLevel."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "high"},
    )
    expected = {"includeThoughts": True, "thinkingLevel": "high"}
    assert kwargs["extra_body"]["thinking_config"] == expected
def test_gemini_25_reasoning_only_enables_visible_thoughts(self, transport):
    """Gemini 2.5 gets includeThoughts only — no thinkingLevel is guessed."""
    kwargs = transport.build_kwargs(
        model="gemini-2.5-flash",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "high"},
    )
    assert kwargs["extra_body"]["thinking_config"] == {"includeThoughts": True}
def test_gemini_pro_reasoning_clamps_to_supported_levels(self, transport):
    """Pro models collapse mid/low efforts down to the 'low' thinkingLevel."""
    kwargs = transport.build_kwargs(
        model="google/gemini-3.1-pro-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "medium"},
    )
    expected = {"includeThoughts": True, "thinkingLevel": "low"}
    assert kwargs["extra_body"]["thinking_config"] == expected
def test_gemini_disabled_reasoning_hides_thoughts(self, transport):
    """enabled=False maps to includeThoughts=False with no thinkingLevel."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": False},
    )
    assert kwargs["extra_body"]["thinking_config"] == {"includeThoughts": False}
def test_gemini_xhigh_clamps_to_high(self, transport):
    """The 'xhigh' effort is clamped to 'high' even on Flash models."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "xhigh"},
    )
    thinking_config = kwargs["extra_body"]["thinking_config"]
    assert thinking_config["thinkingLevel"] == "high"
def test_max_tokens_with_fn(self, transport): def test_max_tokens_with_fn(self, transport):
msgs = [{"role": "user", "content": "Hi"}] msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs( kw = transport.build_kwargs(