fix(gemini): bridge reasoning_config into thinking_config for chat-completions routes

This commit is contained in:
Nanako0129 2026-04-28 17:47:27 +08:00 committed by Teknium
parent 315a11a76f
commit dbbe2d1973
3 changed files with 123 additions and 1 deletion

View file

@ -18,6 +18,52 @@ from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
then request thought parts with ``includeThoughts`` enabled.
"""
if reasoning_config is None or not isinstance(reasoning_config, dict):
return None
if reasoning_config.get("enabled") is False:
# Gemini can hide thought parts even when internal thinking still
# happens; omit thinkingLevel to avoid model-specific validation quirks.
return {"includeThoughts": False}
effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
if effort == "none":
return {"includeThoughts": False}
thinking_config: Dict[str, Any] = {"includeThoughts": True}
normalized_model = (model or "").strip().lower()
if normalized_model.startswith("google/"):
normalized_model = normalized_model.split("/", 1)[1]
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
# coarse effort levels. ``includeThoughts`` alone is enough to surface
# thought parts without risking request validation errors.
if normalized_model.startswith("gemini-2.5-"):
return thinking_config
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
effort = "medium"
# Gemini 3 Flash supports the wider level set; Gemini 3 Pro is stricter,
# so collapse low-end efforts to ``low`` and high-end efforts to ``high``.
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
if "flash" in normalized_model:
thinking_config["thinkingLevel"] = "high" if effort == "xhigh" else effort
elif "pro" in normalized_model:
thinking_config["thinkingLevel"] = (
"high" if effort in {"high", "xhigh"} else "low"
)
return thinking_config
class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.
@ -241,6 +287,7 @@ class ChatCompletionsTransport(ProviderTransport):
is_openrouter = params.get("is_openrouter", False) is_openrouter = params.get("is_openrouter", False)
is_nous = params.get("is_nous", False) is_nous = params.get("is_nous", False)
is_github_models = params.get("is_github_models", False) is_github_models = params.get("is_github_models", False)
provider_name = str(params.get("provider_name") or "").strip().lower()
provider_prefs = params.get("provider_preferences") provider_prefs = params.get("provider_preferences")
if provider_prefs and is_openrouter: if provider_prefs and is_openrouter:
@ -293,6 +340,11 @@ class ChatCompletionsTransport(ProviderTransport):
if is_qwen: if is_qwen:
extra_body["vl_high_resolution_images"] = True extra_body["vl_high_resolution_images"] = True
if provider_name in {"gemini", "google-gemini-cli"}:
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
if thinking_config:
extra_body["thinking_config"] = thinking_config
# Merge any pre-built extra_body additions # Merge any pre-built extra_body additions
additions = params.get("extra_body_additions") additions = params.get("extra_body_additions")
if additions: if additions:

View file

@ -8094,6 +8094,7 @@ class AIAgent:
supports_reasoning=self._supports_reasoning_extra_body(), supports_reasoning=self._supports_reasoning_extra_body(),
github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None, github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None,
anthropic_max_output=_ant_max, anthropic_max_output=_ant_max,
provider_name=self.provider,
) )
def _supports_reasoning_extra_body(self) -> bool: def _supports_reasoning_extra_body(self) -> bool:

View file

@ -4,7 +4,7 @@ import pytest
from types import SimpleNamespace from types import SimpleNamespace
from agent.transports import get_transport from agent.transports import get_transport
from agent.transports.types import NormalizedResponse, ToolCall from agent.transports.types import NormalizedResponse
@pytest.fixture @pytest.fixture
@ -122,6 +122,75 @@ class TestChatCompletionsBuildKwargs:
) )
assert kw["extra_body"]["think"] is False assert kw["extra_body"]["think"] is False
def test_gemini_without_explicit_reasoning_config_keeps_existing_behavior(self, transport):
    """No reasoning_config supplied -> no thinking_config is injected."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
    )
    extra_body = kwargs.get("extra_body", {})
    assert "thinking_config" not in extra_body
def test_gemini_flash_reasoning_maps_to_thinking_config(self, transport):
    """Flash models pass the requested effort straight through as thinkingLevel."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "high"},
    )
    expected = {"includeThoughts": True, "thinkingLevel": "high"}
    assert kwargs["extra_body"]["thinking_config"] == expected
def test_gemini_25_reasoning_only_enables_visible_thoughts(self, transport):
    """Gemini 2.5 gets includeThoughts only — no thinkingLevel is guessed."""
    kwargs = transport.build_kwargs(
        model="gemini-2.5-flash",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "high"},
    )
    assert kwargs["extra_body"]["thinking_config"] == {"includeThoughts": True}
def test_gemini_pro_reasoning_clamps_to_supported_levels(self, transport):
    """Pro models collapse mid/low efforts down to the 'low' thinkingLevel."""
    kwargs = transport.build_kwargs(
        model="google/gemini-3.1-pro-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "medium"},
    )
    expected = {"includeThoughts": True, "thinkingLevel": "low"}
    assert kwargs["extra_body"]["thinking_config"] == expected
def test_gemini_disabled_reasoning_hides_thoughts(self, transport):
    """enabled=False maps to includeThoughts=False with no thinkingLevel."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": False},
    )
    assert kwargs["extra_body"]["thinking_config"] == {"includeThoughts": False}
def test_gemini_xhigh_clamps_to_high(self, transport):
    """The 'xhigh' effort is clamped to 'high' even on Flash models."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "xhigh"},
    )
    thinking_config = kwargs["extra_body"]["thinking_config"]
    assert thinking_config["thinkingLevel"] == "high"
def test_max_tokens_with_fn(self, transport): def test_max_tokens_with_fn(self, transport):
msgs = [{"role": "user", "content": "Hi"}] msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs( kw = transport.build_kwargs(