diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index 0e497c9ec3..ab7075aa18 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -18,6 +18,52 @@
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage
 
 
+def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
+    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
+
+    Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
+    They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
+    then request thought parts with ``includeThoughts`` enabled.
+    """
+    if reasoning_config is None or not isinstance(reasoning_config, dict):
+        return None
+
+    if reasoning_config.get("enabled") is False:
+        # Gemini can hide thought parts even when internal thinking still
+        # happens; omit thinkingLevel to avoid model-specific validation quirks.
+        return {"includeThoughts": False}
+
+    effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
+    if effort == "none":
+        return {"includeThoughts": False}
+
+    thinking_config: dict[str, Any] = {"includeThoughts": True}
+    normalized_model = (model or "").strip().lower()
+    if normalized_model.startswith("google/"):
+        normalized_model = normalized_model.split("/", 1)[1]
+
+    # Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
+    # coarse effort levels. ``includeThoughts`` alone is enough to surface
+    # thought parts without risking request validation errors.
+    if normalized_model.startswith("gemini-2.5-"):
+        return thinking_config
+
+    if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
+        effort = "medium"
+
+    # Gemini 3 Flash supports the wider level set; Gemini 3 Pro is stricter,
+    # so collapse low-end efforts to ``low`` and high-end efforts to ``high``.
+    if normalized_model.startswith("gemini-3"):
+        if "flash" in normalized_model:
+            thinking_config["thinkingLevel"] = "high" if effort == "xhigh" else effort
+        elif "pro" in normalized_model:
+            thinking_config["thinkingLevel"] = (
+                "high" if effort in {"high", "xhigh"} else "low"
+            )
+
+    return thinking_config
+
+
 class ChatCompletionsTransport(ProviderTransport):
     """Transport for api_mode='chat_completions'.
@@ -241,6 +287,7 @@ class ChatCompletionsTransport(ProviderTransport):
         is_openrouter = params.get("is_openrouter", False)
         is_nous = params.get("is_nous", False)
         is_github_models = params.get("is_github_models", False)
+        provider_name = str(params.get("provider_name") or "").strip().lower()
 
         provider_prefs = params.get("provider_preferences")
         if provider_prefs and is_openrouter:
@@ -293,6 +340,11 @@ class ChatCompletionsTransport(ProviderTransport):
         if is_qwen:
             extra_body["vl_high_resolution_images"] = True
 
+        if provider_name in {"gemini", "google-gemini-cli"}:
+            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if thinking_config:
+                extra_body["thinking_config"] = thinking_config
+
         # Merge any pre-built extra_body additions
         additions = params.get("extra_body_additions")
         if additions:
diff --git a/run_agent.py b/run_agent.py
index b4cf706257..9156b39a6b 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -8094,6 +8094,7 @@ class AIAgent:
             supports_reasoning=self._supports_reasoning_extra_body(),
             github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None,
             anthropic_max_output=_ant_max,
+            provider_name=self.provider,
         )
 
     def _supports_reasoning_extra_body(self) -> bool:
diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py
index 4adf9f72e5..0110f893ca 100644
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@@ -4,7 +4,7 @@
 import pytest
 from types import SimpleNamespace
 
 from agent.transports import get_transport
-from agent.transports.types import NormalizedResponse, ToolCall
+from agent.transports.types import NormalizedResponse
 
 @pytest.fixture
@@ -122,6 +122,75 @@ class TestChatCompletionsBuildKwargs:
         )
         assert kw["extra_body"]["think"] is False
 
+    def test_gemini_without_explicit_reasoning_config_keeps_existing_behavior(self, transport):
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemini-3-flash-preview",
+            messages=msgs,
+            provider_name="gemini",
+        )
+        assert "thinking_config" not in kw.get("extra_body", {})
+
+    def test_gemini_flash_reasoning_maps_to_thinking_config(self, transport):
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemini-3-flash-preview",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert kw["extra_body"]["thinking_config"] == {
+            "includeThoughts": True,
+            "thinkingLevel": "high",
+        }
+
+    def test_gemini_25_reasoning_only_enables_visible_thoughts(self, transport):
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemini-2.5-flash",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert kw["extra_body"]["thinking_config"] == {
+            "includeThoughts": True,
+        }
+
+    def test_gemini_pro_reasoning_clamps_to_supported_levels(self, transport):
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="google/gemini-3.1-pro-preview",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": True, "effort": "medium"},
+        )
+        assert kw["extra_body"]["thinking_config"] == {
+            "includeThoughts": True,
+            "thinkingLevel": "low",
+        }
+
+    def test_gemini_disabled_reasoning_hides_thoughts(self, transport):
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemini-3-flash-preview",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": False},
+        )
+        assert kw["extra_body"]["thinking_config"] == {
+            "includeThoughts": False,
+        }
+
+    def test_gemini_xhigh_clamps_to_high(self, transport):
+        msgs = [{"role": "user", "content": "Hi"}]
+        kw = transport.build_kwargs(
+            model="gemini-3-flash-preview",
+            messages=msgs,
+            provider_name="gemini",
+            reasoning_config={"enabled": True, "effort": "xhigh"},
+        )
+        assert kw["extra_body"]["thinking_config"]["thinkingLevel"] == "high"
+
     def test_max_tokens_with_fn(self, transport):
         msgs = [{"role": "user", "content": "Hi"}]
         kw = transport.build_kwargs(