mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-07 02:51:50 +00:00
fix(gemini): bridge reasoning_config into thinking_config for chat-completions routes
This commit is contained in:
parent
315a11a76f
commit
dbbe2d1973
3 changed files with 123 additions and 1 deletions
|
|
@ -18,6 +18,52 @@ from agent.transports.base import ProviderTransport
|
||||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||||
|
|
||||||
|
|
||||||
|
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
|
||||||
|
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
|
||||||
|
|
||||||
|
Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
|
||||||
|
They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
|
||||||
|
then request thought parts with ``includeThoughts`` enabled.
|
||||||
|
"""
|
||||||
|
if reasoning_config is None or not isinstance(reasoning_config, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
if reasoning_config.get("enabled") is False:
|
||||||
|
# Gemini can hide thought parts even when internal thinking still
|
||||||
|
# happens; omit thinkingLevel to avoid model-specific validation quirks.
|
||||||
|
return {"includeThoughts": False}
|
||||||
|
|
||||||
|
effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
|
||||||
|
if effort == "none":
|
||||||
|
return {"includeThoughts": False}
|
||||||
|
|
||||||
|
thinking_config: Dict[str, Any] = {"includeThoughts": True}
|
||||||
|
normalized_model = (model or "").strip().lower()
|
||||||
|
if normalized_model.startswith("google/"):
|
||||||
|
normalized_model = normalized_model.split("/", 1)[1]
|
||||||
|
|
||||||
|
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
|
||||||
|
# coarse effort levels. ``includeThoughts`` alone is enough to surface
|
||||||
|
# thought parts without risking request validation errors.
|
||||||
|
if normalized_model.startswith("gemini-2.5-"):
|
||||||
|
return thinking_config
|
||||||
|
|
||||||
|
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
|
||||||
|
effort = "medium"
|
||||||
|
|
||||||
|
# Gemini 3 Flash supports the wider level set; Gemini 3 Pro is stricter,
|
||||||
|
# so collapse low-end efforts to ``low`` and high-end efforts to ``high``.
|
||||||
|
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
|
||||||
|
if "flash" in normalized_model:
|
||||||
|
thinking_config["thinkingLevel"] = "high" if effort == "xhigh" else effort
|
||||||
|
elif "pro" in normalized_model:
|
||||||
|
thinking_config["thinkingLevel"] = (
|
||||||
|
"high" if effort in {"high", "xhigh"} else "low"
|
||||||
|
)
|
||||||
|
|
||||||
|
return thinking_config
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionsTransport(ProviderTransport):
|
class ChatCompletionsTransport(ProviderTransport):
|
||||||
"""Transport for api_mode='chat_completions'.
|
"""Transport for api_mode='chat_completions'.
|
||||||
|
|
||||||
|
|
@ -241,6 +287,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
is_openrouter = params.get("is_openrouter", False)
|
is_openrouter = params.get("is_openrouter", False)
|
||||||
is_nous = params.get("is_nous", False)
|
is_nous = params.get("is_nous", False)
|
||||||
is_github_models = params.get("is_github_models", False)
|
is_github_models = params.get("is_github_models", False)
|
||||||
|
provider_name = str(params.get("provider_name") or "").strip().lower()
|
||||||
|
|
||||||
provider_prefs = params.get("provider_preferences")
|
provider_prefs = params.get("provider_preferences")
|
||||||
if provider_prefs and is_openrouter:
|
if provider_prefs and is_openrouter:
|
||||||
|
|
@ -293,6 +340,11 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||||
if is_qwen:
|
if is_qwen:
|
||||||
extra_body["vl_high_resolution_images"] = True
|
extra_body["vl_high_resolution_images"] = True
|
||||||
|
|
||||||
|
if provider_name in {"gemini", "google-gemini-cli"}:
|
||||||
|
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||||
|
if thinking_config:
|
||||||
|
extra_body["thinking_config"] = thinking_config
|
||||||
|
|
||||||
# Merge any pre-built extra_body additions
|
# Merge any pre-built extra_body additions
|
||||||
additions = params.get("extra_body_additions")
|
additions = params.get("extra_body_additions")
|
||||||
if additions:
|
if additions:
|
||||||
|
|
|
||||||
|
|
@ -8094,6 +8094,7 @@ class AIAgent:
|
||||||
supports_reasoning=self._supports_reasoning_extra_body(),
|
supports_reasoning=self._supports_reasoning_extra_body(),
|
||||||
github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None,
|
github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None,
|
||||||
anthropic_max_output=_ant_max,
|
anthropic_max_output=_ant_max,
|
||||||
|
provider_name=self.provider,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _supports_reasoning_extra_body(self) -> bool:
|
def _supports_reasoning_extra_body(self) -> bool:
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import pytest
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
|
|
||||||
from agent.transports import get_transport
|
from agent.transports import get_transport
|
||||||
from agent.transports.types import NormalizedResponse, ToolCall
|
from agent.transports.types import NormalizedResponse
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
@ -122,6 +122,75 @@ class TestChatCompletionsBuildKwargs:
|
||||||
)
|
)
|
||||||
assert kw["extra_body"]["think"] is False
|
assert kw["extra_body"]["think"] is False
|
||||||
|
|
||||||
|
def test_gemini_without_explicit_reasoning_config_keeps_existing_behavior(self, transport):
    """No ``reasoning_config`` means no ``thinking_config`` is injected."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
    )
    extra_body = kwargs.get("extra_body", {})
    assert "thinking_config" not in extra_body
|
||||||
|
|
||||||
|
def test_gemini_flash_reasoning_maps_to_thinking_config(self, transport):
    """A Gemini 3 Flash request with ``high`` effort carries ``thinkingLevel: high``."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "high"},
    )
    expected = {"includeThoughts": True, "thinkingLevel": "high"}
    assert kwargs["extra_body"]["thinking_config"] == expected
|
||||||
|
|
||||||
|
def test_gemini_25_reasoning_only_enables_visible_thoughts(self, transport):
    """Gemini 2.5 requests get ``includeThoughts`` only, with no ``thinkingLevel``."""
    kwargs = transport.build_kwargs(
        model="gemini-2.5-flash",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "high"},
    )
    expected = {"includeThoughts": True}
    assert kwargs["extra_body"]["thinking_config"] == expected
|
||||||
|
|
||||||
|
def test_gemini_pro_reasoning_clamps_to_supported_levels(self, transport):
    """On a ``google/``-prefixed Gemini 3.1 Pro model, ``medium`` effort becomes ``low``."""
    kwargs = transport.build_kwargs(
        model="google/gemini-3.1-pro-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "medium"},
    )
    expected = {"includeThoughts": True, "thinkingLevel": "low"}
    assert kwargs["extra_body"]["thinking_config"] == expected
|
||||||
|
|
||||||
|
def test_gemini_disabled_reasoning_hides_thoughts(self, transport):
    """``enabled: False`` yields a config that only turns ``includeThoughts`` off."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": False},
    )
    expected = {"includeThoughts": False}
    assert kwargs["extra_body"]["thinking_config"] == expected
|
||||||
|
|
||||||
|
def test_gemini_xhigh_clamps_to_high(self, transport):
    """``xhigh`` effort on Gemini 3 Flash is sent as ``thinkingLevel: high``."""
    kwargs = transport.build_kwargs(
        model="gemini-3-flash-preview",
        messages=[{"role": "user", "content": "Hi"}],
        provider_name="gemini",
        reasoning_config={"enabled": True, "effort": "xhigh"},
    )
    thinking_config = kwargs["extra_body"]["thinking_config"]
    assert thinking_config["thinkingLevel"] == "high"
|
||||||
|
|
||||||
def test_max_tokens_with_fn(self, transport):
|
def test_max_tokens_with_fn(self, transport):
|
||||||
msgs = [{"role": "user", "content": "Hi"}]
|
msgs = [{"role": "user", "content": "Hi"}]
|
||||||
kw = transport.build_kwargs(
|
kw = transport.build_kwargs(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue