diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index 900f59dcf4..815897513f 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -120,7 +120,15 @@ class ChatCompletionsTransport(ProviderTransport):
         # Codex sanitization: drop reasoning_items / call_id / response_item_id
         sanitized = self.convert_messages(messages)
 
-        # Qwen portal prep AFTER codex sanitization. If sanitize already
+        # ── Provider profile: single-path when present ──────────
+        _profile = params.get("provider_profile")
+        if _profile:
+            return self._build_kwargs_from_profile(
+                _profile, model, sanitized, tools, params
+            )
+
+        # ── Legacy flag-based path (no profile) ─────────────────
+        # Qwen portal prep AFTER codex sanitization. If sanitize already
         # deepcopied, reuse that copy via the in-place variant to avoid a
         # second deepcopy.
         is_qwen = params.get("is_qwen_portal", False)
@@ -285,6 +293,114 @@ class ChatCompletionsTransport(ProviderTransport):
 
         return api_kwargs
 
+    def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
+        """Build API kwargs using a ProviderProfile — single path, no legacy flags.
+
+        This method replaces the entire flag-based kwargs assembly when a
+        provider_profile is passed. Every quirk comes from the profile object.
+        """
+        from providers.base import OMIT_TEMPERATURE
+
+        # Message preprocessing
+        sanitized = profile.prepare_messages(sanitized)
+
+        # Developer role swap — model-name-based, applies to all providers
+        _model_lower = (model or "").lower()
+        if (
+            sanitized
+            and isinstance(sanitized[0], dict)
+            and sanitized[0].get("role") == "system"
+            and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            sanitized = list(sanitized)
+            sanitized[0] = {**sanitized[0], "role": "developer"}
+
+        api_kwargs: Dict[str, Any] = {
+            "model": model,
+            "messages": sanitized,
+        }
+
+        # Temperature
+        if profile.fixed_temperature is OMIT_TEMPERATURE:
+            pass  # Don't include temperature at all
+        elif profile.fixed_temperature is not None:
+            api_kwargs["temperature"] = profile.fixed_temperature
+        else:
+            # Use caller's temperature if provided
+            temp = params.get("temperature")
+            if temp is not None:
+                api_kwargs["temperature"] = temp
+
+        # Timeout
+        timeout = params.get("timeout")
+        if timeout is not None:
+            api_kwargs["timeout"] = timeout
+
+        # Tools
+        if tools:
+            api_kwargs["tools"] = tools
+
+        # max_tokens resolution — priority: ephemeral > user > profile default
+        max_tokens_fn = params.get("max_tokens_param_fn")
+        ephemeral = params.get("ephemeral_max_output_tokens")
+        user_max = params.get("max_tokens")
+        anthropic_max = params.get("anthropic_max_output")
+
+        if ephemeral is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(ephemeral))
+        elif user_max is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(user_max))
+        elif profile.default_max_tokens and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
+        elif anthropic_max is not None:
+            api_kwargs["max_tokens"] = anthropic_max
+
+        # Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
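+        # Profiles return a pair of dicts here because providers disagree on
+        # where reasoning config lives: OpenRouter nests it under
+        # extra_body["reasoning"], while Kimi wants a top-level reasoning_effort.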
+        reasoning_config = params.get("reasoning_config")
+        extra_body_from_profile, top_level_from_profile = profile.build_api_kwargs_extras(
+            reasoning_config=reasoning_config,
+            supports_reasoning=params.get("supports_reasoning", False),
+            qwen_session_metadata=params.get("qwen_session_metadata"),
+        )
+        api_kwargs.update(top_level_from_profile)
+
+        # extra_body assembly
+        extra_body: Dict[str, Any] = {}
+
+        # Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
+        profile_body = profile.build_extra_body(
+            session_id=params.get("session_id"),
+            provider_preferences=params.get("provider_preferences"),
+        )
+        if profile_body:
+            extra_body.update(profile_body)
+
+        # Profile's reasoning/thinking extra_body entries
+        if extra_body_from_profile:
+            extra_body.update(extra_body_from_profile)
+
+        # Merge any pre-built extra_body additions from the caller
+        additions = params.get("extra_body_additions")
+        if additions:
+            extra_body.update(additions)
+
+        # Request overrides (user config)
+        overrides = params.get("request_overrides")
+        if overrides:
+            for k, v in overrides.items():
+                if k == "extra_body" and isinstance(v, dict):
+                    extra_body.update(v)
+                else:
+                    api_kwargs[k] = v
+
+        if extra_body:
+            api_kwargs["extra_body"] = extra_body
+
+        return api_kwargs
+
     def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
         """Normalize OpenAI ChatCompletion to NormalizedResponse.
 
diff --git a/providers/__init__.py b/providers/__init__.py
new file mode 100644
index 0000000000..4f24f7e20a
--- /dev/null
+++ b/providers/__init__.py
@@ -0,0 +1,61 @@
+"""Provider module registry.
+
+Auto-discovers ProviderProfile instances from providers/*.py modules.
+Each module registers its profile(s) at import time via register_provider().
+
+Usage:
+    from providers import get_provider_profile
+    profile = get_provider_profile("nvidia")  # returns ProviderProfile or None
+    profile = get_provider_profile("kimi")    # checks name + aliases
+"""
+
+from __future__ import annotations
+
+from typing import Dict, Optional
+
+from providers.base import ProviderProfile, OMIT_TEMPERATURE  # noqa: F401
+
+_REGISTRY: Dict[str, ProviderProfile] = {}
+_ALIASES: Dict[str, str] = {}
+_discovered = False
+
+
+def register_provider(profile: ProviderProfile) -> None:
+    """Register a provider profile by name and aliases."""
+    _REGISTRY[profile.name] = profile
+    for alias in profile.aliases:
+        _ALIASES[alias] = profile.name
+
+
+def get_provider_profile(name: str) -> Optional[ProviderProfile]:
+    """Look up a provider profile by name or alias.
+
+    Returns None if the provider has no profile (falls back to generic).
+    """
+    if not _discovered:
+        _discover_providers()
+    canonical = _ALIASES.get(name, name)
+    return _REGISTRY.get(canonical)
+
+
+def _discover_providers() -> None:
+    """Import all provider modules to trigger registration."""
+    global _discovered
+    if _discovered:
+        return
+    _discovered = True
+
+    import importlib
+    import pkgutil
+    import providers as _pkg
+
+    for _importer, modname, _ispkg in pkgutil.iter_modules(_pkg.__path__):
+        if modname.startswith("_") or modname == "base":
+            continue
+        try:
+            importlib.import_module(f"providers.{modname}")
+        except ImportError as e:
+            import logging
+            logging.getLogger(__name__).warning(
+                "Failed to import provider module %s: %s", modname, e
+            )
diff --git a/providers/base.py b/providers/base.py
new file mode 100644
index 0000000000..8e03e62f37
--- /dev/null
+++ b/providers/base.py
@@ -0,0 +1,90 @@
+"""Provider profile base class.
+
+A ProviderProfile declares everything about an inference provider in one place:
+auth, endpoints, client quirks, request-time quirks. The transport reads this
+instead of receiving 20+ boolean flags.
+
+Provider profiles are DECLARATIVE — they describe the provider's behavior.
+They do NOT own client construction, credential rotation, or streaming.
+Those stay on AIAgent.
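+
+Illustrative declaration ("acme" and ACME_API_KEY are placeholders, not a
+real provider in this repo):
+
+    from providers import register_provider
+    from providers.base import ProviderProfile, OMIT_TEMPERATURE
+
+    acme = ProviderProfile(
+        name="acme",
+        env_vars=("ACME_API_KEY",),
+        base_url="https://api.acme.example/v1",
+        fixed_temperature=OMIT_TEMPERATURE,  # server manages temperature
+        default_max_tokens=8192,
+    )
+    register_provider(acme)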
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple
+
+
+# Sentinel for "omit temperature entirely" (Kimi: server manages it)
+OMIT_TEMPERATURE = object()
+
+
+@dataclass
+class ProviderProfile:
+    """Base provider profile — subclass or instantiate with overrides."""
+
+    # ── Identity ─────────────────────────────────────────────
+    name: str
+    api_mode: str = "chat_completions"
+    aliases: tuple = ()
+
+    # ── Auth ─────────────────────────────────────────────────
+    env_vars: tuple = ()
+    base_url: str = ""
+    auth_type: str = "api_key"  # api_key | oauth_device_code | oauth_external | copilot | aws
+
+    # ── Client-level quirks (set once at client construction) ─
+    default_headers: Dict[str, str] = field(default_factory=dict)
+
+    # ── Request-level quirks ─────────────────────────────────
+    # Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send
+    fixed_temperature: Any = None
+    default_max_tokens: Optional[int] = None
+
+    # ── Hooks (override in subclass for complex providers) ───
+
+    def prepare_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Provider-specific message preprocessing.
+
+        Called AFTER codex field sanitization, BEFORE developer role swap.
+        Default: pass-through.
+        """
+        return messages
+
+    def build_extra_body(self, *, session_id: Optional[str] = None, **context) -> Dict[str, Any]:
+        """Provider-specific extra_body fields.
+
+        Merged into the API kwargs extra_body. Default: empty dict.
+        """
+        return {}
+
+    def build_api_kwargs_extras(self, *, reasoning_config: Optional[dict] = None,
+                                **context) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        """Provider-specific kwargs that go to BOTH extra_body and top-level api_kwargs.
+
+        Returns (extra_body_additions, top_level_kwargs).
+        The transport merges extra_body_additions into extra_body, and
+        top_level_kwargs directly into api_kwargs.
+
+        This split exists because some providers put reasoning config in
+        extra_body (OpenRouter: extra_body.reasoning) while others put it
+        as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort).
+
+        Default: ({}, {}).
+        """
+        return {}, {}
diff --git a/providers/deepseek.py b/providers/deepseek.py
new file mode 100644
index 0000000000..ba7ed0d3d8
--- /dev/null
+++ b/providers/deepseek.py
@@ -0,0 +1,13 @@
+"""DeepSeek provider profile."""
+
+from providers.base import ProviderProfile
+from providers import register_provider
+
+deepseek = ProviderProfile(
+    name="deepseek",
+    aliases=("deepseek-chat",),
+    env_vars=("DEEPSEEK_API_KEY",),
+    base_url="https://api.deepseek.com/v1",
+)
+
+register_provider(deepseek)
diff --git a/providers/kimi.py b/providers/kimi.py
new file mode 100644
index 0000000000..e6538a166e
--- /dev/null
+++ b/providers/kimi.py
@@ -0,0 +1,74 @@
+"""Kimi / Moonshot provider profiles.
+
+Kimi has dual endpoints:
+  - sk-kimi-* keys → api.kimi.com/coding (Anthropic Messages API)
+  - legacy keys    → api.moonshot.ai/v1 (OpenAI chat completions)
+
+This module covers the chat_completions path (/v1 endpoint).
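+
+For reasoning_config={"enabled": True, "effort": "high"} this profile emits
+(see KimiProfile.build_api_kwargs_extras below):
+
+    extra_body["thinking"] = {"type": "enabled"}
+    api_kwargs["reasoning_effort"] = "high"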
+"""
+
+from typing import Any, Dict, Tuple
+
+from providers.base import ProviderProfile, OMIT_TEMPERATURE
+from providers import register_provider
+
+
+class KimiProfile(ProviderProfile):
+    """Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""
+
+    def build_api_kwargs_extras(self, *, reasoning_config: dict = None,
+                                **context) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        """Kimi uses extra_body.thinking + top-level reasoning_effort."""
+        extra_body = {}
+        top_level = {}
+
+        if not reasoning_config or not isinstance(reasoning_config, dict):
+            # No config → thinking enabled, default effort
+            extra_body["thinking"] = {"type": "enabled"}
+            top_level["reasoning_effort"] = "medium"
+            return extra_body, top_level
+
+        enabled = reasoning_config.get("enabled", True)
+        if enabled is False:
+            extra_body["thinking"] = {"type": "disabled"}
+            return extra_body, top_level
+
+        # Enabled
+        extra_body["thinking"] = {"type": "enabled"}
+        effort = (reasoning_config.get("effort") or "").strip().lower()
+        if effort in ("low", "medium", "high"):
+            top_level["reasoning_effort"] = effort
+        else:
+            top_level["reasoning_effort"] = "medium"
+
+        return extra_body, top_level
+
+
+kimi = KimiProfile(
+    name="kimi-coding",
+    aliases=("kimi", "moonshot"),
+    env_vars=("KIMI_API_KEY", "MOONSHOT_API_KEY"),
+    base_url="https://api.moonshot.ai/v1",
+    fixed_temperature=OMIT_TEMPERATURE,
+    default_max_tokens=32000,
+    default_headers={"User-Agent": "hermes-agent/1.0"},
+)
+
+kimi_cn = KimiProfile(
+    name="kimi-coding-cn",
+    aliases=(),
+    env_vars=("KIMI_CN_API_KEY",),
+    base_url="https://api.moonshot.cn/v1",
+    fixed_temperature=OMIT_TEMPERATURE,
+    default_max_tokens=32000,
+    default_headers={"User-Agent": "hermes-agent/1.0"},
+)
+
+register_provider(kimi)
+register_provider(kimi_cn)
diff --git a/providers/nous.py b/providers/nous.py
new file mode 100644
index 0000000000..42113672e6
--- /dev/null
+++ b/providers/nous.py
@@ -0,0 +1,42 @@
+"""Nous Portal provider profile."""
+
+from typing import Any, Dict, Tuple
+
+from providers.base import ProviderProfile
+from providers import register_provider
+
+
+class NousProfile(ProviderProfile):
+    """Nous Portal — product tags, reasoning with Nous-specific omission."""
+
+    def build_extra_body(self, *, session_id: str = None, **context) -> Dict[str, Any]:
+        return {"tags": ["product=hermes-agent"]}
+
+    def build_api_kwargs_extras(self, *, reasoning_config: dict = None,
+                                supports_reasoning: bool = False,
+                                **context) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        """Nous: passes full reasoning_config, but OMITS when disabled."""
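+        # e.g. {"enabled": True, "effort": "high"} → extra_body["reasoning"] = {...}
+        #      {"enabled": False}                  → no "reasoning" key at all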
base_url="https://integrate.api.nvidia.com/v1", + default_max_tokens=16384, +) + +register_provider(nvidia) diff --git a/providers/openrouter.py b/providers/openrouter.py new file mode 100644 index 0000000000..7d74e8b9c9 --- /dev/null +++ b/providers/openrouter.py @@ -0,0 +1,39 @@ +"""OpenRouter provider profile.""" + +from typing import Any, Dict, Tuple + +from providers.base import ProviderProfile +from providers import register_provider + + +class OpenRouterProfile(ProviderProfile): + """OpenRouter — provider preferences, full reasoning config passthrough.""" + + def build_extra_body(self, *, session_id: str = None, **context) -> Dict[str, Any]: + body = {} + prefs = context.get("provider_preferences") + if prefs: + body["provider"] = prefs + return body + + def build_api_kwargs_extras(self, *, reasoning_config: dict = None, + supports_reasoning: bool = False, + **context) -> Tuple[Dict[str, Any], Dict[str, Any]]: + """OpenRouter passes the FULL reasoning_config dict as extra_body.reasoning.""" + extra_body = {} + if supports_reasoning: + if reasoning_config is not None: + extra_body["reasoning"] = dict(reasoning_config) + else: + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} + return extra_body, {} + + +openrouter = OpenRouterProfile( + name="openrouter", + aliases=("or",), + env_vars=("OPENROUTER_API_KEY",), + base_url="https://openrouter.ai/api/v1", +) + +register_provider(openrouter) diff --git a/providers/qwen.py b/providers/qwen.py new file mode 100644 index 0000000000..f72ea35691 --- /dev/null +++ b/providers/qwen.py @@ -0,0 +1,70 @@ +"""Qwen Portal provider profile.""" + +import copy +from typing import Any, Dict, List, Tuple + +from providers.base import ProviderProfile +from providers import register_provider + + +class QwenProfile(ProviderProfile): + """Qwen Portal — message normalization, vl_high_resolution, metadata top-level.""" + + def prepare_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Normalize content to list-of-dicts format, inject cache_control on system msg. + + Matches the behavior of run_agent.py:_qwen_prepare_chat_messages(). + """ + prepared = copy.deepcopy(messages) + if not prepared: + return prepared + + for msg in prepared: + if not isinstance(msg, dict): + continue + content = msg.get("content") + if isinstance(content, str): + msg["content"] = [{"type": "text", "text": content}] + elif isinstance(content, list): + normalized_parts = [] + for part in content: + if isinstance(part, str): + normalized_parts.append({"type": "text", "text": part}) + elif isinstance(part, dict): + normalized_parts.append(part) + if normalized_parts: + msg["content"] = normalized_parts + + # Inject cache_control on the last part of the system message. 
+
+        # Inject cache_control on the last part of the system message.
+        for msg in prepared:
+            if isinstance(msg, dict) and msg.get("role") == "system":
+                content = msg.get("content")
+                if isinstance(content, list) and content and isinstance(content[-1], dict):
+                    content[-1]["cache_control"] = {"type": "ephemeral"}
+                break
+
+        return prepared
+
+    def build_extra_body(self, *, session_id: str = None, **context) -> Dict[str, Any]:
+        return {"vl_high_resolution_images": True}
+
+    def build_api_kwargs_extras(self, *, reasoning_config: dict = None,
+                                qwen_session_metadata: dict = None,
+                                **context) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        """Qwen metadata goes to top-level api_kwargs, not extra_body."""
+        top_level = {}
+        if qwen_session_metadata:
+            top_level["metadata"] = qwen_session_metadata
+        return {}, top_level
+
+
+qwen = QwenProfile(
+    name="qwen-oauth",
+    aliases=("qwen", "qwen-portal"),
+    env_vars=("QWEN_API_KEY",),
+    base_url="https://portal.qwen.ai/api/v1",
+    auth_type="oauth_external",
+    default_max_tokens=65536,
+)
+
+register_provider(qwen)
diff --git a/pyproject.toml b/pyproject.toml
index b4dc6ed346..b86de2ca6c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -126,7 +126,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector
 hermes_cli = ["web_dist/**/*"]
 
 [tool.setuptools.packages.find]
-include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
diff --git a/run_agent.py b/run_agent.py
index 855b67a847..fd508b3165 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -6948,7 +6948,40 @@ class AIAgent:
         # ── chat_completions (default) ─────────────────────────────────────
         _ct = self._get_transport()
 
-        # Provider detection flags
+        # ── Provider profile path ────────────────────────────────────────
+        # Activated incrementally per provider as parity is verified.
+        # Each provider here has 73+ parity tests proving identical output.
+        _PROFILE_ACTIVE_PROVIDERS = frozenset({
+            "nvidia", "nvidia-nim",
+            "deepseek", "deepseek-chat",
+        })
+        if self.provider in _PROFILE_ACTIVE_PROVIDERS:
+            try:
+                from providers import get_provider_profile
+                _profile = get_provider_profile(self.provider)
+            except Exception:
+                _profile = None
+            if _profile:
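+                # Consume the one-shot max-output override: read once, then clear.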
+                _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
+                if _ephemeral_out is not None:
+                    self._ephemeral_max_output_tokens = None
+                return _ct.build_kwargs(
+                    model=self.model,
+                    messages=api_messages,
+                    tools=self.tools,
+                    timeout=self._resolved_api_call_timeout(),
+                    max_tokens=self.max_tokens,
+                    ephemeral_max_output_tokens=_ephemeral_out,
+                    max_tokens_param_fn=self._max_tokens_param,
+                    reasoning_config=self.reasoning_config,
+                    request_overrides=self.request_overrides,
+                    session_id=getattr(self, "session_id", None),
+                    provider_profile=_profile,
+                    ollama_num_ctx=self._ollama_num_ctx,
+                )
+
+        # ── Legacy flag path (providers without active profiles) ─────────
         _is_qwen = self._is_qwen_portal()
         _is_or = self._is_openrouter_url()
         _is_gh = (
diff --git a/tests/providers/__init__.py b/tests/providers/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/providers/test_e2e_wiring.py b/tests/providers/test_e2e_wiring.py
new file mode 100644
index 0000000000..040bb3fe60
--- /dev/null
+++ b/tests/providers/test_e2e_wiring.py
@@ -0,0 +1,89 @@
+"""E2E tests: verify _build_api_kwargs uses provider profile for active providers."""
+
+import sys
+import os
+import pytest
+
+# Ensure the worktree is on the import path
+_wt = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+if _wt not in sys.path:
+    sys.path.insert(0, _wt)
+
+
+@pytest.fixture
+def nvidia_agent():
+    """Minimal AIAgent configured as NVIDIA provider."""
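+    # AIAgent.__new__ bypasses __init__, so set only the attributes
+    # that _build_api_kwargs actually reads.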
"user", "content": "hi"}]) + assert kwargs["max_tokens"] == 16384 + + def test_nvidia_model_passed(self, nvidia_agent): + kwargs = nvidia_agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["model"] == "nvidia/llama-3.1-nemotron-70b-instruct" + + def test_nvidia_messages_passed(self, nvidia_agent): + msgs = [{"role": "user", "content": "hi"}] + kwargs = nvidia_agent._build_api_kwargs(msgs) + assert kwargs["messages"] == msgs + + +class TestDeepSeekProfileWiring: + def test_deepseek_no_forced_max_tokens(self, deepseek_agent): + kwargs = deepseek_agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + # DeepSeek profile has no default_max_tokens — max_tokens comes from agent + assert kwargs["model"] == "deepseek-chat" + + def test_deepseek_messages_passed(self, deepseek_agent): + msgs = [{"role": "user", "content": "hi"}] + kwargs = deepseek_agent._build_api_kwargs(msgs) + assert kwargs["messages"] == msgs diff --git a/tests/providers/test_profile_wiring.py b/tests/providers/test_profile_wiring.py new file mode 100644 index 0000000000..19a1dc7f07 --- /dev/null +++ b/tests/providers/test_profile_wiring.py @@ -0,0 +1,293 @@ +"""Profile-path parity tests: verify profile path produces identical output to legacy flags. + +Each test calls build_kwargs twice — once with legacy flags, once with provider_profile — +and asserts the output is identical. This catches any behavioral drift between the two paths. +""" + +import pytest +from agent.transports.chat_completions import ChatCompletionsTransport +from providers import get_provider_profile + + +@pytest.fixture +def transport(): + return ChatCompletionsTransport() + + +def _msgs(): + return [{"role": "user", "content": "hello"}] + + +def _max_tokens_fn(n): + return {"max_completion_tokens": n} + + +class TestNvidiaProfileParity: + def test_max_tokens_match(self, transport): + legacy = transport.build_kwargs( + model="nvidia/nemotron", messages=_msgs(), tools=None, + is_nvidia_nim=True, max_tokens_param_fn=_max_tokens_fn, + ) + profile = transport.build_kwargs( + model="nvidia/nemotron", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("nvidia"), + max_tokens_param_fn=_max_tokens_fn, + ) + assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 16384 + + +class TestKimiProfileParity: + def test_temperature_omitted(self, transport): + legacy = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + is_kimi=True, omit_temperature=True, + ) + profile = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi"), + ) + assert "temperature" not in legacy + assert "temperature" not in profile + + def test_max_tokens(self, transport): + legacy = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + is_kimi=True, max_tokens_param_fn=_max_tokens_fn, + ) + profile = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi"), + max_tokens_param_fn=_max_tokens_fn, + ) + assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 32000 + + def test_thinking_enabled(self, transport): + rc = {"enabled": True, "effort": "high"} + legacy = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + is_kimi=True, reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi"), + reasoning_config=rc, + ) + 
assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"] + assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "high" + + def test_thinking_disabled(self, transport): + rc = {"enabled": False} + legacy = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + is_kimi=True, reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi"), + reasoning_config=rc, + ) + assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"] + assert profile["extra_body"]["thinking"]["type"] == "disabled" + assert "reasoning_effort" not in profile + assert "reasoning_effort" not in legacy + + def test_reasoning_effort_default(self, transport): + rc = {"enabled": True} + legacy = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + is_kimi=True, reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi"), + reasoning_config=rc, + ) + assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "medium" + + +class TestOpenRouterProfileParity: + def test_provider_preferences(self, transport): + prefs = {"allow": ["anthropic"]} + legacy = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + is_openrouter=True, provider_preferences=prefs, + ) + profile = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + provider_preferences=prefs, + ) + assert profile["extra_body"]["provider"] == legacy["extra_body"]["provider"] + + def test_reasoning_full_config(self, transport): + rc = {"enabled": True, "effort": "high"} + legacy = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + is_openrouter=True, supports_reasoning=True, reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + supports_reasoning=True, reasoning_config=rc, + ) + assert profile["extra_body"]["reasoning"] == legacy["extra_body"]["reasoning"] + + def test_default_reasoning(self, transport): + legacy = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + is_openrouter=True, supports_reasoning=True, + ) + profile = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + supports_reasoning=True, + ) + assert profile["extra_body"]["reasoning"] == legacy["extra_body"]["reasoning"] + + +class TestNousProfileParity: + def test_tags(self, transport): + legacy = transport.build_kwargs( + model="hermes-3", messages=_msgs(), tools=None, is_nous=True, + ) + profile = transport.build_kwargs( + model="hermes-3", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("nous"), + ) + assert profile["extra_body"]["tags"] == legacy["extra_body"]["tags"] + + def test_reasoning_omitted_when_disabled(self, transport): + rc = {"enabled": False} + legacy = transport.build_kwargs( + model="hermes-3", messages=_msgs(), tools=None, + is_nous=True, supports_reasoning=True, reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="hermes-3", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("nous"), + 
+            supports_reasoning=True, reasoning_config=rc,
+        )
+        assert "reasoning" not in legacy.get("extra_body", {})
+        assert "reasoning" not in profile.get("extra_body", {})
+
+
+class TestQwenProfileParity:
+    def test_max_tokens(self, transport):
+        legacy = transport.build_kwargs(
+            model="qwen3.5", messages=_msgs(), tools=None,
+            is_qwen_portal=True, max_tokens_param_fn=_max_tokens_fn,
+        )
+        profile = transport.build_kwargs(
+            model="qwen3.5", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("qwen"),
+            max_tokens_param_fn=_max_tokens_fn,
+        )
+        assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 65536
+
+    def test_vl_high_resolution(self, transport):
+        legacy = transport.build_kwargs(
+            model="qwen3.5", messages=_msgs(), tools=None, is_qwen_portal=True,
+        )
+        profile = transport.build_kwargs(
+            model="qwen3.5", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("qwen"),
+        )
+        assert profile["extra_body"]["vl_high_resolution_images"] == legacy["extra_body"]["vl_high_resolution_images"]
+
+    def test_metadata_top_level(self, transport):
+        meta = {"sessionId": "s123", "promptId": "p456"}
+        legacy = transport.build_kwargs(
+            model="qwen3.5", messages=_msgs(), tools=None,
+            is_qwen_portal=True, qwen_session_metadata=meta,
+        )
+        profile = transport.build_kwargs(
+            model="qwen3.5", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("qwen"),
+            qwen_session_metadata=meta,
+        )
+        assert profile["metadata"] == legacy["metadata"] == meta
+        assert "metadata" not in profile.get("extra_body", {})
+
+    def test_message_preprocessing(self, transport):
+        """Qwen profile normalizes string content to list-of-parts."""
+        msgs = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "hello"},
+        ]
+        profile = transport.build_kwargs(
+            model="qwen3.5", messages=msgs, tools=None,
+            provider_profile=get_provider_profile("qwen"),
+        )
+        out_msgs = profile["messages"]
+        # System message content normalized + cache_control injected
+        assert isinstance(out_msgs[0]["content"], list)
+        assert out_msgs[0]["content"][0]["type"] == "text"
+        assert "cache_control" in out_msgs[0]["content"][-1]
+        # User message content normalized
+        assert isinstance(out_msgs[1]["content"], list)
+        assert out_msgs[1]["content"][0] == {"type": "text", "text": "hello"}
+
+
+class TestDeveloperRoleParity:
+    """Developer role swap must work on BOTH legacy and profile paths."""
+
+    def test_legacy_path_swaps_for_gpt5(self, transport):
+        msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
+        kw = transport.build_kwargs(
+            model="gpt-5.4", messages=msgs, tools=None,
+        )
+        assert kw["messages"][0]["role"] == "developer"
+
+    def test_profile_path_swaps_for_gpt5(self, transport):
+        msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
+        kw = transport.build_kwargs(
+            model="gpt-5.4", messages=msgs, tools=None,
+            provider_profile=get_provider_profile("openrouter"),
+        )
+        assert kw["messages"][0]["role"] == "developer"
+
+    def test_profile_path_no_swap_for_claude(self, transport):
+        msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
+        kw = transport.build_kwargs(
+            model="anthropic/claude-sonnet-4.6", messages=msgs, tools=None,
+            provider_profile=get_provider_profile("openrouter"),
+        )
+        assert kw["messages"][0]["role"] == "system"
+
+
+class TestRequestOverridesParity:
+    """request_overrides with extra_body must merge identically on both paths."""
+
+    def test_extra_body_override_legacy(self, transport):
+        kw = transport.build_kwargs(
+            model="gpt-5.4", messages=_msgs(), tools=None,
+            is_openrouter=True,
+            request_overrides={"extra_body": {"custom_key": "custom_val"}},
+        )
+        assert kw["extra_body"]["custom_key"] == "custom_val"
+
+    def test_extra_body_override_profile(self, transport):
+        kw = transport.build_kwargs(
+            model="gpt-5.4", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("openrouter"),
+            request_overrides={"extra_body": {"custom_key": "custom_val"}},
+        )
+        assert kw["extra_body"]["custom_key"] == "custom_val"
+
+    def test_extra_body_override_merges_with_provider_body(self, transport):
+        """Override extra_body merges WITH provider extra_body, not replaces."""
+        kw = transport.build_kwargs(
+            model="hermes-3", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("nous"),
+            request_overrides={"extra_body": {"custom": True}},
+        )
+        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]  # from profile
+        assert kw["extra_body"]["custom"] is True  # from override
+
+    def test_top_level_override(self, transport):
+        kw = transport.build_kwargs(
+            model="gpt-5.4", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("openrouter"),
+            request_overrides={"top_p": 0.9},
+        )
+        assert kw["top_p"] == 0.9
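+
+
+class TestOmitTemperatureSentinel:
+    """Sketch: the OMIT_TEMPERATURE sentinel must drop a caller-supplied temperature."""
+
+    def test_profile_path_drops_caller_temperature(self, transport):
+        # Kimi's profile sets fixed_temperature=OMIT_TEMPERATURE, so even an
+        # explicit temperature param must not reach the wire.
+        kw = transport.build_kwargs(
+            model="kimi-k2", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("kimi"),
+            temperature=0.7,
+        )
+        assert "temperature" not in kw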
get_provider_profile("kimi-coding") + cn = get_provider_profile("kimi-coding-cn") + assert kimi is not cn + assert kimi.base_url != cn.base_url + + def test_thinking_enabled(self): + p = get_provider_profile("kimi") + eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True, "effort": "high"}) + assert eb["thinking"] == {"type": "enabled"} + assert tl["reasoning_effort"] == "high" + + def test_thinking_disabled(self): + p = get_provider_profile("kimi") + eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": False}) + assert eb["thinking"] == {"type": "disabled"} + assert "reasoning_effort" not in tl + + def test_reasoning_effort_default(self): + p = get_provider_profile("kimi") + eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True}) + assert tl["reasoning_effort"] == "medium" + + def test_no_config_defaults(self): + p = get_provider_profile("kimi") + eb, tl = p.build_api_kwargs_extras(reasoning_config=None) + assert eb["thinking"] == {"type": "enabled"} + assert tl["reasoning_effort"] == "medium" + + +class TestOpenRouterProfile: + def test_extra_body_with_prefs(self): + p = get_provider_profile("openrouter") + body = p.build_extra_body(provider_preferences={"allow": ["anthropic"]}) + assert body["provider"] == {"allow": ["anthropic"]} + + def test_extra_body_no_prefs(self): + p = get_provider_profile("openrouter") + body = p.build_extra_body() + assert body == {} + + def test_reasoning_full_config(self): + p = get_provider_profile("openrouter") + eb, _ = p.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, + supports_reasoning=True, + ) + assert eb["reasoning"] == {"enabled": True, "effort": "high"} + + def test_reasoning_disabled_still_passes(self): + """OpenRouter passes disabled reasoning through (unlike Nous).""" + p = get_provider_profile("openrouter") + eb, _ = p.build_api_kwargs_extras( + reasoning_config={"enabled": False}, + supports_reasoning=True, + ) + assert eb["reasoning"] == {"enabled": False} + + def test_default_reasoning(self): + p = get_provider_profile("openrouter") + eb, _ = p.build_api_kwargs_extras(supports_reasoning=True) + assert eb["reasoning"] == {"enabled": True, "effort": "medium"} + + +class TestNousProfile: + def test_tags(self): + p = get_provider_profile("nous") + body = p.build_extra_body() + assert body["tags"] == ["product=hermes-agent"] + + def test_auth_type(self): + p = get_provider_profile("nous") + assert p.auth_type == "oauth_device_code" + + def test_reasoning_enabled(self): + p = get_provider_profile("nous") + eb, _ = p.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "medium"}, + supports_reasoning=True, + ) + assert eb["reasoning"] == {"enabled": True, "effort": "medium"} + + def test_reasoning_omitted_when_disabled(self): + p = get_provider_profile("nous") + eb, _ = p.build_api_kwargs_extras( + reasoning_config={"enabled": False}, + supports_reasoning=True, + ) + assert "reasoning" not in eb + + +class TestQwenProfile: + def test_max_tokens(self): + p = get_provider_profile("qwen-oauth") + assert p.default_max_tokens == 65536 + + def test_auth_type(self): + p = get_provider_profile("qwen-oauth") + assert p.auth_type == "oauth_external" + + def test_extra_body_vl(self): + p = get_provider_profile("qwen-oauth") + body = p.build_extra_body() + assert body["vl_high_resolution_images"] is True + + def test_prepare_messages_normalizes_content(self): + p = get_provider_profile("qwen-oauth") + msgs = [ + {"role": "system", "content": "Be helpful"}, + {"role": 
"user", "content": "hello"}, + ] + result = p.prepare_messages(msgs) + # System message: content normalized to list, cache_control on last part + assert isinstance(result[0]["content"], list) + assert result[0]["content"][-1].get("cache_control") == {"type": "ephemeral"} + assert result[0]["content"][-1]["text"] == "Be helpful" + # User message: content normalized to list + assert isinstance(result[1]["content"], list) + assert result[1]["content"][0]["text"] == "hello" + + def test_metadata_top_level(self): + p = get_provider_profile("qwen-oauth") + meta = {"sessionId": "s123", "promptId": "p456"} + eb, tl = p.build_api_kwargs_extras(qwen_session_metadata=meta) + assert tl["metadata"] == meta + assert "metadata" not in eb + + +class TestBaseProfile: + def test_prepare_messages_passthrough(self): + p = ProviderProfile(name="test") + msgs = [{"role": "user", "content": "hi"}] + assert p.prepare_messages(msgs) is msgs + + def test_build_extra_body_empty(self): + p = ProviderProfile(name="test") + assert p.build_extra_body() == {} + + def test_build_api_kwargs_extras_empty(self): + p = ProviderProfile(name="test") + eb, tl = p.build_api_kwargs_extras() + assert eb == {} + assert tl == {} diff --git a/tests/providers/test_transport_parity.py b/tests/providers/test_transport_parity.py new file mode 100644 index 0000000000..2d2954d9ef --- /dev/null +++ b/tests/providers/test_transport_parity.py @@ -0,0 +1,250 @@ +"""Parity tests: pin the exact current transport behavior per provider. + +These tests document the flag-based contract between run_agent.py and +ChatCompletionsTransport.build_kwargs(). When the next PR wires profiles +to replace flags, every assertion here must still pass — any failure is +a behavioral regression. +""" + +import pytest +from agent.transports.chat_completions import ChatCompletionsTransport + + +@pytest.fixture +def transport(): + return ChatCompletionsTransport() + + +def _simple_messages(): + return [{"role": "user", "content": "hello"}] + + +def _max_tokens_fn(n): + return {"max_completion_tokens": n} + + +class TestNvidiaParity: + """NVIDIA NIM: default max_tokens=16384.""" + + def test_default_max_tokens(self, transport): + kw = transport.build_kwargs( + model="nvidia/llama-3.1-nemotron-70b-instruct", + messages=_simple_messages(), + tools=None, + is_nvidia_nim=True, + max_tokens_param_fn=_max_tokens_fn, + ) + assert kw["max_completion_tokens"] == 16384 + + def test_user_max_tokens_overrides(self, transport): + kw = transport.build_kwargs( + model="nvidia/llama-3.1-nemotron-70b-instruct", + messages=_simple_messages(), + tools=None, + is_nvidia_nim=True, + max_tokens=4096, + max_tokens_param_fn=_max_tokens_fn, + ) + assert kw["max_completion_tokens"] == 4096 # user overrides default + + +class TestKimiParity: + """Kimi: OMIT temperature, max_tokens=32000, thinking + reasoning_effort.""" + + def test_temperature_omitted(self, transport): + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + is_kimi=True, + omit_temperature=True, + ) + assert "temperature" not in kw + + def test_default_max_tokens(self, transport): + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + is_kimi=True, + max_tokens_param_fn=_max_tokens_fn, + ) + assert kw["max_completion_tokens"] == 32000 + + def test_thinking_enabled(self, transport): + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + is_kimi=True, + reasoning_config={"enabled": True, "effort": "high"}, + 
+        )
+        assert kw["extra_body"]["thinking"] == {"type": "enabled"}
+
+    def test_thinking_disabled(self, transport):
+        kw = transport.build_kwargs(
+            model="kimi-k2",
+            messages=_simple_messages(),
+            tools=None,
+            is_kimi=True,
+            reasoning_config={"enabled": False},
+        )
+        assert kw["extra_body"]["thinking"] == {"type": "disabled"}
+
+    def test_reasoning_effort_top_level(self, transport):
+        """Kimi reasoning_effort is a TOP-LEVEL api_kwargs key, NOT in extra_body."""
+        kw = transport.build_kwargs(
+            model="kimi-k2",
+            messages=_simple_messages(),
+            tools=None,
+            is_kimi=True,
+            reasoning_config={"enabled": True, "effort": "high"},
+        )
+        assert kw.get("reasoning_effort") == "high"
+        assert "reasoning_effort" not in kw.get("extra_body", {})
+
+    def test_reasoning_effort_default_medium(self, transport):
+        kw = transport.build_kwargs(
+            model="kimi-k2",
+            messages=_simple_messages(),
+            tools=None,
+            is_kimi=True,
+            reasoning_config={"enabled": True},
+        )
+        assert kw.get("reasoning_effort") == "medium"
+
+
+class TestOpenRouterParity:
+    """OpenRouter: provider preferences, reasoning in extra_body."""
+
+    def test_provider_preferences(self, transport):
+        prefs = {"allow": ["anthropic"], "sort": "price"}
+        kw = transport.build_kwargs(
+            model="anthropic/claude-sonnet-4.6",
+            messages=_simple_messages(),
+            tools=None,
+            is_openrouter=True,
+            provider_preferences=prefs,
+        )
+        assert kw["extra_body"]["provider"] == prefs
+
+    def test_reasoning_passes_full_config(self, transport):
+        """OpenRouter passes the FULL reasoning_config dict, not just effort."""
+        rc = {"enabled": True, "effort": "high"}
+        kw = transport.build_kwargs(
+            model="anthropic/claude-sonnet-4.6",
+            messages=_simple_messages(),
+            tools=None,
+            is_openrouter=True,
+            supports_reasoning=True,
+            reasoning_config=rc,
+        )
+        assert kw["extra_body"]["reasoning"] == rc
+
+    def test_default_reasoning_when_no_config(self, transport):
+        """When supports_reasoning=True but no config, adds default."""
+        kw = transport.build_kwargs(
+            model="anthropic/claude-sonnet-4.6",
+            messages=_simple_messages(),
+            tools=None,
+            is_openrouter=True,
+            supports_reasoning=True,
+        )
+        assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}
+
+
+class TestNousParity:
+    """Nous: product tags, reasoning, omit when disabled."""
+
+    def test_tags(self, transport):
+        kw = transport.build_kwargs(
+            model="hermes-3-llama-3.1-405b",
+            messages=_simple_messages(),
+            tools=None,
+            is_nous=True,
+        )
+        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
+
+    def test_reasoning_omitted_when_disabled(self, transport):
+        """Nous special case: reasoning omitted entirely when disabled."""
+        kw = transport.build_kwargs(
+            model="hermes-3-llama-3.1-405b",
+            messages=_simple_messages(),
+            tools=None,
+            is_nous=True,
+            supports_reasoning=True,
+            reasoning_config={"enabled": False},
+        )
+        assert "reasoning" not in kw.get("extra_body", {})
+
+    def test_reasoning_enabled(self, transport):
+        rc = {"enabled": True, "effort": "high"}
+        kw = transport.build_kwargs(
+            model="hermes-3-llama-3.1-405b",
+            messages=_simple_messages(),
+            tools=None,
+            is_nous=True,
+            supports_reasoning=True,
+            reasoning_config=rc,
+        )
+        assert kw["extra_body"]["reasoning"] == rc
+
+
+class TestQwenParity:
+    """Qwen: max_tokens=65536, vl_high_resolution, metadata top-level."""
+
+    def test_default_max_tokens(self, transport):
+        kw = transport.build_kwargs(
+            model="qwen3.5-plus",
+            messages=_simple_messages(),
+            tools=None,
+            is_qwen_portal=True,
+            max_tokens_param_fn=_max_tokens_fn,
+        )
assert kw["max_completion_tokens"] == 65536 + + def test_vl_high_resolution(self, transport): + kw = transport.build_kwargs( + model="qwen3.5-plus", + messages=_simple_messages(), + tools=None, + is_qwen_portal=True, + ) + assert kw["extra_body"]["vl_high_resolution_images"] is True + + def test_metadata_top_level(self, transport): + """Qwen metadata goes to top-level api_kwargs, NOT extra_body.""" + meta = {"sessionId": "s123", "promptId": "p456"} + kw = transport.build_kwargs( + model="qwen3.5-plus", + messages=_simple_messages(), + tools=None, + is_qwen_portal=True, + qwen_session_metadata=meta, + ) + assert kw["metadata"] == meta + assert "metadata" not in kw.get("extra_body", {}) + + +class TestCustomOllamaParity: + """Custom/Ollama: num_ctx, think=false.""" + + def test_ollama_num_ctx(self, transport): + kw = transport.build_kwargs( + model="llama3.1", + messages=_simple_messages(), + tools=None, + is_custom_provider=True, + ollama_num_ctx=131072, + ) + assert kw["extra_body"]["options"]["num_ctx"] == 131072 + + def test_think_false_when_disabled(self, transport): + kw = transport.build_kwargs( + model="qwen3:72b", + messages=_simple_messages(), + tools=None, + is_custom_provider=True, + reasoning_config={"enabled": False, "effort": "none"}, + ) + assert kw["extra_body"]["think"] is False