fix(auxiliary): preserve max_tokens for NVIDIA NIM aux calls

Models served through NVIDIA's integrate.api.nvidia.com endpoint, such as
minimaxai/minimax-m3, can return HTTP 200 with an empty choices array when
max_tokens is omitted. Keep the output cap on auxiliary chat-completions
routes, matching the behavior of the main NVIDIA provider profile.
This commit is contained in:
HexLab98 2026-06-29 16:56:09 +07:00 committed by kshitij
parent f53ba9bb54
commit 88e6f9b98c

View file

@ -5489,10 +5489,24 @@ def _build_call_kwargs(
# ``/anthropic`` endpoint reached through the OpenAI SDK wrapper), where
# max_tokens is a MANDATORY field — omitting it is a hard 400. Keep it only
# there.
#
# NVIDIA NIM is a second exception: some models—notably
# minimaxai/minimax-m3—return HTTP 200 with an empty choices[] payload when
# max_tokens is omitted. NIM is recognized below either by provider name
# (which is how a local NIM endpoint is covered, provided it is configured
# under one of those names) or by the integrate.api.nvidia.com host. The main
# NVIDIA chat path already sends an output cap via the provider profile;
# preserve it on the auxiliary path too.
_effective_base = base_url or (
_current_custom_base_url() if provider == "custom" else ""
)
if _is_anthropic_compat_endpoint(provider, _effective_base):
_provider_norm = str(provider or "").strip().lower()
_is_nvidia_nim = (
_provider_norm in {"nvidia", "nvidia-nim", "nim", "build-nvidia", "nemotron"}
or base_url_host_matches(_effective_base, "integrate.api.nvidia.com")
)
if (
_is_anthropic_compat_endpoint(provider, _effective_base)
or _is_nvidia_nim
):
kwargs["max_tokens"] = max_tokens
if tools: