From 88e6f9b98cc93cb3efbe86550186615a337dc246 Mon Sep 17 00:00:00 2001
From: HexLab98
Date: Mon, 29 Jun 2026 16:56:09 +0700
Subject: [PATCH] fix(auxiliary): preserve max_tokens for NVIDIA NIM aux calls

NVIDIA integrate.api.nvidia.com models such as minimaxai/minimax-m3 can
return HTTP 200 with empty choices when max_tokens is omitted. Keep the
output cap on auxiliary chat-completions routes, matching the main
NVIDIA provider profile behavior.
---
 agent/auxiliary_client.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index dfeec87e12d..1807e7be2ee 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -5489,10 +5489,24 @@ def _build_call_kwargs(
     # ``/anthropic`` endpoint reached through the OpenAI SDK wrapper), where
     # max_tokens is a MANDATORY field — omitting it is a hard 400. Keep it only
     # there.
+    #
+    # NVIDIA NIM (integrate.api.nvidia.com and local NIM endpoints) is a
+    # second exception: some models—notably minimaxai/minimax-m3—return HTTP
+    # 200 with an empty choices[] payload when max_tokens is omitted. The main
+    # NVIDIA chat path already sends an output cap via the provider profile;
+    # preserve it on the auxiliary path too.
     _effective_base = base_url or (
         _current_custom_base_url() if provider == "custom" else ""
     )
-    if _is_anthropic_compat_endpoint(provider, _effective_base):
+    _provider_norm = str(provider or "").strip().lower()
+    _is_nvidia_nim = (
+        _provider_norm in {"nvidia", "nvidia-nim", "nim", "build-nvidia", "nemotron"}
+        or base_url_host_matches(_effective_base, "integrate.api.nvidia.com")
+    )
+    if (
+        _is_anthropic_compat_endpoint(provider, _effective_base)
+        or _is_nvidia_nim
+    ):
         kwargs["max_tokens"] = max_tokens
 
     if tools: