From 88e6f9b98cc93cb3efbe86550186615a337dc246 Mon Sep 17 00:00:00 2001
From: HexLab98 <liruixinch@outlook.com>
Date: Mon, 29 Jun 2026 16:56:09 +0700
Subject: [PATCH] fix(auxiliary): preserve max_tokens for NVIDIA NIM aux calls

NVIDIA integrate.api.nvidia.com models such as minimaxai/minimax-m3 can
return HTTP 200 with empty choices when max_tokens is omitted. Keep the
output cap on auxiliary chat-completions routes, matching the main NVIDIA
provider profile behavior.
---
 agent/auxiliary_client.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index dfeec87e12d..1807e7be2ee 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -5489,10 +5489,24 @@ def _build_call_kwargs(
         # ``/anthropic`` endpoint reached through the OpenAI SDK wrapper), where
         # max_tokens is a MANDATORY field — omitting it is a hard 400. Keep it only
         # there.
+        #
+        # NVIDIA NIM (integrate.api.nvidia.com and local NIM endpoints) is a
+        # second exception: some models—notably minimaxai/minimax-m3—return HTTP
+        # 200 with an empty choices[] payload when max_tokens is omitted. The main
+        # NVIDIA chat path already sends an output cap via the provider profile;
+        # preserve it on the auxiliary path too.
         _effective_base = base_url or (
             _current_custom_base_url() if provider == "custom" else ""
         )
-        if _is_anthropic_compat_endpoint(provider, _effective_base):
+        _provider_norm = str(provider or "").strip().lower()
+        _is_nvidia_nim = (
+            _provider_norm in {"nvidia", "nvidia-nim", "nim", "build-nvidia", "nemotron"}
+            or base_url_host_matches(_effective_base, "integrate.api.nvidia.com")
+        )
+        if (
+            _is_anthropic_compat_endpoint(provider, _effective_base)
+            or _is_nvidia_nim
+        ):
             kwargs["max_tokens"] = max_tokens
 
     if tools: