fix(auxiliary): preserve max_tokens for NVIDIA NIM aux calls

Some models served through NVIDIA's integrate.api.nvidia.com endpoint, such as minimaxai/minimax-m3, can return HTTP 200 with an empty choices array when max_tokens is omitted. Keep the output cap on auxiliary chat-completions routes for NVIDIA NIM, matching the behavior of the main NVIDIA provider profile.
2026-06-30 11:52:04 +00:00 · 2026-06-29 16:56:09 +07:00 · 2026-06-29 16:56:09 +07:00 · 88e6f9b98c
commit 88e6f9b98c
parent f53ba9bb54
1 changed file with 15 additions and 1 deletion
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -5489,10 +5489,24 @@ def _build_call_kwargs(
        # ``/anthropic`` endpoint reached through the OpenAI SDK wrapper), where
        # max_tokens is a MANDATORY field — omitting it is a hard 400. Keep it only
        # there.
+        #
+        # NVIDIA NIM (integrate.api.nvidia.com and local NIM endpoints) is a
+        # second exception: some models—notably minimaxai/minimax-m3—return HTTP
+        # 200 with an empty choices[] payload when max_tokens is omitted. The main
+        # NVIDIA chat path already sends an output cap via the provider profile;
+        # preserve it on the auxiliary path too.
        _effective_base = base_url or (
            _current_custom_base_url() if provider == "custom" else ""
        )
-        if _is_anthropic_compat_endpoint(provider, _effective_base):
+        _provider_norm = str(provider or "").strip().lower()
+        _is_nvidia_nim = (
+            _provider_norm in {"nvidia", "nvidia-nim", "nim", "build-nvidia", "nemotron"}
+            or base_url_host_matches(_effective_base, "integrate.api.nvidia.com")
+        )
+        if (
+            _is_anthropic_compat_endpoint(provider, _effective_base)
+            or _is_nvidia_nim
+        ):
            kwargs["max_tokens"] = max_tokens

    if tools: