mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-30 11:52:04 +00:00
fix(auxiliary): preserve max_tokens for NVIDIA NIM aux calls
Models served through NVIDIA's integrate.api.nvidia.com endpoint, such as minimaxai/minimax-m3, can return HTTP 200 with an empty choices array when max_tokens is omitted. Keep the output cap on auxiliary chat-completions routes, matching the behavior of the main NVIDIA provider profile.
This commit is contained in:
parent
f53ba9bb54
commit
88e6f9b98c
1 changed file with 15 additions and 1 deletion
|
|
@@ -5489,10 +5489,24 @@ def _build_call_kwargs(
|
|||
# ``/anthropic`` endpoint reached through the OpenAI SDK wrapper), where
|
||||
# max_tokens is a MANDATORY field — omitting it is a hard 400. Keep it only
|
||||
# there.
|
||||
#
|
||||
# NVIDIA NIM (integrate.api.nvidia.com and local NIM endpoints) is a
|
||||
# second exception: some models—notably minimaxai/minimax-m3—return HTTP
|
||||
# 200 with an empty choices[] payload when max_tokens is omitted. The main
|
||||
# NVIDIA chat path already sends an output cap via the provider profile;
|
||||
# preserve it on the auxiliary path too.
|
||||
_effective_base = base_url or (
|
||||
_current_custom_base_url() if provider == "custom" else ""
|
||||
)
|
||||
if _is_anthropic_compat_endpoint(provider, _effective_base):
|
||||
_provider_norm = str(provider or "").strip().lower()
|
||||
_is_nvidia_nim = (
|
||||
_provider_norm in {"nvidia", "nvidia-nim", "nim", "build-nvidia", "nemotron"}
|
||||
or base_url_host_matches(_effective_base, "integrate.api.nvidia.com")
|
||||
)
|
||||
if (
|
||||
_is_anthropic_compat_endpoint(provider, _effective_base)
|
||||
or _is_nvidia_nim
|
||||
):
|
||||
kwargs["max_tokens"] = max_tokens
|
||||
|
||||
if tools:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue