From 09ec26c66a130051412e747d49a7ea96f2862b57 Mon Sep 17 00:00:00 2001
From: islam666 <islam666@users.noreply.github.com>
Date: Fri, 5 Jun 2026 06:29:36 +0000
Subject: [PATCH] fix(ollama): set default_max_tokens for custom/Ollama
 provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The custom/Ollama provider profile had no default_max_tokens, so no
max_tokens was sent on requests and Ollama fell back to its internal
num_predict=128 — truncating responses after a few tokens with
finish_reason='length' (#39281, e.g. gemma4).

max_tokens resolution order is ephemeral > user model.max_tokens >
profile default, so this is only a fallback used when the user hasn't set
their own cap. Set it to 65536 (matching the qwen-oauth tier) rather than
a conservative value, since users can always override per-model.

Fixes #39281
---
 plugins/model-providers/custom/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/plugins/model-providers/custom/__init__.py b/plugins/model-providers/custom/__init__.py
index 65e42e1fbee..6b7b13d5bdb 100644
--- a/plugins/model-providers/custom/__init__.py
+++ b/plugins/model-providers/custom/__init__.py
@@ -63,6 +63,11 @@ custom = CustomProfile(
     ),
     env_vars=(),  # No fixed key — custom endpoint
     base_url="",  # User-configured
+    # Without this, no max_tokens is sent and Ollama falls back to its internal
+    # num_predict=128, truncating responses after a few tokens (#39281). This is
+    # only a fallback default used when the user hasn't set model.max_tokens —
+    # they can override per-model — so it is set generously, not conservatively.
+    default_max_tokens=65536,
 )
 
 register_provider(custom)