fix(qwen): correct context lengths for qwen3-coder models and send max_tokens to portal

Based on PR #7285 by @kshitijk4poor.

Two bugs affecting Qwen OAuth users:

1. Wrong context window — qwen3-coder-plus showed 128K instead of 1M.
   Added specific entries before the generic qwen catch-all:
   - qwen3-coder-plus: 1,000,000 (corrected from PR's 1,048,576 per
     official Alibaba Cloud docs and OpenRouter)
   - qwen3-coder: 262,144

2. Random stopping — max_tokens was suppressed for Qwen Portal, so the
   server applied its own low default. Reasoning models exhaust that on
   thinking tokens. Now: honor explicit max_tokens, default to 65536
   when unset.

Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
This commit is contained in:
kshitijk4poor 2026-04-11 03:29:09 -07:00 committed by Teknium
parent f459214010
commit af9caec44f
4 changed files with 45 additions and 6 deletions

View file

@ -953,14 +953,24 @@ class TestBuildApiKwargs:
assert kwargs["messages"][0]["content"][0]["text"] == "hi"
assert "cache_control" not in kwargs["messages"][0]["content"][0]
def test_qwen_portal_omits_max_tokens(self, agent):
def test_qwen_portal_sends_explicit_max_tokens(self, agent):
"""When the user explicitly sets max_tokens, it should be sent to Qwen Portal."""
agent.base_url = "https://portal.qwen.ai/v1"
agent._base_url_lower = agent.base_url.lower()
agent.max_tokens = 4096
messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert "max_tokens" not in kwargs
assert "max_completion_tokens" not in kwargs
assert kwargs["max_tokens"] == 4096
def test_qwen_portal_default_max_tokens(self, agent):
"""When max_tokens is None, Qwen Portal gets a default of 65536
to prevent reasoning models from exhausting their output budget."""
agent.base_url = "https://portal.qwen.ai/v1"
agent._base_url_lower = agent.base_url.lower()
agent.max_tokens = None
messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert kwargs["max_tokens"] == 65536
class TestBuildAssistantMessage: