mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 01:21:43 +00:00
fix(qwen): correct context lengths for qwen3-coder models and send max_tokens to portal
Based on PR #7285 by @kshitijk4poor. Two bugs affecting Qwen OAuth users: 1. Wrong context window — qwen3-coder-plus showed 128K instead of 1M. Added specific entries before the generic qwen catch-all: - qwen3-coder-plus: 1,000,000 (corrected from PR's 1,048,576 per official Alibaba Cloud docs and OpenRouter) - qwen3-coder: 262,144 2. Random stopping — max_tokens was suppressed for Qwen Portal, so the server applied its own low default. Reasoning models exhaust that on thinking tokens. Now: honor explicit max_tokens, default to 65536 when unset. Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
This commit is contained in:
parent
f459214010
commit
af9caec44f
4 changed files with 45 additions and 6 deletions
|
|
@ -953,14 +953,24 @@ class TestBuildApiKwargs:
|
|||
assert kwargs["messages"][0]["content"][0]["text"] == "hi"
|
||||
assert "cache_control" not in kwargs["messages"][0]["content"][0]
|
||||
|
||||
def test_qwen_portal_omits_max_tokens(self, agent):
|
||||
def test_qwen_portal_sends_explicit_max_tokens(self, agent):
|
||||
"""When the user explicitly sets max_tokens, it should be sent to Qwen Portal."""
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.max_tokens = 4096
|
||||
messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert "max_tokens" not in kwargs
|
||||
assert "max_completion_tokens" not in kwargs
|
||||
assert kwargs["max_tokens"] == 4096
|
||||
|
||||
def test_qwen_portal_default_max_tokens(self, agent):
|
||||
"""When max_tokens is None, Qwen Portal gets a default of 65536
|
||||
to prevent reasoning models from exhausting their output budget."""
|
||||
agent.base_url = "https://portal.qwen.ai/v1"
|
||||
agent._base_url_lower = agent.base_url.lower()
|
||||
agent.max_tokens = None
|
||||
messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["max_tokens"] == 65536
|
||||
|
||||
|
||||
class TestBuildAssistantMessage:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue