diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 31d220110..2ce0cefa0 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -113,7 +113,10 @@ DEFAULT_CONTEXT_LENGTHS = {
     "deepseek": 128000,
     # Meta
     "llama": 131072,
-    # Qwen
+    # Qwen — specific model families before the catch-all.
+    # Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
+    "qwen3-coder-plus": 1000000,  # 1M context
+    "qwen3-coder": 262144,  # 256K context
     "qwen": 131072,
     # MiniMax — official docs: 204,800 context for all models
     # https://platform.minimax.io/docs/api-reference/text-anthropic-api
diff --git a/run_agent.py b/run_agent.py
index 2901ef10a..aef1a3b15 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -5888,8 +5888,16 @@ class AIAgent:
             api_kwargs["tools"] = self.tools
 
         if self.max_tokens is not None:
-            if not self._is_qwen_portal():
-                api_kwargs.update(self._max_tokens_param(self.max_tokens))
+            api_kwargs.update(self._max_tokens_param(self.max_tokens))
+        elif self._is_qwen_portal():
+            # Qwen Portal defaults to a very low max_tokens when omitted.
+            # Reasoning models (qwen3-coder-plus) exhaust that budget on
+            # thinking tokens alone, causing the portal to return
+            # finish_reason="stop" with truncated output — the agent sees
+            # this as an intentional stop and exits the loop. Send 65536
+            # (the documented max output for qwen3-coder models) so the
+            # model has adequate output budget for tool calls.
+            api_kwargs.update(self._max_tokens_param(65536))
         elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
             # OpenRouter and Nous Portal translate requests to Anthropic's
             # Messages API, which requires max_tokens as a mandatory field.
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index b95c72e13..1eac37e20 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -222,6 +222,24 @@ class TestGetModelContextLength:
         mock_fetch.return_value = {}
         assert get_model_context_length("openai/gpt-4o") == 128000
 
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen3_coder_plus_context_length(self, mock_fetch):
+        """qwen3-coder-plus has a 1M context window, not the generic 128K Qwen default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-coder-plus") == 1000000
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen3_coder_context_length(self, mock_fetch):
+        """qwen3-coder has a 256K context window, not the generic 128K Qwen default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-coder") == 262144
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen_generic_context_length(self, mock_fetch):
+        """Generic qwen models still get the 128K default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-plus") == 131072
+
     @patch("agent.model_metadata.fetch_model_metadata")
     def test_api_missing_context_length_key(self, mock_fetch):
         """Model in API but without context_length → defaults to 128000."""
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 58e67070c..0f2d1d4de 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -953,14 +953,24 @@ class TestBuildApiKwargs:
         assert kwargs["messages"][0]["content"][0]["text"] == "hi"
         assert "cache_control" not in kwargs["messages"][0]["content"][0]
 
-    def test_qwen_portal_omits_max_tokens(self, agent):
+    def test_qwen_portal_sends_explicit_max_tokens(self, agent):
+        """When the user explicitly sets max_tokens, it should be sent to Qwen Portal."""
         agent.base_url = "https://portal.qwen.ai/v1"
         agent._base_url_lower = agent.base_url.lower()
         agent.max_tokens = 4096
         messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
-        assert "max_tokens" not in kwargs
-        assert "max_completion_tokens" not in kwargs
+        assert kwargs["max_tokens"] == 4096
+
+    def test_qwen_portal_default_max_tokens(self, agent):
+        """When max_tokens is None, Qwen Portal gets a default of 65536
+        to prevent reasoning models from exhausting their output budget."""
+        agent.base_url = "https://portal.qwen.ai/v1"
+        agent._base_url_lower = agent.base_url.lower()
+        agent.max_tokens = None
+        messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["max_tokens"] == 65536
 
 
 class TestBuildAssistantMessage: