diff --git a/run_agent.py b/run_agent.py index 110c3137c4e..625dc5fce27 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2103,6 +2103,59 @@ class AIAgent: content = re.sub(r'\s*', '', content, flags=re.IGNORECASE) return content + @staticmethod + def _has_natural_response_ending(content: str) -> bool: + """Heuristic: does visible assistant text look intentionally finished?""" + if not content: + return False + stripped = content.rstrip() + if not stripped: + return False + if stripped.endswith("```"): + return True + return stripped[-1] in '.!?:)"\']}。!?:)】」』》' + + def _is_ollama_glm_backend(self) -> bool: + """Detect the narrow backend family affected by Ollama/GLM stop misreports.""" + model_lower = (self.model or "").lower() + provider_lower = (self.provider or "").lower() + if "glm" not in model_lower and provider_lower != "zai": + return False + if "ollama" in self._base_url_lower or ":11434" in self._base_url_lower: + return True + return bool(self.base_url and is_local_endpoint(self.base_url)) + + def _should_treat_stop_as_truncated( + self, + finish_reason: str, + assistant_message, + messages: Optional[list] = None, + ) -> bool: + """Detect conservative stop->length misreports for Ollama-hosted GLM models.""" + if finish_reason != "stop" or self.api_mode != "chat_completions": + return False + if not self._is_ollama_glm_backend(): + return False + if not any( + isinstance(msg, dict) and msg.get("role") == "tool" + for msg in (messages or []) + ): + return False + if assistant_message is None or getattr(assistant_message, "tool_calls", None): + return False + + content = getattr(assistant_message, "content", None) + if not isinstance(content, str): + return False + + visible_text = self._strip_think_blocks(content).strip() + if not visible_text: + return False + if len(visible_text) < 20 or not re.search(r"\s", visible_text): + return False + + return not self._has_natural_response_ending(visible_text) + def _looks_like_codex_intermediate_ack( self, user_message: str, @@ -9038,6 +9091,17 @@ class AIAgent: finish_reason = stop_reason_map.get(response.stop_reason, "stop") else: finish_reason = response.choices[0].finish_reason + assistant_message = response.choices[0].message + if self._should_treat_stop_as_truncated( + finish_reason, + assistant_message, + messages, + ): + self._vprint( + f"{self.log_prefix}⚠️ Treating suspicious Ollama/GLM stop response as truncated", + force=True, + ) + finish_reason = "length" if finish_reason == "length": self._vprint(f"{self.log_prefix}⚠️ Response truncated (finish_reason='length') - model hit max output tokens", force=True) diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 5ff1491e4fb..7422f22f1f9 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2202,6 +2202,114 @@ class TestRunConversation: assert second_call_messages[-1]["role"] == "user" assert "truncated by the output length limit" in second_call_messages[-1]["content"] + def test_ollama_glm_stop_after_tools_without_terminal_boundary_requests_continuation(self, agent): + """Ollama-hosted GLM responses can misreport truncated output as stop.""" + self._setup_agent(agent) + agent.base_url = "http://localhost:11434/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "glm-5.1:cloud" + + tool_turn = _mock_response( + content="", + finish_reason="tool_calls", + tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")], + ) + misreported_stop = _mock_response( + content="Based on the search results, the best next", + finish_reason="stop", + ) + continued = _mock_response( + content=" step is to update the config.", + finish_reason="stop", + ) + agent.client.chat.completions.create.side_effect = [ + tool_turn, + misreported_stop, + continued, + ] + + with ( + patch("run_agent.handle_function_call", return_value="search result"), + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("hello") + + assert result["completed"] is True + assert result["api_calls"] == 3 + assert ( + result["final_response"] + == "Based on the search results, the best next step is to update the config." + ) + + third_call_messages = agent.client.chat.completions.create.call_args_list[2].kwargs["messages"] + assert third_call_messages[-1]["role"] == "user" + assert "truncated by the output length limit" in third_call_messages[-1]["content"] + + def test_ollama_glm_stop_with_terminal_boundary_does_not_continue(self, agent): + """Complete Ollama/GLM responses should not be reclassified as truncated.""" + self._setup_agent(agent) + agent.base_url = "http://localhost:11434/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "glm-5.1:cloud" + + tool_turn = _mock_response( + content="", + finish_reason="tool_calls", + tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")], + ) + complete_stop = _mock_response( + content="Based on the search results, the best next step is to update the config.", + finish_reason="stop", + ) + agent.client.chat.completions.create.side_effect = [tool_turn, complete_stop] + + with ( + patch("run_agent.handle_function_call", return_value="search result"), + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("hello") + + assert result["completed"] is True + assert result["api_calls"] == 2 + assert ( + result["final_response"] + == "Based on the search results, the best next step is to update the config." + ) + + def test_non_ollama_stop_without_terminal_boundary_does_not_continue(self, agent): + """The stop->length workaround should stay scoped to Ollama/GLM backends.""" + self._setup_agent(agent) + agent.base_url = "https://api.openai.com/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "gpt-4o-mini" + + tool_turn = _mock_response( + content="", + finish_reason="tool_calls", + tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")], + ) + normal_stop = _mock_response( + content="Based on the search results, the best next", + finish_reason="stop", + ) + agent.client.chat.completions.create.side_effect = [tool_turn, normal_stop] + + with ( + patch("run_agent.handle_function_call", return_value="search result"), + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("hello") + + assert result["completed"] is True + assert result["api_calls"] == 2 + assert result["final_response"] == "Based on the search results, the best next" + def test_length_thinking_exhausted_skips_continuation(self, agent): """When finish_reason='length' but content is only thinking, skip retries.""" self._setup_agent(agent)