diff --git a/tests/run_agent/test_tool_call_guardrail_runtime.py b/tests/run_agent/test_tool_call_guardrail_runtime.py index f1d90502391..e7ab376281a 100644 --- a/tests/run_agent/test_tool_call_guardrail_runtime.py +++ b/tests/run_agent/test_tool_call_guardrail_runtime.py @@ -304,3 +304,52 @@ def test_config_enabled_hard_stop_run_conversation_returns_controlled_guardrail_ call_ids = [tc["id"] for tc in assistant_msg["tool_calls"]] following_results = [m for m in result["messages"] if m.get("role") == "tool" and m.get("tool_call_id") in call_ids] assert len(following_results) == len(call_ids) + + +def test_guardrail_halt_emits_final_response_through_stream_delta_callback(): + """Regression for #30770: when the guardrail halts the loop, the + synthesized halt message must be pushed through ``stream_delta_callback`` + so SSE/TUI clients see why the agent stopped instead of a silent stream + close. Without this the chat-completions SSE writer drains an empty + queue and emits a finish chunk with zero content (indistinguishable + from a crash for Open WebUI and similar clients). + """ + agent = _make_agent("web_search", max_iterations=10, config=_hard_stop_config()) + same_args = {"query": "same"} + responses = [ + _mock_response( + content="", + finish_reason="tool_calls", + tool_calls=[_mock_tool_call("web_search", json.dumps(same_args), f"c{i}")], + ) + for i in range(1, 10) + ] + agent.client.chat.completions.create.side_effect = responses + + deltas: list = [] + agent.stream_delta_callback = lambda d: deltas.append(d) + # The mocked client returns SimpleNamespace responses which aren't + # iterable as streaming chunks; force the non-streaming code path so + # the guardrail-halt branch is reached without engaging the real + # streaming machinery. + agent._disable_streaming = True + + with ( + patch("run_agent.handle_function_call", return_value=json.dumps({"error": "boom"})), + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("search repeatedly") + + assert result["turn_exit_reason"] == "guardrail_halt" + halt_text = result["final_response"] + assert "stopped retrying" in halt_text + + # The halt message must have been pushed through the callback at least + # once. Empty-queue SSE writers were the bug — clients saw no content + # delta before the finish chunk. + text_deltas = [d for d in deltas if isinstance(d, str)] + assert halt_text in text_deltas, ( + f"halt message was never streamed; callback only saw {deltas!r}" + )