//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
|
|
|
|
#include <vector>
|
|
|
|
// OpenCL C sources exercising __attribute__((opencl_unroll_hint)) on loops.
//
// Every entry defines a kernel named "pragma_unroll" that stores dst[i] = i
// for i in [0, bound). The 24 entries are laid out as:
//   - 3 loop forms, in order: for, while, do-while (8 entries each);
//   - within each loop form: 4 entries with the literal bound 100, then
//     4 entries with the bound n = (get_global_id(0) + 1) * 100;
//   - within each group of 4: hint with no argument, then hint(1), hint(10)
//     and hint(100).
const char *pragma_unroll_kernels[] = {
    // ---- for loop, literal bound ----
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " __attribute__((opencl_unroll_hint))\n"
    " for(size_t i = 0; i < 100; ++i)\n"
    " dst[i] = i;\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " __attribute__((opencl_unroll_hint(1)))\n"
    " for(size_t i = 0; i < 100; ++i)\n"
    " dst[i] = i;\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " __attribute__((opencl_unroll_hint(10)))\n"
    " for(size_t i = 0; i < 100; ++i)\n"
    " dst[i] = i;\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " __attribute__((opencl_unroll_hint(100)))\n"
    " for(size_t i = 0; i < 100; ++i)\n"
    " dst[i] = i;\n"
    "}\n",
    // ---- for loop, bound computed from the work-item id ----
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " __attribute__((opencl_unroll_hint))\n"
    " for(size_t i = 0; i < n; ++i)\n"
    " dst[i] = i;\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " __attribute__((opencl_unroll_hint(1)))\n"
    " for(size_t i = 0; i < n; ++i)\n"
    " dst[i] = i;\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " __attribute__((opencl_unroll_hint(10)))\n"
    " for(size_t i = 0; i < n; ++i)\n"
    " dst[i] = i;\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " __attribute__((opencl_unroll_hint(100)))\n"
    " for(size_t i = 0; i < n; ++i)\n"
    " dst[i] = i;\n"
    "}\n",
    // ---- while loop, literal bound ----
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint))\n"
    " while(i < 100) {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " }\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(1)))\n"
    " while(i < 100) {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " }\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(10)))\n"
    " while(i < 100) {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " }\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(100)))\n"
    " while(i < 100) {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " }\n"
    "}\n",
    // ---- while loop, bound computed from the work-item id ----
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint))\n"
    " while(i < n) {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " }\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(1)))\n"
    " while(i < n) {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " }\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(10)))\n"
    " while(i < n) {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " }\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(100)))\n"
    " while(i < n) {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " }\n"
    "}\n",
    // ---- do-while loop, literal bound ----
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint))\n"
    " do {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " } while(i < 100);\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(1)))\n"
    " do {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " } while(i < 100);\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(10)))\n"
    " do {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " } while(i < 100);\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(100)))\n"
    " do {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " } while(i < 100);\n"
    "}\n",
    // ---- do-while loop, bound computed from the work-item id ----
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint))\n"
    " do {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " } while(i < n);\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(1)))\n"
    " do {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " } while(i < n);\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(10)))\n"
    " do {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " } while(i < n);\n"
    "}\n",
    "__kernel void pragma_unroll(__global uint *dst)\n"
    "{\n"
    " size_t tid = get_global_id(0);\n"
    " size_t n = (tid + 1) * 100;\n"
    " size_t i = 0;\n"
    " __attribute__((opencl_unroll_hint(100)))\n"
    " do {\n"
    " dst[i] = i;\n"
    " ++i;\n"
    " } while(i < n);\n"
    "}\n",
};
|
|
|
|
// Builds and runs each kernel in pragma_unroll_kernels and checks that the
// output buffer holds 0, 1, ..., ELEMENT_NUM - 1 afterwards.
//
// Parameters:
//   deviceID     - not referenced by this function.
//   context      - context used to build the programs and create the buffer.
//   queue        - command queue the kernel launch and read-back are
//                  enqueued on.
//   num_elements - not referenced by this function; the element count is
//                  fixed at ELEMENT_NUM.
//
// Returns 0 when every kernel produced the expected sequence, and -1 when a
// kernel fails to build or returns a wrong value. OpenCL API failures are
// routed through test_error (presumably returning early from this function;
// confirm against the harness macro).
int test_pragma_unroll(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
    const size_t ELEMENT_NUM = 100;
    // Derived from the table itself so adding or removing a kernel source
    // cannot leave the loop bound stale.
    const size_t KERNEL_NUM = sizeof(pragma_unroll_kernels) / sizeof(pragma_unroll_kernels[0]);

    cl_int error;

    //execute all kernels and check if the results are as expected
    for (size_t kernelIdx = 0; kernelIdx < KERNEL_NUM; ++kernelIdx) {
        clProgramWrapper program;
        clKernelWrapper kernel;
        // A non-zero return from the helper is treated as a build failure.
        if (create_single_kernel_helper(
                context, &program, &kernel, 1,
                (const char **)&pragma_unroll_kernels[kernelIdx], "pragma_unroll"))
        {
            log_error("The program we attempted to compile was: \n%s\n",
                      pragma_unroll_kernels[kernelIdx]);
            return -1;
        }

        clMemWrapper buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, ELEMENT_NUM * sizeof(cl_uint), NULL, &error);
        test_error(error, "clCreateBuffer failed");

        // NOTE(review): passes the wrapper's size and address; this relies on
        // clMemWrapper being layout-compatible with cl_mem - confirm against
        // the harness definition.
        error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
        test_error(error, "clSetKernelArg failed");

        //only one thread should be enough to verify if kernel is fully functional
        size_t workSize = 1;
        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &workSize, NULL, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Blocking read (CL_TRUE): results is populated when the call returns.
        std::vector<cl_uint> results(ELEMENT_NUM, 0);
        error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, ELEMENT_NUM * sizeof(cl_uint), &results[0], 0, NULL, NULL);
        test_error(error, "clEnqueueReadBuffer failed");

        for (size_t i = 0; i < ELEMENT_NUM; ++i) {
            if (results[i] != i) {
                // size_t and cl_uint must not be passed to %d; cast to
                // unsigned int and print with %u. Kernel numbers are reported
                // 1-based.
                log_error("Kernel %u returned invalid result. Test: %u, expected: %u\n",
                          (unsigned int)(kernelIdx + 1), (unsigned int)results[i], (unsigned int)i);
                return -1;
            }
        }
    }

    return 0;
}
|