241 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			241 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			C++
		
	
	
	
| //
 | |
| // Copyright (c) 2020 The Khronos Group Inc.
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //    http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| //
 | |
| 
 | |
| #include <functional>
 | |
| #include <string>
 | |
| #include <vector>
 | |
| 
 | |
| #include "procs.h"
 | |
| 
 | |
// One integer-math test case for element type T.
template <typename T>
struct TestDef
{
    // Host-side reference: called with the A, B and C operands of a single
    // element and returns the value the kernel is expected to produce.
    using reference_fn = std::function<T(T, T, T)>;

    // Kernel entry-point name; also printed in the progress log.
    const char *name;
    // OpenCL C source for the kernel.
    const char *kernel_code;
    // Reference implementation used to verify the device output.
    reference_fn ref;
};
 | |
| 
 | |
| template <typename T, unsigned N>
 | |
| int test_intmath(cl_device_id device, cl_context context,
 | |
|                  cl_command_queue queue, int num_elements, std::string typestr)
 | |
| {
 | |
|     TestDef<T> tests[] = {
 | |
|         // Test addition
 | |
|         {
 | |
|             "test_add",
 | |
|             R"(
 | |
|   __kernel void test_add(__global TYPE *srcA,
 | |
|                          __global TYPE *srcB,
 | |
|                          __global TYPE *srcC,
 | |
|                          __global TYPE *dst)
 | |
|   {
 | |
|       int  tid = get_global_id(0);
 | |
|       dst[tid] = srcA[tid] + srcB[tid];
 | |
|   };
 | |
| )",
 | |
|             [](T a, T b, T c) { return a + b; },
 | |
|         },
 | |
| 
 | |
|         // Test subtraction
 | |
|         {
 | |
|             "test_sub",
 | |
|             R"(
 | |
|   __kernel void test_sub(__global TYPE *srcA,
 | |
|                          __global TYPE *srcB,
 | |
|                          __global TYPE *srcC,
 | |
|                          __global TYPE *dst)
 | |
|   {
 | |
|       int  tid = get_global_id(0);
 | |
|       dst[tid] = srcA[tid] - srcB[tid];
 | |
|   };
 | |
| )",
 | |
|             [](T a, T b, T c) { return a - b; },
 | |
|         },
 | |
| 
 | |
|         // Test multiplication
 | |
|         {
 | |
|             "test_mul",
 | |
|             R"(
 | |
|   __kernel void test_mul(__global TYPE *srcA,
 | |
|                          __global TYPE *srcB,
 | |
|                          __global TYPE *srcC,
 | |
|                          __global TYPE *dst)
 | |
|   {
 | |
|       int  tid = get_global_id(0);
 | |
|       dst[tid] = srcA[tid] * srcB[tid];
 | |
|   };
 | |
| )",
 | |
|             [](T a, T b, T c) { return a * b; },
 | |
|         },
 | |
| 
 | |
|         // Test multiply-accumulate
 | |
|         {
 | |
|             "test_mad",
 | |
|             R"(
 | |
|   __kernel void test_mad(__global TYPE *srcA,
 | |
|                          __global TYPE *srcB,
 | |
|                          __global TYPE *srcC,
 | |
|                          __global TYPE *dst)
 | |
|   {
 | |
|       int  tid = get_global_id(0);
 | |
|       dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];
 | |
|   };
 | |
| )",
 | |
|             [](T a, T b, T c) { return a * b + c; },
 | |
|         },
 | |
|     };
 | |
| 
 | |
|     clMemWrapper streams[4];
 | |
|     cl_int err;
 | |
| 
 | |
|     if (std::is_same<T, cl_ulong>::value && !gHasLong)
 | |
|     {
 | |
|         log_info("64-bit integers are not supported on this device. Skipping "
 | |
|                  "test.\n");
 | |
|         return TEST_SKIPPED_ITSELF;
 | |
|     }
 | |
| 
 | |
|     // Create host buffers and fill with random data.
 | |
|     std::vector<T> inputA(num_elements * N);
 | |
|     std::vector<T> inputB(num_elements * N);
 | |
|     std::vector<T> inputC(num_elements * N);
 | |
|     std::vector<T> output(num_elements * N);
 | |
|     MTdataHolder d(gRandomSeed);
 | |
|     for (int i = 0; i < num_elements; i++)
 | |
|     {
 | |
|         inputA[i] = (T)genrand_int64(d);
 | |
|         inputB[i] = (T)genrand_int64(d);
 | |
|         inputC[i] = (T)genrand_int64(d);
 | |
|     }
 | |
| 
 | |
|     size_t datasize = sizeof(T) * num_elements * N;
 | |
| 
 | |
|     // Create device buffers.
 | |
|     for (int i = 0; i < ARRAY_SIZE(streams); i++)
 | |
|     {
 | |
|         streams[i] =
 | |
|             clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &err);
 | |
|         test_error(err, "clCreateBuffer failed");
 | |
|     }
 | |
| 
 | |
|     // Copy input data to device.
 | |
|     err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, datasize,
 | |
|                                inputA.data(), 0, NULL, NULL);
 | |
|     test_error(err, "clEnqueueWriteBuffer failed\n");
 | |
|     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, datasize,
 | |
|                                inputB.data(), 0, NULL, NULL);
 | |
|     test_error(err, "clEnqueueWriteBuffer failed\n");
 | |
|     err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, datasize,
 | |
|                                inputC.data(), 0, NULL, NULL);
 | |
|     test_error(err, "clEnqueueWriteBuffer failed\n");
 | |
| 
 | |
|     std::string build_options = "-DTYPE=";
 | |
|     build_options += typestr;
 | |
| 
 | |
|     // Run test for each operation
 | |
|     for (auto test : tests)
 | |
|     {
 | |
|         log_info("%s... ", test.name);
 | |
| 
 | |
|         // Create kernel and set args
 | |
|         clProgramWrapper program;
 | |
|         clKernelWrapper kernel;
 | |
|         err = create_single_kernel_helper(context, &program, &kernel, 1,
 | |
|                                           &test.kernel_code, test.name,
 | |
|                                           build_options.c_str());
 | |
|         test_error(err, "create_single_kernel_helper failed\n");
 | |
| 
 | |
|         err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]);
 | |
|         err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &streams[1]);
 | |
|         err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &streams[2]);
 | |
|         err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &streams[3]);
 | |
|         test_error(err, "clSetKernelArgs failed\n");
 | |
| 
 | |
|         // Run kernel
 | |
|         size_t threads[1] = { static_cast<size_t>(num_elements) };
 | |
|         err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0,
 | |
|                                      NULL, NULL);
 | |
|         test_error(err, "clEnqueueNDRangeKernel failed\n");
 | |
| 
 | |
|         // Read results
 | |
|         err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, datasize,
 | |
|                                   output.data(), 0, NULL, NULL);
 | |
|         test_error(err, "clEnqueueReadBuffer failed\n");
 | |
| 
 | |
|         // Verify results
 | |
|         for (int i = 0; i < num_elements * N; i++)
 | |
|         {
 | |
|             T r = test.ref(inputA[i], inputB[i], inputC[i]);
 | |
|             if (r != output[i])
 | |
|             {
 | |
|                 log_error("\n\nverification failed at index %d\n", i);
 | |
|                 log_error("-> inputs: %llu, %llu, %llu\n",
 | |
|                           static_cast<cl_uint>(inputA[i]),
 | |
|                           static_cast<cl_uint>(inputB[i]),
 | |
|                           static_cast<cl_uint>(inputC[i]));
 | |
|                 log_error("-> expected %llu, got %llu\n\n",
 | |
|                           static_cast<cl_uint>(r),
 | |
|                           static_cast<cl_uint>(output[i]));
 | |
|                 return TEST_FAIL;
 | |
|             }
 | |
|         }
 | |
|         log_info("passed\n");
 | |
|     }
 | |
| 
 | |
|     return TEST_PASS;
 | |
| }
 | |
| 
 | |
| int test_intmath_int(cl_device_id device, cl_context context,
 | |
|                      cl_command_queue queue, int num_elements)
 | |
| {
 | |
|     return test_intmath<cl_uint, 1>(device, context, queue, num_elements,
 | |
|                                     "uint");
 | |
| }
 | |
| 
 | |
| int test_intmath_int2(cl_device_id device, cl_context context,
 | |
|                       cl_command_queue queue, int num_elements)
 | |
| {
 | |
|     return test_intmath<cl_uint, 2>(device, context, queue, num_elements,
 | |
|                                     "uint2");
 | |
| }
 | |
| 
 | |
| int test_intmath_int4(cl_device_id device, cl_context context,
 | |
|                       cl_command_queue queue, int num_elements)
 | |
| {
 | |
|     return test_intmath<cl_uint, 4>(device, context, queue, num_elements,
 | |
|                                     "uint4");
 | |
| }
 | |
| 
 | |
| int test_intmath_long(cl_device_id device, cl_context context,
 | |
|                       cl_command_queue queue, int num_elements)
 | |
| {
 | |
|     return test_intmath<cl_ulong, 1>(device, context, queue, num_elements,
 | |
|                                      "ulong");
 | |
| }
 | |
| 
 | |
| int test_intmath_long2(cl_device_id device, cl_context context,
 | |
|                        cl_command_queue queue, int num_elements)
 | |
| {
 | |
|     return test_intmath<cl_ulong, 2>(device, context, queue, num_elements,
 | |
|                                      "ulong2");
 | |
| }
 | |
| 
 | |
| int test_intmath_long4(cl_device_id device, cl_context context,
 | |
|                        cl_command_queue queue, int num_elements)
 | |
| {
 | |
|     return test_intmath<cl_ulong, 4>(device, context, queue, num_elements,
 | |
|                                      "ulong4");
 | |
| }
 |