// // Copyright (c) 2020 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include #include #include #include "procs.h" template struct TestDef { const char *name; const char *kernel_code; std::function ref; }; template int test_intmath(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, std::string typestr) { TestDef tests[] = { // Test addition { "test_add", R"( __kernel void test_add(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *srcC, __global TYPE *dst) { int tid = get_global_id(0); dst[tid] = srcA[tid] + srcB[tid]; }; )", [](T a, T b, T c) { return a + b; }, }, // Test subtraction { "test_sub", R"( __kernel void test_sub(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *srcC, __global TYPE *dst) { int tid = get_global_id(0); dst[tid] = srcA[tid] - srcB[tid]; }; )", [](T a, T b, T c) { return a - b; }, }, // Test multiplication { "test_mul", R"( __kernel void test_mul(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *srcC, __global TYPE *dst) { int tid = get_global_id(0); dst[tid] = srcA[tid] * srcB[tid]; }; )", [](T a, T b, T c) { return a * b; }, }, // Test multiply-accumulate { "test_mad", R"( __kernel void test_mad(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *srcC, __global TYPE *dst) { int tid = get_global_id(0); dst[tid] = srcA[tid] * srcB[tid] + srcC[tid]; }; )", [](T a, T b, T c) { return a * b + c; }, }, }; clMemWrapper streams[4]; cl_int err; if (std::is_same::value && !gHasLong) { log_info("64-bit integers are not supported on this device. Skipping " "test.\n"); return TEST_SKIPPED_ITSELF; } // Create host buffers and fill with random data. std::vector inputA(num_elements * N); std::vector inputB(num_elements * N); std::vector inputC(num_elements * N); std::vector output(num_elements * N); MTdataHolder d(gRandomSeed); for (int i = 0; i < num_elements; i++) { inputA[i] = (T)genrand_int64(d); inputB[i] = (T)genrand_int64(d); inputC[i] = (T)genrand_int64(d); } size_t datasize = sizeof(T) * num_elements * N; // Create device buffers. for (int i = 0; i < ARRAY_SIZE(streams); i++) { streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &err); test_error(err, "clCreateBuffer failed"); } // Copy input data to device. err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, datasize, inputA.data(), 0, NULL, NULL); test_error(err, "clEnqueueWriteBuffer failed\n"); err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, datasize, inputB.data(), 0, NULL, NULL); test_error(err, "clEnqueueWriteBuffer failed\n"); err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, datasize, inputC.data(), 0, NULL, NULL); test_error(err, "clEnqueueWriteBuffer failed\n"); std::string build_options = "-DTYPE="; build_options += typestr; // Run test for each operation for (auto test : tests) { log_info("%s... ", test.name); // Create kernel and set args clProgramWrapper program; clKernelWrapper kernel; err = create_single_kernel_helper(context, &program, &kernel, 1, &test.kernel_code, test.name, build_options.c_str()); test_error(err, "create_single_kernel_helper failed\n"); err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &streams[1]); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &streams[2]); err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &streams[3]); test_error(err, "clSetKernelArgs failed\n"); // Run kernel size_t threads[1] = { static_cast(num_elements) }; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL); test_error(err, "clEnqueueNDRangeKernel failed\n"); // Read results err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, datasize, output.data(), 0, NULL, NULL); test_error(err, "clEnqueueReadBuffer failed\n"); // Verify results for (int i = 0; i < num_elements * N; i++) { T r = test.ref(inputA[i], inputB[i], inputC[i]); if (r != output[i]) { log_error("\n\nverification failed at index %d\n", i); log_error("-> inputs: %llu, %llu, %llu\n", static_cast(inputA[i]), static_cast(inputB[i]), static_cast(inputC[i])); log_error("-> expected %llu, got %llu\n\n", static_cast(r), static_cast(output[i])); return TEST_FAIL; } } log_info("passed\n"); } return TEST_PASS; } int test_intmath_int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { return test_intmath(device, context, queue, num_elements, "uint"); } int test_intmath_int2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { return test_intmath(device, context, queue, num_elements, "uint2"); } int test_intmath_int4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { return test_intmath(device, context, queue, num_elements, "uint4"); } int test_intmath_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { return test_intmath(device, context, queue, num_elements, "ulong"); } int test_intmath_long2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { return test_intmath(device, context, queue, num_elements, "ulong2"); } int test_intmath_long4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { return test_intmath(device, context, queue, num_elements, "ulong4"); }