330 lines
11 KiB
C++
330 lines
11 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "procs.h"
|
|
#include "harness/conversions.h"
|
|
#include "harness/typeWrappers.h"
|
|
|
|
struct get_test_data
|
|
{
|
|
cl_uint subGroupSize;
|
|
cl_uint maxSubGroupSize;
|
|
cl_uint numSubGroups;
|
|
cl_uint enqNumSubGroups;
|
|
cl_uint subGroupId;
|
|
cl_uint subGroupLocalId;
|
|
bool operator==(get_test_data x)
|
|
{
|
|
return subGroupSize == x.subGroupSize
|
|
&& maxSubGroupSize == x.maxSubGroupSize
|
|
&& numSubGroups == x.numSubGroups && subGroupId == x.subGroupId
|
|
&& subGroupLocalId == x.subGroupLocalId;
|
|
}
|
|
};
|
|
|
|
static int check_group(const get_test_data *result, int nw, cl_uint ensg,
|
|
int maxwgs)
|
|
{
|
|
int first = -1;
|
|
int last = -1;
|
|
int i, j;
|
|
cl_uint hit[32];
|
|
|
|
for (i = 0; i < nw; ++i)
|
|
{
|
|
if (result[i].subGroupId == 0 && result[i].subGroupLocalId == 0)
|
|
first = i;
|
|
if (result[i].subGroupId == result[0].numSubGroups - 1
|
|
&& result[i].subGroupLocalId == 0)
|
|
last = i;
|
|
if (first != -1 && last != -1) break;
|
|
}
|
|
|
|
if (first == -1 || last == -1)
|
|
{
|
|
log_error("ERROR: expected sub group id's are missing\n");
|
|
return -1;
|
|
}
|
|
|
|
// Check them
|
|
if (result[first].subGroupSize == 0)
|
|
{
|
|
log_error("ERROR: get_sub_group_size() returned 0\n");
|
|
return -1;
|
|
}
|
|
if (result[first].maxSubGroupSize == 0
|
|
|| result[first].maxSubGroupSize > maxwgs)
|
|
{
|
|
log_error(
|
|
"ERROR: get_max_subgroup_size() returned incorrect result: %u\n",
|
|
result[first].maxSubGroupSize);
|
|
return -1;
|
|
}
|
|
if (result[first].subGroupSize > result[first].maxSubGroupSize)
|
|
{
|
|
log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n");
|
|
return -1;
|
|
}
|
|
if (result[last].subGroupSize > result[first].subGroupSize)
|
|
{
|
|
log_error("ERROR: last sub group larger than first sub group\n");
|
|
return -1;
|
|
}
|
|
if (result[first].numSubGroups == 0 || result[first].numSubGroups > ensg)
|
|
{
|
|
log_error(
|
|
"ERROR: get_num_sub_groups() returned incorrect result: %u \n",
|
|
result[first].numSubGroups);
|
|
return -1;
|
|
}
|
|
|
|
memset(hit, 0, sizeof(hit));
|
|
for (i = 0; i < nw; ++i)
|
|
{
|
|
if (result[i].maxSubGroupSize != result[first].maxSubGroupSize
|
|
|| result[i].numSubGroups != result[first].numSubGroups)
|
|
{
|
|
log_error("ERROR: unexpected variation in get_*_sub_group_*()\n");
|
|
return -1;
|
|
}
|
|
if (result[i].subGroupId >= result[first].numSubGroups)
|
|
{
|
|
log_error(
|
|
"ERROR: get_sub_group_id() returned out of range value: %u\n",
|
|
result[i].subGroupId);
|
|
return -1;
|
|
}
|
|
if (result[i].enqNumSubGroups != ensg)
|
|
{
|
|
log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect "
|
|
"value: %u\n",
|
|
result[i].enqNumSubGroups);
|
|
return -1;
|
|
}
|
|
if (result[first].numSubGroups > 1)
|
|
{
|
|
if (result[i].subGroupId < result[first].numSubGroups - 1)
|
|
{
|
|
if (result[i].subGroupSize != result[first].subGroupSize)
|
|
{
|
|
log_error(
|
|
"ERROR: unexpected variation in get_*_sub_group_*()\n");
|
|
return -1;
|
|
}
|
|
if (result[i].subGroupLocalId >= result[first].subGroupSize)
|
|
{
|
|
log_error("ERROR: get_sub_group_local_id() returned out of "
|
|
"bounds value: %u \n",
|
|
result[i].subGroupLocalId);
|
|
return -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (result[i].subGroupSize != result[last].subGroupSize)
|
|
{
|
|
log_error(
|
|
"ERROR: unexpected variation in get_*_sub_group_*()\n");
|
|
return -1;
|
|
}
|
|
if (result[i].subGroupLocalId >= result[last].subGroupSize)
|
|
{
|
|
log_error("ERROR: get_sub_group_local_id() returned out of "
|
|
"bounds value: %u \n",
|
|
result[i].subGroupLocalId);
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (result[i].subGroupSize != result[first].subGroupSize)
|
|
{
|
|
log_error(
|
|
"ERROR: unexpected variation in get_*_sub_group_*()\n");
|
|
return -1;
|
|
}
|
|
if (result[i].subGroupLocalId >= result[first].subGroupSize)
|
|
{
|
|
log_error("ERROR: get_sub_group_local_id() returned out of "
|
|
"bounds value: %u \n",
|
|
result[i].subGroupLocalId);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId
|
|
+ (result[i].subGroupLocalId >> 5);
|
|
if (j < sizeof(hit) / 4)
|
|
{
|
|
cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
|
|
if ((hit[j] & b) != 0)
|
|
{
|
|
log_error("ERROR: get_sub_group_local_id() repeated a result "
|
|
"in the same sub group\n");
|
|
return -1;
|
|
}
|
|
hit[j] |= b;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_work_item_functions(cl_device_id device, cl_context context,
|
|
cl_command_queue queue, int num_elements,
|
|
bool useCoreSubgroups)
|
|
{
|
|
static const size_t lsize = 200;
|
|
int error;
|
|
int i, j, k, q, r, nw;
|
|
int maxwgs;
|
|
cl_uint ensg;
|
|
size_t global;
|
|
size_t local;
|
|
get_test_data result[lsize * 6];
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
clMemWrapper out;
|
|
std::stringstream kernel_sstr;
|
|
if (useCoreSubgroups)
|
|
{
|
|
kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
|
|
}
|
|
kernel_sstr
|
|
<< "\n"
|
|
"\n"
|
|
"typedef struct {\n"
|
|
" uint subGroupSize;\n"
|
|
" uint maxSubGroupSize;\n"
|
|
" uint numSubGroups;\n"
|
|
" uint enqNumSubGroups;\n"
|
|
" uint subGroupId;\n"
|
|
" uint subGroupLocalId;\n"
|
|
"} get_test_data;\n"
|
|
"\n"
|
|
"__kernel void get_test( __global get_test_data *outData )\n"
|
|
"{\n"
|
|
" int gid = get_global_id( 0 );\n"
|
|
" outData[gid].subGroupSize = get_sub_group_size();\n"
|
|
" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
|
|
" outData[gid].numSubGroups = get_num_sub_groups();\n"
|
|
" outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n"
|
|
" outData[gid].subGroupId = get_sub_group_id();\n"
|
|
" outData[gid].subGroupLocalId = get_sub_group_local_id();\n"
|
|
"}";
|
|
const std::string &kernel_str = kernel_sstr.str();
|
|
const char *kernel_src = kernel_str.c_str();
|
|
error = create_single_kernel_helper(context, &program, &kernel, 1,
|
|
&kernel_src, "get_test");
|
|
if (error != 0) return error;
|
|
|
|
error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
|
|
if (error != 0) return error;
|
|
|
|
maxwgs = (int)local;
|
|
|
|
// Limit it a bit so we have muliple work groups
|
|
// Ideally this will still be large enough to give us multiple subgroups
|
|
if (local > lsize) local = lsize;
|
|
|
|
// Create our buffer
|
|
out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL,
|
|
&error);
|
|
test_error(error, "clCreateBuffer failed");
|
|
|
|
// Set argument
|
|
error = clSetKernelArg(kernel, 0, sizeof(out), &out);
|
|
test_error(error, "clSetKernelArg failed");
|
|
|
|
global = local * 5;
|
|
|
|
// Make sure we have a flexible range
|
|
global += 3 * local / 4;
|
|
|
|
// Collect the data
|
|
memset((void *)&result, 0xf0, sizeof(result));
|
|
|
|
error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result),
|
|
(void *)&result, 0, NULL, NULL);
|
|
test_error(error, "clEnqueueWriteBuffer failed");
|
|
|
|
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0,
|
|
NULL, NULL);
|
|
test_error(error, "clEnqueueNDRangeKernel failed");
|
|
|
|
error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
|
|
(void *)&result, 0, NULL, NULL);
|
|
test_error(error, "clEnqueueReadBuffer failed");
|
|
|
|
error = clFinish(queue);
|
|
test_error(error, "clFinish failed");
|
|
|
|
nw = (int)local;
|
|
ensg = result[0].enqNumSubGroups;
|
|
|
|
// Check the first group
|
|
error = check_group(result, nw, ensg, maxwgs);
|
|
if (error) return error;
|
|
|
|
q = (int)global / nw;
|
|
r = (int)global % nw;
|
|
|
|
// Check the remaining work groups including the last if it is the same size
|
|
for (k = 1; k < q; ++k)
|
|
{
|
|
for (j = 0; j < nw; ++j)
|
|
{
|
|
i = k * nw + j;
|
|
if (!(result[i] == result[i - nw]))
|
|
{
|
|
log_error("ERROR: sub group mapping is not identical for all "
|
|
"work groups\n");
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check the last group if it wasn't the same size
|
|
if (r != 0)
|
|
{
|
|
error = check_group(result + q * nw, r, ensg, maxwgs);
|
|
if (error) return error;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_work_item_functions_core(cl_device_id device, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_work_item_functions(device, context, queue, num_elements, true);
|
|
}
|
|
|
|
int test_work_item_functions_ext(cl_device_id device, cl_context context,
|
|
cl_command_queue queue, int num_elements)
|
|
{
|
|
bool hasExtension = is_extension_available(device, "cl_khr_subgroups");
|
|
|
|
if (!hasExtension)
|
|
{
|
|
log_info(
|
|
"Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
|
|
return TEST_SKIPPED_ITSELF;
|
|
}
|
|
|
|
return test_work_item_functions(device, context, queue, num_elements,
|
|
false);
|
|
} |