// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include #include #include "harness/testHarness.h" #include "harness/typeWrappers.h" #include #include "procs.h" #include "utils.h" #include #ifdef CL_VERSION_2_0 extern int gWimpyMode; #define BITS_DEPTH 28 static const char* enqueue_flags_wait_kernel_simple[] = { NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) NL, "" NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)" NL, "{" NL, " int val = 0;" NL, " size_t lid = get_local_id(0);" NL, " size_t tid = get_global_id(0);" NL, "" NL, " if(tid == 0)" NL, " {" NL, " if((index + 1) < BITS_DEPTH)" NL, " {" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), " NL, " ^{" NL, " block_fn(array, index + 1, ls, gs, res);" NL, " });" NL, " }" NL, " }" NL, "" NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;" NL, "" NL, " if((index + 1) == BITS_DEPTH)" NL, " {" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, "" NL, " if(lid == 0)" NL, " {" NL, " size_t gid = get_group_id(0);" NL, " res[gid] = 1;" NL, "" NL, " for(int j = 0; j < BITS_DEPTH; j++)" NL, " {" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" NL, " {" NL, " res[gid] = 2;" NL, " break;" NL, " }" NL, " }" NL, " }" NL, " }" NL, " }" NL, "}" NL, "" NL, "kernel void enqueue_flags_wait_kernel_simple(__global int* res, __global int* array)" NL, "{" NL, " size_t ls = get_local_size(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t tid = get_global_id(0);" NL, "" NL, " res[tid] = 0;" NL, " array[tid] = tid;" NL, "" NL, " if(tid == 0)" NL, " {" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), " NL, " ^{" NL, " block_fn(array, 1, ls, gs, res);" NL, " });" NL, " }" NL, "}" NL }; static const char* enqueue_flags_wait_kernel_event[] = { NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) NL, "" NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)" NL, "{" NL, " int val = 0;" NL, " size_t lid = get_local_id(0);" NL, " size_t tid = get_global_id(0);" NL, "" NL, " if(tid == 0)" NL, " {" NL, " if((index + 1) < BITS_DEPTH)" NL, " {" NL, " clk_event_t block_evt;" NL, " clk_event_t user_evt = create_user_event();" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " NL, " ^{" NL, " block_fn(array, index + 1, ls, gs, res);" NL, " });" NL, " set_user_event_status(user_evt, CL_COMPLETE);" NL, " release_event(user_evt);" NL, " release_event(block_evt);" NL, " }" NL, " }" NL, "" NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;" NL, "" NL, " if((index + 1) == BITS_DEPTH)" NL, " {" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, "" NL, " if(lid == 0)" NL, " {" NL, " size_t gid = get_group_id(0);" NL, " res[gid] = 1;" NL, "" NL, " for(int j = 0; j < BITS_DEPTH; j++)" NL, " {" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" NL, " {" NL, " res[gid] = 2;" NL, " break;" NL, " }" NL, " }" NL, " }" NL, " }" NL, " }" NL, "}" NL, "" NL, "kernel void enqueue_flags_wait_kernel_event(__global int* res, __global int* array)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t ls = get_local_size(0);" NL, "" NL, " res[tid] = 0;" NL, " array[tid] = tid;" NL, "" NL, " if(tid == 0)" NL, " {" NL, " clk_event_t block_evt;" NL, " clk_event_t user_evt = create_user_event();" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " NL, " ^{" NL, " block_fn(array, 1, ls, gs, res);" NL, " });" NL, " set_user_event_status(user_evt, CL_COMPLETE);" NL, " release_event(user_evt);" NL, " release_event(block_evt);" NL, " }" NL, "}" NL }; static const char* enqueue_flags_wait_kernel_local[] = { NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) NL, "" NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)" NL, "{" NL, " int val = 0;" NL, " size_t gid = get_group_id(0);" NL, " size_t lid = get_local_id(0);" NL, " size_t tid = get_global_id(0);" NL, "" NL, " sub_array[lid] = array[(index - 1) * gs + tid];" NL, " barrier(CLK_LOCAL_MEM_FENCE);" NL, "" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " int id = gid * ls + i;" NL, " val += sub_array[i];" NL, " val -= (tid == id)? 0: (id + index - 1);" NL, " }" NL, "" NL, " if(tid == 0)" NL, " {" NL, " if((index + 1) < BITS_DEPTH)" NL, " {" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), " NL, " ^(__local void* sub_array){" NL, " block_fn(array, index + 1, ls, gs, res, sub_array);" NL, " }, ls * sizeof(int));" NL, " }" NL, " }" NL, "" NL, " array[index * gs + tid] = val + 1;" NL, "" NL, " if((index + 1) == BITS_DEPTH)" NL, " {" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, "" NL, " if(lid == 0)" NL, " {" NL, " res[gid] = 1;" NL, "" NL, " for(int j = 0; j < BITS_DEPTH; j++)" NL, " {" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" NL, " {" NL, " res[gid] = 2;" NL, " break;" NL, " }" NL, " }" NL, " }" NL, " }" NL, " }" NL, "}" NL, "" NL, "kernel void enqueue_flags_wait_kernel_local(__global int* res, __global int* array)" NL, "{" NL, " size_t ls = get_local_size(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t tid = get_global_id(0);" NL, "" NL, " res[tid] = 0;" NL, " array[tid] = tid;" NL, "" NL, " if(tid == 0)" NL, " {" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), " NL, " ^(__local void* sub_array){" NL, " block_fn(array, 1, ls, gs, res, sub_array);" NL, " }, ls * sizeof(int));" NL, " }" NL, "}" NL }; static const char* enqueue_flags_wait_kernel_event_local[] = { NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) NL, "" NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)" NL, "{" NL, " int val = 0;" NL, " size_t gid = get_group_id(0);" NL, " size_t lid = get_local_id(0);" NL, " size_t tid = get_global_id(0);" NL, "" NL, " sub_array[lid] = array[(index - 1) * gs + tid];" NL, " barrier(CLK_LOCAL_MEM_FENCE);" NL, "" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " int id = gid * ls + i;" NL, " val += sub_array[i];" NL, " val -= (tid == id)? 0: (id + index - 1);" NL, " }" NL, "" NL, " if(tid == 0)" NL, " {" NL, " if((index + 1) < BITS_DEPTH)" NL, " {" NL, " clk_event_t block_evt;" NL, " clk_event_t user_evt = create_user_event();" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " NL, " ^(__local void* sub_array){" NL, " block_fn(array, index + 1, ls, gs, res, sub_array);" NL, " }, ls * sizeof(int));" NL, " set_user_event_status(user_evt, CL_COMPLETE);" NL, " release_event(user_evt);" NL, " release_event(block_evt);" NL, " }" NL, " }" NL, "" NL, " array[index * gs + tid] = val + 1;" NL, "" NL, " if((index + 1) == BITS_DEPTH)" NL, " {" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, "" NL, " if(lid == 0)" NL, " {" NL, " res[gid] = 1;" NL, "" NL, " for(int j = 0; j < BITS_DEPTH; j++)" NL, " {" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" NL, " {" NL, " res[gid] = 2;" NL, " break;" NL, " }" NL, " }" NL, " }" NL, " }" NL, " }" NL, "}" NL, "" NL, "kernel void enqueue_flags_wait_kernel_event_local(__global int* res, __global int* array)" NL, "{" NL, " size_t ls = get_local_size(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t tid = get_global_id(0);" NL, "" NL, " res[tid] = 0;" NL, " array[tid] = tid;" NL, "" NL, " if(tid == 0)" NL, " {" NL, " clk_event_t block_evt;" NL, " clk_event_t user_evt = create_user_event();" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " NL, " ^(__local void* sub_array){" NL, " block_fn(array, 1, ls, gs, res, sub_array);" NL, " }, ls * sizeof(int));" NL, " set_user_event_status(user_evt, CL_COMPLETE);" NL, " release_event(user_evt);" NL, " release_event(block_evt);" NL, " }" NL, "}" NL }; static const char* enqueue_flags_wait_work_group_simple[] = { NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) NL, "" NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " size_t lid = get_local_id(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t gid = get_group_id(0);" NL, " " NL, " if(gid == group_id)" NL, " {" NL, " if((index + 1) < BITS_DEPTH && lid == 0)" NL, " {" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), " NL, " ^{" NL, " block_fn(array, index + 1, ls, res, gid);" NL, " });" NL, " }" NL, " " NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;" NL, " }" NL, "" NL, " if((index + 1) == BITS_DEPTH)" NL, " {" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, "" NL, " if(lid == 0 && gid == group_id)" NL, " {" NL, " res[gid] = 1;" NL, "" NL, " for(int j = 0; j < BITS_DEPTH; j++)" NL, " {" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" NL, " {" NL, " res[gid] = 2;" NL, " break;" NL, " }" NL, " }" NL, " }" NL, " }" NL, " }" NL, "}" NL, "" NL, "kernel void enqueue_flags_wait_work_group_simple(__global int* res, __global int* array)" NL, "{" NL, " size_t ls = get_local_size(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t tid = get_global_id(0);" NL, " size_t gid = get_group_id(0);" NL, " size_t lid = get_local_id(0);" NL, "" NL, " res[tid] = 0;" NL, " array[tid] = tid;" NL, "" NL, " if(lid == 0)" NL, " {" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), " NL, " ^{" NL, " block_fn(array, 1, ls, res, gid);" NL, " });" NL, " }" NL, "}" NL }; static const char* enqueue_flags_wait_work_group_event[] = { NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) NL, "" NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)" NL, "{" NL, " size_t tid = get_global_id(0);" NL, " size_t lid = get_local_id(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t gid = get_group_id(0);" NL, " " NL, " if(gid == group_id)" NL, " {" NL, " if((index + 1) < BITS_DEPTH && lid == 0)" NL, " {" NL, " clk_event_t block_evt;" NL, " clk_event_t user_evt = create_user_event();" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " NL, " ^{" NL, " block_fn(array, index + 1, ls, res, gid);" NL, " });" NL, " set_user_event_status(user_evt, CL_COMPLETE);" NL, " release_event(user_evt);" NL, " release_event(block_evt);" NL, " }" NL, " " NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;" NL, " }" NL, "" NL, "" NL, " if((index + 1) == BITS_DEPTH)" NL, " {" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, "" NL, " if(lid == 0 && gid == group_id)" NL, " {" NL, " res[gid] = 1;" NL, "" NL, " for(int j = 0; j < BITS_DEPTH; j++)" NL, " {" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" NL, " {" NL, " res[gid] = 2;" NL, " break;" NL, " }" NL, " }" NL, " }" NL, " }" NL, " }" NL, "}" NL, "" NL, "kernel void enqueue_flags_wait_work_group_event(__global int* res, __global int* array)" NL, "{" NL, " size_t ls = get_local_size(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t tid = get_global_id(0);" NL, " size_t gid = get_group_id(0);" NL, " size_t lid = get_local_id(0);" NL, "" NL, " res[tid] = 0;" NL, " array[tid] = tid;" NL, "" NL, " if(lid == 0)" NL, " {" NL, " clk_event_t block_evt;" NL, " clk_event_t user_evt = create_user_event();" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " NL, " ^{" NL, " block_fn(array, 1, ls, res, gid);" NL, " });" NL, " set_user_event_status(user_evt, CL_COMPLETE);" NL, " release_event(user_evt);" NL, " release_event(block_evt);" NL, " }" NL, "}" NL }; static const char* enqueue_flags_wait_work_group_local[] = { NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) NL, "" NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)" NL, "{" NL, " int val = 0;" NL, " size_t gid = get_group_id(0);" NL, " size_t lid = get_local_id(0);" NL, " size_t tid = get_global_id(0);" NL, " size_t gs = get_global_size(0);" NL, "" NL, " sub_array[lid] = array[(index - 1) * gs + tid];" NL, " barrier(CLK_LOCAL_MEM_FENCE);" NL, "" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " int id = gid * ls + i;" NL, " val += sub_array[i];" NL, " val -= (tid == id)? 0: (id + index - 1);" NL, " }" NL, " " NL, " if(gid == group_id)" NL, " {" NL, " if((index + 1) < BITS_DEPTH && lid == 0)" NL, " {" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), " NL, " ^(__local void* sub_array){" NL, " block_fn(array, index + 1, ls, res, sub_array, gid);" NL, " }, ls * sizeof(int));" NL, " }" NL, " " NL, " array[index * gs + tid] = val + 1;" NL, " }" NL, "" NL, "" NL, " if((index + 1) == BITS_DEPTH)" NL, " {" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, "" NL, " if(lid == 0 && gid == group_id)" NL, " {" NL, " res[gid] = 1;" NL, "" NL, " for(int j = 0; j < BITS_DEPTH; j++)" NL, " {" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" NL, " {" NL, " res[gid] = 2;" NL, " break;" NL, " }" NL, " }" NL, " }" NL, " }" NL, " }" NL, "}" NL, "" NL, "kernel void enqueue_flags_wait_work_group_local(__global int* res, __global int* array)" NL, "{" NL, " size_t ls = get_local_size(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t tid = get_global_id(0);" NL, " size_t gid = get_group_id(0);" NL, " size_t lid = get_local_id(0);" NL, "" NL, " res[tid] = 0;" NL, " array[tid] = tid;" NL, "" NL, " if(lid == 0)" NL, " {" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), " NL, " ^(__local void* sub_array){" NL, " block_fn(array, 1, ls, res, sub_array, gid);" NL, " }, ls * sizeof(int));" NL, " }" NL, "}" NL }; static const char* enqueue_flags_wait_work_group_event_local[] = { NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) NL, "" NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)" NL, "{" NL, " int val = 0;" NL, " size_t gid = get_group_id(0);" NL, " size_t lid = get_local_id(0);" NL, " size_t tid = get_global_id(0);" NL, " size_t gs = get_global_size(0);" NL, "" NL, " sub_array[lid] = array[(index - 1) * gs + tid];" NL, " barrier(CLK_LOCAL_MEM_FENCE);" NL, "" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " int id = gid * ls + i;" NL, " val += sub_array[i];" NL, " val -= (tid == id)? 0: (id + index - 1);" NL, " }" NL, "" NL, " if(gid == group_id)" NL, " {" NL, " if((index + 1) < BITS_DEPTH && lid == 0)" NL, " {" NL, " clk_event_t block_evt;" NL, " clk_event_t user_evt = create_user_event();" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " NL, " ^(__local void* sub_array){" NL, " block_fn(array, index + 1, ls, res, sub_array, gid);" NL, " }, ls * sizeof(int));" NL, " set_user_event_status(user_evt, CL_COMPLETE);" NL, " release_event(user_evt);" NL, " release_event(block_evt);" NL, " }" NL, " " NL, " array[index * gs + tid] = val + 1;" NL, " }" NL, "" NL, " if((index + 1) == BITS_DEPTH)" NL, " {" NL, " barrier(CLK_GLOBAL_MEM_FENCE);" NL, "" NL, " if(lid == 0 && gid == group_id)" NL, " {" NL, " res[gid] = 1;" NL, "" NL, " for(int j = 0; j < BITS_DEPTH; j++)" NL, " {" NL, " for(int i = 0; i < ls; i++)" NL, " {" NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" NL, " {" NL, " res[gid] = 2;" NL, " break;" NL, " }" NL, " }" NL, " }" NL, " }" NL, " }" NL, "}" NL, "" NL, "kernel void enqueue_flags_wait_work_group_event_local(__global int* res, __global int* array)" NL, "{" NL, " size_t ls = get_local_size(0);" NL, " size_t gs = get_global_size(0);" NL, " size_t tid = get_global_id(0);" NL, " size_t gid = get_group_id(0);" NL, " size_t lid = get_local_id(0);" NL, "" NL, " res[tid] = 0;" NL, " array[tid] = tid;" NL, "" NL, " if(lid == 0)" NL, " {" NL, " clk_event_t block_evt;" NL, " clk_event_t user_evt = create_user_event();" NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " NL, " ^(__local void* sub_array){" NL, " block_fn(array, 1, ls, res, sub_array, gid);" NL, " }, ls * sizeof(int));" NL, " set_user_event_status(user_evt, CL_COMPLETE);" NL, " release_event(user_evt);" NL, " release_event(block_evt);" NL, " }" NL, "}" NL }; static const kernel_src sources_enqueue_block_flags[] = { KERNEL(enqueue_flags_wait_kernel_simple), KERNEL(enqueue_flags_wait_kernel_event), KERNEL(enqueue_flags_wait_kernel_local), KERNEL(enqueue_flags_wait_kernel_event_local), KERNEL(enqueue_flags_wait_work_group_simple), KERNEL(enqueue_flags_wait_work_group_event), KERNEL(enqueue_flags_wait_work_group_local), KERNEL(enqueue_flags_wait_work_group_event_local) }; static const size_t num_enqueue_block_flags = arr_size(sources_enqueue_block_flags); int test_enqueue_flags(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { cl_uint i; cl_int err_ret, res = 0; clCommandQueueWrapper dev_queue; cl_int kernel_results[MAX_GWS] = { -1 }; int buff[MAX_GWS * BITS_DEPTH] = { 0 }; size_t ret_len; size_t max_local_size = 1; cl_uint maxQueueSize = 0; err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0); test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len); test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); cl_queue_properties queue_prop_def[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT, CL_QUEUE_SIZE, maxQueueSize, 0 }; dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); size_t global_size = MAX_GWS; size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size; if(gWimpyMode) { global_size = 4; local_size = 2; } size_t failCnt = 0; for(i = 0; i < num_enqueue_block_flags; ++i) { if (!gKernelName.empty() && gKernelName != sources_enqueue_block_flags[i].kernel_name) continue; log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_block_flags[i].kernel_name, i + 1, num_enqueue_block_flags); clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, global_size * BITS_DEPTH * sizeof(cl_int), buff, &err_ret); test_error(err_ret, "clCreateBuffer() failed"); kernel_arg args[] = { { sizeof(cl_mem), &mem } }; err_ret = run_n_kernel_args(context, queue, sources_enqueue_block_flags[i].lines, sources_enqueue_block_flags[i].num_lines, sources_enqueue_block_flags[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), arr_size(args), args); if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block_flags[i].kernel_name)) { ++failCnt; res = -1; } else { int r = 0; for (int j=0; j= (global_size / local_size) && check_error(-1, "'%s' kernel result[idx: %d] validation failed (test) %d != (expected) 0", sources_enqueue_block_flags[i].kernel_name, j, kernel_results[j])) { r = -1; break; } } if(r == 0) log_info("'%s' kernel is OK.\n", sources_enqueue_block_flags[i].kernel_name); else res = -1; } } if (failCnt > 0) { log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_enqueue_block_flags); } return res; } #endif