// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "testBase.h" #include "harness/conversions.h" static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; #define NUM_VECTOR_SIZES 6 const char *permute_2_param_kernel_pattern = "__kernel void test_upsample(__global %s *sourceA, __global %s *sourceB, __global %s *destValues)\n" "{\n" " int tid = get_global_id(0);\n" " destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n" "\n" "}\n"; const char *permute_2_param_kernel_pattern_v3srcdst = "__kernel void test_upsample(__global %s *sourceA, __global %s *sourceB, __global %s *destValues)\n" "{\n" " int tid = get_global_id(0);\n" " vstore3( %s( vload3(tid,sourceA), vload3(tid, sourceB) ), tid, destValues);\n" "\n" "}\n"; int test_upsample_2_param_fn(cl_command_queue queue, cl_context context, const char *fnName, ExplicitType sourceAType, ExplicitType sourceBType, ExplicitType outType, size_t sourceAVecSize, size_t sourceBVecSize, size_t outVecSize, size_t count, void *sourceA, void *sourceB, void *expectedResults ) { cl_program program; cl_kernel kernel; int error, retCode = 0; cl_mem streams[3]; void *outData; size_t threadSize, groupSize, i; unsigned char *expectedPtr, *outPtr; size_t sourceATypeSize, sourceBTypeSize, outTypeSize, outStride; char programSource[ 10240 ], aType[ 64 ], bType[ 64 ], tType[ 64 ]; const char *progPtr; sourceATypeSize = get_explicit_type_size( sourceAType ); sourceBTypeSize = get_explicit_type_size( sourceBType ); outTypeSize = get_explicit_type_size( outType ); outStride = outTypeSize * outVecSize; outData = malloc( outStride * count ); /* Construct the program */ strcpy( aType, get_explicit_type_name( sourceAType ) ); strcpy( bType, get_explicit_type_name( sourceBType ) ); strcpy( tType, get_explicit_type_name( outType ) ); if( sourceAVecSize > 1 && sourceAVecSize != 3) sprintf( aType + strlen( aType ), "%d", (int)sourceAVecSize ); if( sourceBVecSize > 1 && sourceBVecSize != 3) sprintf( bType + strlen( bType ), "%d", (int)sourceBVecSize ); if( outVecSize > 1 && outVecSize != 3) sprintf( tType + strlen( tType ), "%d", (int)outVecSize ); if(sourceAVecSize == 3 && sourceBVecSize == 3 && outVecSize == 3) { // permute_2_param_kernel_pattern_v3srcdst sprintf( programSource, permute_2_param_kernel_pattern_v3srcdst, aType, bType, tType, fnName ); } else if(sourceAVecSize != 3 && sourceBVecSize != 3 && outVecSize != 3) { sprintf( programSource, permute_2_param_kernel_pattern, aType, bType, tType, fnName ); } else { vlog_error("Not implemented for %d,%d -> %d\n", (int)sourceAVecSize, (int)sourceBVecSize, (int)outVecSize); return -1; } progPtr = (const char *)programSource; if( create_single_kernel_helper( context, &program, &kernel, 1, &progPtr, "test_upsample" ) ) { free( outData ); return -1; } /* Set up parameters */ streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sourceATypeSize * sourceAVecSize * count, sourceA, NULL); if (!streams[0]) { log_error("ERROR: Creating input array A failed!\n"); return -1; } streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sourceBTypeSize * sourceBVecSize * count, sourceB, NULL); if (!streams[1]) { log_error("ERROR: Creating input array B failed!\n"); return -1; } streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, outStride * count, NULL, NULL); if (!streams[2]) { log_error("ERROR: Creating output array failed!\n"); return -1; } /* Set the arguments */ error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); test_error( error, "Unable to set kernel arguments" ); error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); test_error( error, "Unable to set kernel arguments" ); error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2] ); test_error( error, "Unable to set kernel arguments" ); /* Run the kernel */ threadSize = count; error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize ); test_error( error, "Unable to get work group size to use" ); error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &threadSize, &groupSize, 0, NULL, NULL ); test_error( error, "Unable to execute test kernel" ); /* Now verify the results. Each value should have been duplicated four times, and we should be able to just do a memcpy instead of relying on the actual type of data */ error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, outStride * count, outData, 0, NULL, NULL ); test_error( error, "Unable to read output values!" ); expectedPtr = (unsigned char *)expectedResults; outPtr = (unsigned char *)outData; for( i = 0; i < count; i++ ) { if( memcmp( outPtr, expectedPtr, outTypeSize * outVecSize ) != 0 ) { log_error( "ERROR: Output value %d does not validate!\n", (int)i ); retCode = -1; break; } expectedPtr += outTypeSize * outVecSize; outPtr += outStride; } clReleaseMemObject( streams[0] ); clReleaseMemObject( streams[1] ); clReleaseMemObject( streams[2] ); clReleaseKernel( kernel ); clReleaseProgram( program ); free( outData ); return retCode; } void * create_upsample_data( ExplicitType type, void *sourceA, void *sourceB, size_t count ) { void *outData; size_t i, tSize; tSize = get_explicit_type_size( type ); outData = malloc( tSize * count * 2 ); switch( tSize ) { case 1: { const cl_uchar *aPtr = (const cl_uchar *) sourceA; const cl_uchar *bPtr = (const cl_uchar *) sourceB; cl_ushort *dPtr = (cl_ushort*) outData; for( i = 0; i < count; i++ ) { cl_ushort u = *bPtr++; u |= ((cl_ushort) *aPtr++) << 8; *dPtr++ = u; } } break; case 2: { const cl_ushort *aPtr = (const cl_ushort *) sourceA; const cl_ushort *bPtr = (const cl_ushort *) sourceB; cl_uint *dPtr = (cl_uint*) outData; for( i = 0; i < count; i++ ) { cl_uint u = *bPtr++; u |= ((cl_uint) *aPtr++) << 16; *dPtr++ = u; } } break; case 4: { const cl_uint *aPtr = (const cl_uint *) sourceA; const cl_uint *bPtr = (const cl_uint *) sourceB; cl_ulong *dPtr = (cl_ulong*) outData; for( i = 0; i < count; i++ ) { cl_ulong u = *bPtr++; u |= ((cl_ulong) *aPtr++) << 32; *dPtr++ = u; } } break; default: log_error( "ERROR: unknown type size: %ld\n", tSize ); return NULL; } return outData; } int test_integer_upsample(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { ExplicitType typesToTest[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kNumExplicitTypes }; ExplicitType baseTypes[] = { kUChar, kUChar, kUShort, kUShort, kUInt, kUInt, kNumExplicitTypes }; ExplicitType outTypes[] = { kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; int i, err = 0; int sizeIndex; size_t size; void *sourceA, *sourceB, *expected; RandomSeed seed(gRandomSeed ); for( i = 0; typesToTest[ i ] != kNumExplicitTypes; i++ ) { if ((outTypes[i] == kLong || outTypes[i] == kULong) && !gHasLong) { log_info( "Longs unsupported on this device. Skipping...\n"); continue; } for( sizeIndex = 0; sizeIndex < NUM_VECTOR_SIZES; sizeIndex++) { size = (size_t)vector_sizes[sizeIndex]; log_info("running upsample test for %s %s vector size %d\n", get_explicit_type_name(typesToTest[i]), get_explicit_type_name(baseTypes[i]), (int)size); sourceA = create_random_data( typesToTest[ i ], seed, 256 ); sourceB = create_random_data( baseTypes[ i ], seed, 256 ); expected = create_upsample_data( typesToTest[ i ], sourceA, sourceB, 256 ); if( test_upsample_2_param_fn( queue, context, "upsample", typesToTest[ i ], baseTypes[ i ], outTypes[ i ], size, size, size, 256 / size, sourceA, sourceB, expected ) != 0 ) { log_error( "TEST FAILED: %s for %s%d\n", "upsample", get_explicit_type_name( typesToTest[ i ] ), (int)size ); err = -1; } free( sourceA ); free( sourceB ); free( expected ); } } return err; }