632 lines
24 KiB
C++
632 lines
24 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "procs.h"
|
|
|
|
// Design:
|
|
// To test sub buffers, we first create one main buffer. We then create several sub-buffers and
|
|
// queue Actions on each one. Each Action is encapsulated in a class so it can keep track of
|
|
// what results it expects, and so we can test scaling degrees of Actions on scaling numbers of
|
|
// sub-buffers.
|
|
|
|
class SubBufferWrapper : public clMemWrapper
|
|
{
|
|
public:
|
|
cl_mem mParentBuffer;
|
|
size_t mOrigin;
|
|
size_t mSize;
|
|
|
|
cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size )
|
|
{
|
|
mParentBuffer = parent;
|
|
mOrigin = origin;
|
|
mSize = size;
|
|
|
|
cl_buffer_region region;
|
|
region.origin = mOrigin;
|
|
region.size = mSize;
|
|
|
|
cl_int error;
|
|
mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error );
|
|
return error;
|
|
}
|
|
};
|
|
|
|
class Action
|
|
{
|
|
public:
|
|
virtual ~Action() {}
|
|
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0;
|
|
virtual const char * GetName( void ) const = 0;
|
|
|
|
static MTdata d;
|
|
static MTdata GetRandSeed( void )
|
|
{
|
|
if ( d == 0 )
|
|
d = init_genrand( gRandomSeed );
|
|
return d;
|
|
}
|
|
static void FreeRandSeed() {
|
|
if ( d != 0 ) {
|
|
free_mtdata(d);
|
|
d = 0;
|
|
}
|
|
}
|
|
};
|
|
|
|
MTdata Action::d = 0;
|
|
|
|
class ReadWriteAction : public Action
|
|
{
|
|
public:
|
|
virtual ~ReadWriteAction() {}
|
|
virtual const char * GetName( void ) const { return "ReadWrite";}
|
|
|
|
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
|
|
{
|
|
cl_char *tempBuffer = (cl_char*)malloc(buffer1.mSize);
|
|
if (!tempBuffer) {
|
|
log_error("Out of memory\n");
|
|
return -1;
|
|
}
|
|
cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
|
|
test_error( error, "Unable to enqueue buffer read" );
|
|
|
|
size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() );
|
|
size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() );
|
|
|
|
for ( size_t i = start; i < end; i++ )
|
|
{
|
|
tempBuffer[ i ] |= tag;
|
|
parentBufferState[ i + buffer1.mOrigin ] |= tag;
|
|
}
|
|
|
|
error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
|
|
test_error( error, "Unable to enqueue buffer write" );
|
|
free(tempBuffer);
|
|
return CL_SUCCESS;
|
|
}
|
|
};
|
|
|
|
#ifndef MAX
|
|
#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
|
|
#endif
|
|
#ifndef MIN
|
|
#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
|
|
#endif
|
|
|
|
class CopyAction : public Action
|
|
{
|
|
public:
|
|
virtual ~CopyAction() {}
|
|
virtual const char * GetName( void ) const { return "Copy";}
|
|
|
|
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
|
|
{
|
|
// Copy from sub-buffer 1 to sub-buffer 2
|
|
size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() );
|
|
|
|
size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
|
|
size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() );
|
|
|
|
cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL );
|
|
test_error( error, "Unable to enqueue buffer copy" );
|
|
|
|
memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size );
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
};
|
|
|
|
class MapAction : public Action
|
|
{
|
|
public:
|
|
virtual ~MapAction() {}
|
|
virtual const char * GetName( void ) const { return "Map";}
|
|
|
|
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
|
|
{
|
|
size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() );
|
|
size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
|
|
|
|
cl_int error;
|
|
void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ),
|
|
start, size, 0, NULL, NULL, &error );
|
|
test_error( error, "Unable to map buffer" );
|
|
|
|
cl_char *cPtr = (cl_char *)mappedPtr;
|
|
for ( size_t i = 0; i < size; i++ )
|
|
{
|
|
cPtr[ i ] |= tag;
|
|
parentBufferState[ i + start + buffer1.mOrigin ] |= tag;
|
|
}
|
|
|
|
error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL );
|
|
test_error( error, "Unable to unmap buffer" );
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
};
|
|
|
|
class KernelReadWriteAction : public Action
|
|
{
|
|
public:
|
|
virtual ~KernelReadWriteAction() {}
|
|
virtual const char * GetName( void ) const { return "KernelReadWrite";}
|
|
|
|
virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
|
|
{
|
|
const char *kernelCode[] = {
|
|
"__kernel void readTest( __global char *inBuffer, char tag )\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
" inBuffer[ tid ] |= tag;\n"
|
|
"}\n" };
|
|
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
cl_int error;
|
|
|
|
if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
size_t threads[1] = { buffer1.mSize };
|
|
|
|
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 );
|
|
test_error( error, "Unable to set kernel argument" );
|
|
error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag );
|
|
test_error( error, "Unable to set kernel argument" );
|
|
|
|
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
|
test_error( error, "Unable to queue kernel" );
|
|
|
|
for ( size_t i = 0; i < buffer1.mSize; i++ )
|
|
parentBufferState[ i + buffer1.mOrigin ] |= tag;
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
};
|
|
|
|
cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize )
|
|
{
|
|
cl_ulong maxAllocSize;
|
|
cl_int error;
|
|
|
|
// Get the largest possible buffer we could allocate
|
|
error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
|
|
test_error( error, "Unable to get max alloc size" );
|
|
|
|
// Don't create a buffer quite that big, just so we have some space left over for other work
|
|
outSize = (size_t)( maxAllocSize / 5 );
|
|
|
|
// Cap at 32M so tests complete in a reasonable amount of time.
|
|
if ( outSize > 32 << 20 )
|
|
outSize = 32 << 20;
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index )
|
|
{
|
|
for ( size_t i = 0; i < numSubBuffers; i++ )
|
|
{
|
|
if ( subBuffers[ i ].mOrigin > index )
|
|
return numSubBuffers;
|
|
if ( ( subBuffers[ i ].mOrigin <= index ) && ( ( subBuffers[ i ].mOrigin + subBuffers[ i ].mSize ) > index ) )
|
|
return i;
|
|
}
|
|
return numSubBuffers;
|
|
}
|
|
|
|
// This tests the read/write capabilities of sub buffers (if we are read/write, the sub buffers
|
|
// can't overlap)
|
|
int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign )
|
|
{
|
|
clMemWrapper mainBuffer;
|
|
SubBufferWrapper subBuffers[ 8 ];
|
|
size_t numSubBuffers;
|
|
cl_int error;
|
|
size_t i;
|
|
MTdata m = init_genrand( 22 );
|
|
|
|
|
|
cl_char * mainBufferContents = (cl_char*)calloc(1,mainSize);
|
|
cl_char * actualResults = (cl_char*)calloc(1,mainSize);
|
|
|
|
for ( i = 0; i < mainSize / 4; i++ )
|
|
((cl_uint*) mainBufferContents)[i] = genrand_int32(m);
|
|
|
|
free_mtdata( m );
|
|
|
|
// Create the main buffer to test against
|
|
mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error );
|
|
test_error( error, "Unable to create test main buffer" );
|
|
|
|
// Create some sub-buffers to use
|
|
size_t toStartFrom = 0;
|
|
for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ )
|
|
{
|
|
size_t endRange = toStartFrom + ( mainSize / 4 );
|
|
if ( endRange > mainSize )
|
|
endRange = mainSize;
|
|
|
|
size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
|
|
size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
|
|
error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
|
|
test_error( error, "Unable to allocate sub buffer" );
|
|
|
|
toStartFrom = offset + size;
|
|
if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) )
|
|
break;
|
|
}
|
|
|
|
ReadWriteAction rwAction;
|
|
MapAction mapAction;
|
|
CopyAction copyAction;
|
|
KernelReadWriteAction kernelAction;
|
|
|
|
Action * actions[] = { &rwAction, &mapAction, ©Action, &kernelAction };
|
|
int numErrors = 0;
|
|
|
|
// Do the following steps twice, to make sure the parent gets updated *and* we can
|
|
// still work on the sub-buffers
|
|
cl_command_queue prev_queue = queueA;
|
|
for ( int time = 0; time < 2; time++ )
|
|
{
|
|
// Randomly apply actions to the set of sub buffers
|
|
size_t i;
|
|
for ( i = 0; i < 64; i++ )
|
|
{
|
|
int which = random_in_range( 0, 3, Action::GetRandSeed() );
|
|
int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() );
|
|
int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
|
|
int whichBufferB;
|
|
do
|
|
{
|
|
whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
|
|
} while ( whichBufferB == whichBufferA );
|
|
|
|
cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA;
|
|
if (queue != prev_queue) {
|
|
error = clFinish( prev_queue );
|
|
test_error( error, "Error finishing other queue." );
|
|
|
|
prev_queue = queue;
|
|
}
|
|
|
|
error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents );
|
|
test_error( error, "Unable to execute action against sub buffers" );
|
|
}
|
|
|
|
error = clFinish( queueA );
|
|
test_error( error, "Error finishing queueA." );
|
|
|
|
error = clFinish( queueB );
|
|
test_error( error, "Error finishing queueB." );
|
|
|
|
// Validate by reading the final contents of the main buffer and
|
|
// validating against our ref copy we generated
|
|
error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL );
|
|
test_error( error, "Unable to enqueue buffer read" );
|
|
|
|
for ( i = 0; i < mainSize; i += 65536 )
|
|
{
|
|
size_t left = 65536;
|
|
if ( ( i + left ) > mainSize )
|
|
left = mainSize - i;
|
|
|
|
if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 )
|
|
continue;
|
|
|
|
// The fast compare failed, so we need to determine where exactly the failure is
|
|
|
|
for ( size_t j = 0; j < left; j++ )
|
|
{
|
|
if ( actualResults[ i + j ] != mainBufferContents[ i + j ] )
|
|
{
|
|
// Hit a failure; report the subbuffer at this address as having failed
|
|
size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j );
|
|
if ( sbThatFailed == numSubBuffers )
|
|
{
|
|
log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize );
|
|
// Since this is a nonsensical, don't bother continuing to check
|
|
// (we will, however, print our map of sub-buffers for comparison)
|
|
for ( size_t k = 0; k < numSubBuffers; k++ )
|
|
{
|
|
log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize );
|
|
}
|
|
return -1;
|
|
}
|
|
log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize );
|
|
size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1;
|
|
i = newPos & ~65535;
|
|
j = newPos - i;
|
|
numErrors++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
free(mainBufferContents);
|
|
free(actualResults);
|
|
Action::FreeRandSeed();
|
|
|
|
return numErrors;
|
|
}
|
|
|
|
int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
|
|
{
|
|
cl_int error;
|
|
size_t mainSize;
|
|
cl_uint addressAlignBits;
|
|
|
|
// Get the size of the main buffer to use
|
|
error = get_reasonable_buffer_size( deviceID, mainSize );
|
|
test_error( error, "Unable to get reasonable buffer size" );
|
|
|
|
// Determine the alignment of the device so we can make sure sub buffers are valid
|
|
error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL );
|
|
test_error( error, "Unable to get device's address alignment" );
|
|
|
|
size_t addressAlign = addressAlignBits/8;
|
|
|
|
return test_sub_buffers_read_write_core( context, queue, queue, mainSize, addressAlign );
|
|
}
|
|
|
|
// This test performs the same basic operations as sub_buffers_read_write, but instead of a single
|
|
// device, it creates a context and buffer shared between two devices, then executes commands
|
|
// on queues for each device to ensure that everything still operates as expected.
|
|
int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
|
|
{
|
|
cl_int error;
|
|
|
|
|
|
// First obtain the second device
|
|
cl_device_id otherDevice = GetOpposingDevice( deviceID );
|
|
if ( otherDevice == NULL )
|
|
{
|
|
log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" );
|
|
return -1;
|
|
}
|
|
if ( otherDevice == deviceID )
|
|
{
|
|
log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" );
|
|
return 0;
|
|
}
|
|
|
|
// Determine the device id.
|
|
size_t param_size;
|
|
error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, ¶m_size );
|
|
test_error( error, "Error obtaining device name" );
|
|
|
|
#if !(defined(_WIN32) && defined(_MSC_VER))
|
|
char device_name[param_size];
|
|
#else
|
|
char* device_name = (char*)_malloca(param_size);
|
|
#endif
|
|
error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
|
|
test_error( error, "Error obtaining device name" );
|
|
|
|
log_info( "\tOther device obtained for dual device test is type %s\n", device_name );
|
|
|
|
// Create a shared context for these two devices
|
|
cl_device_id devices[ 2 ] = { deviceID, otherDevice };
|
|
clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error );
|
|
test_error( error, "Unable to create shared context" );
|
|
|
|
// Create two queues (can't use the existing one, because it's on the wrong context)
|
|
clCommandQueueWrapper queue1 = clCreateCommandQueue( testingContext, deviceID, 0, &error );
|
|
test_error( error, "Unable to create command queue on main device" );
|
|
|
|
clCommandQueueWrapper queue2 = clCreateCommandQueue( testingContext, otherDevice, 0, &error );
|
|
test_error( error, "Unable to create command queue on secondary device" );
|
|
|
|
// Determine the reasonable buffer size and address alignment that applies to BOTH devices
|
|
size_t maxBuffer1, maxBuffer2;
|
|
error = get_reasonable_buffer_size( deviceID, maxBuffer1 );
|
|
test_error( error, "Unable to get buffer size for main device" );
|
|
|
|
error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
|
|
test_error( error, "Unable to get buffer size for secondary device" );
|
|
maxBuffer1 = MIN( maxBuffer1, maxBuffer2 );
|
|
|
|
cl_uint addressAlign1Bits, addressAlign2Bits;
|
|
error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
|
|
test_error( error, "Unable to get main device's address alignment" );
|
|
|
|
error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
|
|
test_error( error, "Unable to get secondary device's address alignment" );
|
|
|
|
cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8;
|
|
|
|
// Finally time to run!
|
|
return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
|
|
}
|
|
|
|
cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults )
|
|
{
|
|
const char *kernelCode[] = {
|
|
"__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
" outBuffer[ tid ] = inBuffer[ tid ];\n"
|
|
"}\n" };
|
|
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
cl_int error;
|
|
|
|
if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
size_t threads[1] = { length };
|
|
|
|
clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error );
|
|
test_error( error, "Unable to create output stream" );
|
|
|
|
error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer );
|
|
test_error( error, "Unable to set kernel argument" );
|
|
error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream );
|
|
test_error( error, "Unable to set kernel argument" );
|
|
|
|
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
|
test_error( error, "Unable to queue kernel" );
|
|
|
|
error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL );
|
|
test_error( error, "Unable to read results from kernel" );
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
|
|
int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
|
|
{
|
|
cl_int error;
|
|
size_t mainSize;
|
|
cl_uint addressAlign;
|
|
|
|
clMemWrapper mainBuffer;
|
|
SubBufferWrapper subBuffers[ 16 ];
|
|
|
|
|
|
// Create the main buffer to test against
|
|
error = get_reasonable_buffer_size( deviceID, mainSize );
|
|
test_error( error, "Unable to get reasonable buffer size" );
|
|
|
|
mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error );
|
|
test_error( error, "Unable to create test main buffer" );
|
|
|
|
// Determine the alignment of the device so we can make sure sub buffers are valid
|
|
error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL );
|
|
test_error( error, "Unable to get device's address alignment" );
|
|
|
|
// Create some sub-buffers to use. Note: they don't have to not overlap (we actually *want* them to overlap)
|
|
for ( size_t i = 0; i < 16; i++ )
|
|
{
|
|
size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign;
|
|
size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign;
|
|
|
|
error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size );
|
|
test_error( error, "Unable to allocate sub buffer" );
|
|
}
|
|
|
|
/// For logging, we determine the amount of overlap we just generated
|
|
// Build a fast in-out map to help with generating the stats
|
|
int sbMap[ 32 ], mapSize = 0;
|
|
for ( int i = 0; i < 16; i++ )
|
|
{
|
|
int j;
|
|
for ( j = 0; j < mapSize; j++ )
|
|
{
|
|
size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize )
|
|
: subBuffers[ sbMap[ j ] ].mOrigin;
|
|
if ( subBuffers[ i ].mOrigin < pt )
|
|
{
|
|
// Origin is before this part of the map, so move map forward so we can insert
|
|
memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
|
|
sbMap[ j ] = i;
|
|
mapSize++;
|
|
break;
|
|
}
|
|
}
|
|
if ( j == mapSize )
|
|
{
|
|
sbMap[ j ] = i;
|
|
mapSize++;
|
|
}
|
|
|
|
size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize;
|
|
for ( j = 0; j < mapSize; j++ )
|
|
{
|
|
size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize )
|
|
: subBuffers[ sbMap[ j ] ].mOrigin;
|
|
if ( endPt < pt )
|
|
{
|
|
// Origin is before this part of the map, so move map forward so we can insert
|
|
memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
|
|
sbMap[ j ] = -( i + 1 );
|
|
mapSize++;
|
|
break;
|
|
}
|
|
}
|
|
if ( j == mapSize )
|
|
{
|
|
sbMap[ j ] = -( i + 1 );
|
|
mapSize++;
|
|
}
|
|
}
|
|
long long delta = 0;
|
|
size_t maxOverlap = 1, overlap = 0;
|
|
for ( int i = 0; i < 32; i++ )
|
|
{
|
|
if ( sbMap[ i ] >= 0 )
|
|
{
|
|
overlap++;
|
|
if ( overlap > 1 )
|
|
delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin );
|
|
if ( overlap > maxOverlap )
|
|
maxOverlap = overlap;
|
|
}
|
|
else
|
|
{
|
|
if ( overlap > 1 )
|
|
delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize );
|
|
overlap--;
|
|
}
|
|
}
|
|
|
|
log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n",
|
|
16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap );
|
|
|
|
// Write some random contents to the main buffer
|
|
cl_char * contents = new cl_char[ mainSize ];
|
|
generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents );
|
|
|
|
error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL );
|
|
test_error( error, "Unable to write to main buffer" );
|
|
|
|
// Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents
|
|
cl_char * tempBuffer = new cl_char[ mainSize ];
|
|
|
|
int numErrors = 0;
|
|
for ( size_t i = 0; i < 16; i++ )
|
|
{
|
|
// Read from this buffer
|
|
int which = random_in_range( 0, 1, Action::GetRandSeed() );
|
|
if ( which )
|
|
error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL );
|
|
else
|
|
error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer );
|
|
test_error( error, "Unable to read sub buffer contents" );
|
|
|
|
if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 )
|
|
{
|
|
log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i );
|
|
numErrors++;
|
|
}
|
|
}
|
|
|
|
delete [] contents;
|
|
delete [] tempBuffer;
|
|
Action::FreeRandSeed();
|
|
|
|
return numErrors;
|
|
}
|
|
|