//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"

// Design:
// To test sub buffers, we first create one main buffer. We then create several sub-buffers and
// queue Actions on each one. Each Action is encapsulated in a class so it can keep track of
// what results it expects, and so we can test scaling degrees of Actions on scaling numbers of
// sub-buffers.

class SubBufferWrapper : public clMemWrapper
{
public:
    cl_mem mParentBuffer;
    size_t mOrigin;
    size_t mSize;

    cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size )
    {
        mParentBuffer = parent;
        mOrigin = origin;
        mSize = size;

        cl_buffer_region region;
        region.origin = mOrigin;
        region.size = mSize;

        cl_int error;
        mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
        return error;
    }
};

class Action
{
public:
    virtual ~Action() {}

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0;
    virtual const char * GetName( void ) const = 0;

    static MTdata d;
    static MTdata GetRandSeed( void )
    {
        if ( d == 0 )
            d = init_genrand( gRandomSeed );
        return d;
    }
    static void FreeRandSeed()
    {
        if ( d != 0 )
        {
            free_mtdata( d );
            d = 0;
        }
    }
};

MTdata Action::d = 0;

class ReadWriteAction : public Action
{
public:
    virtual ~ReadWriteAction() {}
    virtual const char * GetName( void ) const { return "ReadWrite"; }

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        cl_char *tempBuffer = (cl_char*)malloc( buffer1.mSize );
        if ( !tempBuffer )
        {
            log_error( "Out of memory\n" );
            return -1;
        }

        cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer read" );

        size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() );
        size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() );

        for ( size_t i = start; i < end; i++ )
        {
            tempBuffer[ i ] |= tag;
            parentBufferState[ i + buffer1.mOrigin ] |= tag;
        }

        error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer write" );

        free( tempBuffer );
        return CL_SUCCESS;
    }
};

#ifndef MAX
#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) )
#endif
#ifndef MIN
#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) )
#endif
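
// CopyAction: copies a randomly sized range from the first sub-buffer into the second
// sub-buffer and mirrors that copy in the host-side reference state (parentBufferState).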
class CopyAction : public Action
{
public:
    virtual ~CopyAction() {}
    virtual const char * GetName( void ) const { return "Copy"; }

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        // Copy from sub-buffer 1 to sub-buffer 2
        size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() );

        size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );
        size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() );

        cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer copy" );

        memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size );

        return CL_SUCCESS;
    }
};

class MapAction : public Action
{
public:
    virtual ~MapAction() {}
    virtual const char * GetName( void ) const { return "Map"; }

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() );
        size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() );

        cl_int error;
        void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ),
                                               start, size, 0, NULL, NULL, &error );
        test_error( error, "Unable to map buffer" );

        cl_char *cPtr = (cl_char *)mappedPtr;
        for ( size_t i = 0; i < size; i++ )
        {
            cPtr[ i ] |= tag;
            parentBufferState[ i + start + buffer1.mOrigin ] |= tag;
        }

        error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL );
        test_error( error, "Unable to unmap buffer" );

        return CL_SUCCESS;
    }
};

class KernelReadWriteAction : public Action
{
public:
    virtual ~KernelReadWriteAction() {}
    virtual const char * GetName( void ) const { return "KernelReadWrite"; }

    virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState )
    {
        const char *kernelCode[] = {
            "__kernel void readTest( __global char *inBuffer, char tag )\n"
            "{\n"
            "    int tid = get_global_id(0);\n"
            "    inBuffer[ tid ] |= tag;\n"
            "}\n" };

        clProgramWrapper program;
        clKernelWrapper kernel;
        cl_int error;

        if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
        {
            return -1;
        }

        size_t threads[1] = { buffer1.mSize };

        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 );
        test_error( error, "Unable to set kernel argument" );
        error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag );
        test_error( error, "Unable to set kernel argument" );

        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error( error, "Unable to queue kernel" );

        for ( size_t i = 0; i < buffer1.mSize; i++ )
            parentBufferState[ i + buffer1.mOrigin ] |= tag;

        return CL_SUCCESS;
    }
};

cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize )
{
    cl_ulong maxAllocSize;
    cl_int error;

    // Get the largest possible buffer we could allocate
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
    test_error( error, "Unable to get max alloc size" );

    // Don't create a buffer quite that big, just so we have some space left over for other work
    outSize = (size_t)( maxAllocSize / 5 );

    // Cap at 32M so tests complete in a reasonable amount of time.
    if ( outSize > 32 << 20 )
        outSize = 32 << 20;

    return CL_SUCCESS;
}
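
// Returns the index of the sub-buffer whose region contains the given offset into the parent
// buffer, or numSubBuffers if no sub-buffer covers that offset. Assumes the sub-buffers are
// ordered by origin.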
size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index )
{
    for ( size_t i = 0; i < numSubBuffers; i++ )
    {
        if ( subBuffers[ i ].mOrigin > index )
            return numSubBuffers;

        if ( ( subBuffers[ i ].mOrigin <= index ) && ( ( subBuffers[ i ].mOrigin + subBuffers[ i ].mSize ) > index ) )
            return i;
    }
    return numSubBuffers;
}

// This tests the read/write capabilities of sub buffers (if we are read/write, the sub buffers
// can't overlap)
int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign )
{
    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 8 ];
    size_t numSubBuffers;
    cl_int error;
    size_t i;

    MTdata m = init_genrand( 22 );

    cl_char * mainBufferContents = (cl_char*)calloc( 1, mainSize );
    cl_char * actualResults = (cl_char*)calloc( 1, mainSize );

    for ( i = 0; i < mainSize / 4; i++ )
        ((cl_uint*) mainBufferContents)[ i ] = genrand_int32( m );

    free_mtdata( m );

    // Create the main buffer to test against
    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error );
    test_error( error, "Unable to create test main buffer" );

    // Create some sub-buffers to use
    size_t toStartFrom = 0;
    for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ )
    {
        size_t endRange = toStartFrom + ( mainSize / 4 );
        if ( endRange > mainSize )
            endRange = mainSize;

        size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign;

        error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size );
        test_error( error, "Unable to allocate sub buffer" );

        toStartFrom = offset + size;
        if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) )
            break;
    }

    ReadWriteAction rwAction;
    MapAction mapAction;
    CopyAction copyAction;
    KernelReadWriteAction kernelAction;

    Action * actions[] = { &rwAction, &mapAction, &copyAction, &kernelAction };
    int numErrors = 0;

    // Do the following steps twice, to make sure the parent gets updated *and* we can
    // still work on the sub-buffers
    cl_command_queue prev_queue = queueA;
    for ( int time = 0; time < 2; time++ )
    {
        // Randomly apply actions to the set of sub buffers
        size_t i;
        for ( i = 0; i < 64; i++ )
        {
            int which = random_in_range( 0, 3, Action::GetRandSeed() );
            int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() );
            int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            int whichBufferB;
            do
            {
                whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() );
            } while ( whichBufferB == whichBufferA );

            cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA;
            if ( queue != prev_queue )
            {
                error = clFinish( prev_queue );
                test_error( error, "Error finishing other queue." );
                prev_queue = queue;
            }

            error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents );
            test_error( error, "Unable to execute action against sub buffers" );
        }

        error = clFinish( queueA );
        test_error( error, "Error finishing queueA." );

        error = clFinish( queueB );
        test_error( error, "Error finishing queueB." );
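
        // Both queues are now idle, so the parent buffer should reflect every action
        // that was applied through the sub-buffers.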
        // Validate by reading the final contents of the main buffer and
        // validating against our ref copy we generated
        error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL );
        test_error( error, "Unable to enqueue buffer read" );

        for ( i = 0; i < mainSize; i += 65536 )
        {
            size_t left = 65536;
            if ( ( i + left ) > mainSize )
                left = mainSize - i;

            if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 )
                continue;

            // The fast compare failed, so we need to determine where exactly the failure is
            for ( size_t j = 0; j < left; j++ )
            {
                if ( actualResults[ i + j ] != mainBufferContents[ i + j ] )
                {
                    // Hit a failure; report the sub-buffer at this address as having failed
                    size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j );
                    if ( sbThatFailed == numSubBuffers )
                    {
                        log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize );
                        // Since this is nonsensical, don't bother continuing to check
                        // (we will, however, print our map of sub-buffers for comparison)
                        for ( size_t k = 0; k < numSubBuffers; k++ )
                        {
                            log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize );
                        }
                        return -1;
                    }
                    log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize );

                    size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1;
                    i = newPos & ~65535;
                    j = newPos - i;
                    numErrors++;
                }
            }
        }
    }

    free( mainBufferContents );
    free( actualResults );

    Action::FreeRandSeed();
    return numErrors;
}

int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlignBits;

    // Get the size of the main buffer to use
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );

    // Determine the alignment of the device so we can make sure sub buffers are valid
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL );
    test_error( error, "Unable to get device's address alignment" );
    size_t addressAlign = addressAlignBits / 8;

    return test_sub_buffers_read_write_core( context, queue, queue, mainSize, addressAlign );
}

// This test performs the same basic operations as sub_buffers_read_write, but instead of a single
// device, it creates a context and buffer shared between two devices, then executes commands
// on queues for each device to ensure that everything still operates as expected.
int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;

    // First obtain the second device
    cl_device_id otherDevice = GetOpposingDevice( deviceID );
    if ( otherDevice == NULL )
    {
        log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" );
        return -1;
    }
    if ( otherDevice == deviceID )
    {
        log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" );
        return 0;
    }

    // Determine the device name.
    size_t param_size;
    error = clGetDeviceInfo( otherDevice, CL_DEVICE_NAME, 0, NULL, &param_size );
    test_error( error, "Error obtaining device name" );

#if !(defined(_WIN32) && defined(_MSC_VER))
    char device_name[param_size];
#else
    char* device_name = (char*)_malloca( param_size );
#endif

    error = clGetDeviceInfo( otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
    test_error( error, "Error obtaining device name" );

    log_info( "\tOther device obtained for dual device test is type %s\n", device_name );

    // Create a shared context for these two devices
    cl_device_id devices[ 2 ] = { deviceID, otherDevice };
    clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error );
    test_error( error, "Unable to create shared context" );

    // Create two queues (can't use the existing one, because it's on the wrong context)
    clCommandQueueWrapper queue1 = clCreateCommandQueue( testingContext, deviceID, 0, &error );
    test_error( error, "Unable to create command queue on main device" );

    clCommandQueueWrapper queue2 = clCreateCommandQueue( testingContext, otherDevice, 0, &error );
    test_error( error, "Unable to create command queue on secondary device" );

    // Determine the reasonable buffer size and address alignment that applies to BOTH devices
    size_t maxBuffer1, maxBuffer2;
    error = get_reasonable_buffer_size( deviceID, maxBuffer1 );
    test_error( error, "Unable to get buffer size for main device" );

    error = get_reasonable_buffer_size( otherDevice, maxBuffer2 );
    test_error( error, "Unable to get buffer size for secondary device" );

    maxBuffer1 = MIN( maxBuffer1, maxBuffer2 );

    cl_uint addressAlign1Bits, addressAlign2Bits;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL );
    test_error( error, "Unable to get main device's address alignment" );

    error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL );
    test_error( error, "Unable to get secondary device's address alignment" );

    cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8;

    // Finally time to run!
    return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
}
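
// Helper: reads the full contents of a buffer by running a simple copy kernel into a
// temporary output buffer, then reading that output back to the host.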
cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults )
{
    const char *kernelCode[] = {
        "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n"
        "{\n"
        "    int tid = get_global_id(0);\n"
        "    outBuffer[ tid ] = inBuffer[ tid ];\n"
        "}\n" };

    clProgramWrapper program;
    clKernelWrapper kernel;
    cl_int error;

    if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) )
    {
        return -1;
    }

    size_t threads[1] = { length };

    clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error );
    test_error( error, "Unable to create output stream" );

    error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to queue kernel" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL );
    test_error( error, "Unable to read results from kernel" );

    return CL_SUCCESS;
}

int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
{
    cl_int error;
    size_t mainSize;
    cl_uint addressAlign;

    clMemWrapper mainBuffer;
    SubBufferWrapper subBuffers[ 16 ];

    // Create the main buffer to test against
    error = get_reasonable_buffer_size( deviceID, mainSize );
    test_error( error, "Unable to get reasonable buffer size" );

    mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error );
    test_error( error, "Unable to create test main buffer" );

    // Determine the alignment of the device so we can make sure sub buffers are valid
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL );
    test_error( error, "Unable to get device's address alignment" );

    // Create some sub-buffers to use. Note: they don't have to not overlap (we actually *want* them to overlap)
    for ( size_t i = 0; i < 16; i++ )
    {
        size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign;
        size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign;

        error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size );
        test_error( error, "Unable to allocate sub buffer" );
    }

    /// For logging, we determine the amount of overlap we just generated

    // Build a fast in-out map to help with generating the stats
    int sbMap[ 32 ], mapSize = 0;
    for ( int i = 0; i < 16; i++ )
    {
        // Insert the start point of this sub-buffer into the map (start points are stored as the sub-buffer index)
        int j;
        for ( j = 0; j < mapSize; j++ )
        {
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                                           : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( subBuffers[ i ].mOrigin < pt )
            {
                // Origin is before this part of the map, so move map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = i;
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = i;
            mapSize++;
        }

        size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize;
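
        // Insert the end point of this sub-buffer into the map (end points are encoded as -( index + 1 ))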
        for ( j = 0; j < mapSize; j++ )
        {
            size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] - 1 ].mOrigin + subBuffers[ -sbMap[ j ] - 1 ].mSize )
                                           : subBuffers[ sbMap[ j ] ].mOrigin;
            if ( endPt < pt )
            {
                // End point is before this part of the map, so move map forward so we can insert
                memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) );
                sbMap[ j ] = -( i + 1 );
                mapSize++;
                break;
            }
        }
        if ( j == mapSize )
        {
            sbMap[ j ] = -( i + 1 );
            mapSize++;
        }
    }

    long long delta = 0;
    size_t maxOverlap = 1, overlap = 0;
    for ( int i = 0; i < 32; i++ )
    {
        if ( sbMap[ i ] >= 0 )
        {
            overlap++;
            if ( overlap > 1 )
                delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin );
            if ( overlap > maxOverlap )
                maxOverlap = overlap;
        }
        else
        {
            if ( overlap > 1 )
                delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize );
            overlap--;
        }
    }

    log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n",
              16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap );

    // Write some random contents to the main buffer
    cl_char * contents = new cl_char[ mainSize ];
    generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents );

    error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL );
    test_error( error, "Unable to write to main buffer" );

    // Now read from each sub-buffer and check to make sure that they make sense w.r.t. the main contents
    cl_char * tempBuffer = new cl_char[ mainSize ];

    int numErrors = 0;
    for ( size_t i = 0; i < 16; i++ )
    {
        // Read from this buffer
        int which = random_in_range( 0, 1, Action::GetRandSeed() );
        if ( which )
            error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL );
        else
            error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer );
        test_error( error, "Unable to read sub buffer contents" );

        if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 )
        {
            log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i );
            numErrors++;
        }
    }

    delete [] contents;
    delete [] tempBuffer;

    Action::FreeRandSeed();
    return numErrors;
}