/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#ifndef ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
|
|
#define ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
|
|
|
|
#include <android-base/thread_annotations.h>
|
|
|
|
#include <atomic>
|
|
#include <condition_variable>
|
|
#include <cstddef>
|
|
#include <mutex>
|
|
#include <thread>
|
|
#include <vector>
|
|
|
|
namespace android {
|
|
namespace renderscript {
|
|
|
|
/**
 * Description of the data to be processed for one Toolkit method call, e.g. one blur or one
 * blend operation.
 *
 * The data to be processed is a 2D array of cells. Each cell is a vector of 1 to 4 unsigned bytes.
 * The most typical configuration is a 2D array of uchar4 used to represent RGBA images.
 *
 * This is a base class. There will be a subclass for each Toolkit op.
 *
 * Typical usage of a derived class would look like:
 *    BlurTask task(in, out, sizeX, sizeY, vectorSize, etc);
 *    processor->doTask(&task);
 *
 * The TaskProcessor should call setTiling() and setUsesSimd() once, before calling processTile().
 * Other classes should not call setTiling(), setUsesSimd(), and processTile().
 */
class Task {
   protected:
    /**
     * Number of cells in the X direction.
     */
    const size_t mSizeX;
    /**
     * Number of cells in the Y direction.
     */
    const size_t mSizeY;
    /**
     * Number of elements in a vector (cell). From 1-4.
     */
    const size_t mVectorSize;
    /**
     * Whether the task prefers the processData call to represent the work to be done as
     * one line rather than a rectangle. This would be the case for work that doesn't involve
     * vertical neighbors, e.g. blend or histogram. A task would prefer this to minimize the
     * number of SIMD calls to make, i.e. have one call that covers all the rows.
     *
     * This setting will be used only when a tile covers the entire width of the data to be
     * processed.
     */
    const bool mPrefersDataAsOneRow;
    /**
     * Whether the processor we're working on supports SIMD operations.
     * Defaults to false until setUsesSimd() is called.
     */
    bool mUsesSimd = false;

   private:
    /**
     * If not null, we'll process a subset of the whole 2D array. This specifies the restriction.
     *
     * The pointer is not owned; the restriction should outlive this instance. The elaborated
     * `struct Restriction` spelling also acts as a forward declaration of the type when it has
     * not been declared yet, which is what lets the constructor below name it.
     */
    const struct Restriction* mRestriction;

    /**
     * We'll divide the work into rectangular tiles. See setTiling().
     */

    /**
     * Size of a tile in the X direction, as a number of cells.
     */
    size_t mCellsPerTileX = 0;
    /**
     * Size of a tile in the Y direction, as a number of cells.
     */
    size_t mCellsPerTileY = 0;
    /**
     * Number of tiles per row of the restricted area we're working on.
     */
    size_t mTilesPerRow = 0;
    /**
     * Number of tiles per column of the restricted area we're working on.
     */
    size_t mTilesPerColumn = 0;

   public:
    /**
     * Construct a task.
     *
     * sizeX and sizeY should be greater than 0. vectorSize should be between 1 and 4.
     * The restriction should outlive this instance. The Toolkit validates the
     * arguments so we won't do that again here.
     *
     * @param sizeX Number of cells in the X direction.
     * @param sizeY Number of cells in the Y direction.
     * @param vectorSize Number of elements in a cell.
     * @param prefersDataAsOneRow See mPrefersDataAsOneRow.
     * @param restriction Optional sub-region to process; may be null.
     */
    Task(size_t sizeX, size_t sizeY, size_t vectorSize, bool prefersDataAsOneRow,
         const Restriction* restriction)
        : mSizeX{sizeX},
          mSizeY{sizeY},
          mVectorSize{vectorSize},
          mPrefersDataAsOneRow{prefersDataAsOneRow},
          mRestriction{restriction} {}

    /**
     * Virtual so that derived tasks can be destroyed through a Task pointer.
     */
    virtual ~Task() = default;

    /**
     * Record whether the processor supports SIMD operations.
     *
     * @param uses The new value of mUsesSimd.
     */
    void setUsesSimd(bool uses) { mUsesSimd = uses; }

    /**
     * Divide the work into a number of tiles that can be distributed to the various threads.
     * A tile will be a rectangular region. To be robust, we'll want to handle regular cases
     * like 400x300 but also unusual ones like 1x120000, 120000x1, 1x1.
     *
     * We have a target size for the tiles, which corresponds roughly to how much data a thread
     * will want to process before checking for more work. If the target is set too low, we'll spend
     * more time in synchronization. If it's too large, some cores may not be used as efficiently.
     *
     * This method returns the number of tiles.
     *
     * @param targetTileSizeInBytes Target size. Values less than 1000 will be treated as 1000.
     */
    int setTiling(unsigned int targetTileSizeInBytes);

    /**
     * This is called by the TaskProcessor to instruct the task to process a tile.
     *
     * @param threadIndex The index of the thread that's processing the tile.
     * @param tileIndex The index of the tile to process.
     */
    void processTile(unsigned int threadIndex, size_t tileIndex);

   private:
    /**
     * Call to the derived class to process the data bounded by the rectangle specified
     * by (startX, startY) and (endX, endY). The end values are EXCLUDED. This rectangle
     * will be contained within the restriction, if one is provided.
     *
     * Note that threadIndex is a signed int here whereas processTile() takes an unsigned int;
     * the signature is kept as is so that existing overrides continue to match.
     */
    virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
                             size_t endY) = 0;
};
|
|
|
|
/**
|
|
* There's one instance of the task processor for the Toolkit. This class owns the thread pool,
|
|
* and dispatches the tiles of work to the threads.
|
|
*/
|
|
class TaskProcessor {
|
|
/**
|
|
* Does this processor support SIMD-like instructions?
|
|
*/
|
|
const bool mUsesSimd;
|
|
/**
|
|
* The number of separate threads we'll spawn. It's one less than the number of threads that
|
|
* do the work as the client thread that starts the work will also be used.
|
|
*/
|
|
const unsigned int mNumberOfPoolThreads;
|
|
/**
|
|
* Ensures that only one task is done at a time.
|
|
*/
|
|
std::mutex mTaskMutex;
|
|
/**
|
|
* Ensures consistent access to the shared queue state.
|
|
*/
|
|
std::mutex mQueueMutex;
|
|
/**
|
|
* The thread pool workers.
|
|
*/
|
|
std::vector<std::thread> mPoolThreads;
|
|
/**
|
|
* The task being processed, if any. We only do one task at a time. We could create a queue
|
|
* of tasks but using a mTaskMutex is sufficient for now.
|
|
*/
|
|
Task* mCurrentTask GUARDED_BY(mTaskMutex) = nullptr;
|
|
/**
|
|
* Signals that the mPoolThreads should terminate.
|
|
*/
|
|
bool mStopThreads GUARDED_BY(mQueueMutex) = false;
|
|
/**
|
|
* Signaled when work is available or the mPoolThreads need to shut down. mStopThreads is used
|
|
* to distinguish between the two.
|
|
*/
|
|
std::condition_variable mWorkAvailableOrStop;
|
|
/**
|
|
* Signaled when the work for the task is finished.
|
|
*/
|
|
std::condition_variable mWorkIsFinished;
|
|
/**
|
|
* A user task, e.g. a blend or a blur, is split into a number of tiles. When a thread starts
|
|
* working on a new tile, it uses this count to identify which tile to work on. The tile
|
|
* number is sufficient to determine the boundaries of the data to process.
|
|
*
|
|
* The number of tiles left to process.
|
|
*/
|
|
int mTilesNotYetStarted GUARDED_BY(mQueueMutex) = 0;
|
|
/**
|
|
* The number of tiles currently being processed. Must not be greater than
|
|
* mNumberOfPoolThreads + 1.
|
|
*/
|
|
int mTilesInProcess GUARDED_BY(mQueueMutex) = 0;
|
|
|
|
/**
|
|
* Determines how we'll tile the work and signals the thread pool of available work.
|
|
*
|
|
* @param task The task to be performed.
|
|
*/
|
|
void startWork(Task* task) REQUIRES(mTaskMutex);
|
|
|
|
/**
|
|
* Tells the thread to start processing work off the queue.
|
|
*
|
|
* The flag is used for prevent the main thread from blocking forever if the work is
|
|
* so trivial that the worker threads complete the work before the main thread calls this
|
|
* method.
|
|
*
|
|
* @param threadIndex The index number (0..mNumberOfPoolThreads) this thread will referred by.
|
|
* @param returnWhenNoWork If there's no work, return immediately.
|
|
*/
|
|
void processTilesOfWork(int threadIndex, bool returnWhenNoWork);
|
|
|
|
/**
|
|
* Wait for the pool workers to complete the work on the current task.
|
|
*/
|
|
void waitForPoolWorkersToComplete();
|
|
|
|
public:
|
|
/**
|
|
* Create the processor.
|
|
*
|
|
* @param numThreads The total number of threads to use. If 0, we'll decided based on system
|
|
* properties.
|
|
*/
|
|
explicit TaskProcessor(unsigned int numThreads = 0);
|
|
|
|
~TaskProcessor();
|
|
|
|
/**
|
|
* Do the specified task. Returns only after the task has been completed.
|
|
*/
|
|
void doTask(Task* task);
|
|
|
|
/**
|
|
* Some Tasks need to allocate temporary storage for each worker thread.
|
|
* This provides the number of threads.
|
|
*/
|
|
unsigned int getNumberOfThreads() const { return mNumberOfPoolThreads + 1; }
|
|
};
|
|
|
|
} // namespace renderscript
|
|
} // namespace android
|
|
|
|
#endif // ANDROID_RENDERSCRIPT_TOOLKIT_TASKPROCESSOR_H
|