300 lines
10 KiB
C++
300 lines
10 KiB
C++
/*
|
|
* Copyright (C) 2013 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <array>
|
|
#include <cstdint>
|
|
|
|
#include "RenderScriptToolkit.h"
|
|
#include "TaskProcessor.h"
|
|
#include "Utils.h"
|
|
|
|
#define LOG_TAG "renderscript.toolkit.Histogram"
|
|
|
|
namespace android {
|
|
namespace renderscript {
|
|
|
|
class HistogramTask : public Task {
|
|
const uchar* mIn;
|
|
std::vector<int> mSums;
|
|
uint32_t mThreadCount;
|
|
|
|
// Process a 2D tile of the overall work. threadIndex identifies which thread does the work.
|
|
virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
|
|
size_t endY) override;
|
|
|
|
void kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
|
|
void kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
|
|
void kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
|
|
void kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
|
|
|
|
public:
|
|
HistogramTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
|
|
uint32_t threadCount, const Restriction* restriction);
|
|
void collateSums(int* out);
|
|
};
|
|
|
|
class HistogramDotTask : public Task {
|
|
const uchar* mIn;
|
|
float mDot[4];
|
|
int mDotI[4];
|
|
std::vector<int> mSums;
|
|
uint32_t mThreadCount;
|
|
|
|
void kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
|
|
void kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
|
|
void kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
|
|
void kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend);
|
|
|
|
public:
|
|
HistogramDotTask(const uint8_t* in, size_t sizeX, size_t sizeY, size_t vectorSize,
|
|
uint32_t threadCount, const float* coefficients,
|
|
const Restriction* restriction);
|
|
void collateSums(int* out);
|
|
|
|
virtual void processData(int threadIndex, size_t startX, size_t startY, size_t endX,
|
|
size_t endY) override;
|
|
};
|
|
|
|
HistogramTask::HistogramTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
|
|
uint32_t threadCount, const Restriction* restriction)
|
|
: Task{sizeX, sizeY, vectorSize, true, restriction},
|
|
mIn{in},
|
|
mSums(256 * paddedSize(vectorSize) * threadCount) {
|
|
mThreadCount = threadCount;
|
|
}
|
|
|
|
void HistogramTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
|
|
size_t endY) {
|
|
typedef void (HistogramTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
|
|
|
|
KernelFunction kernel;
|
|
switch (mVectorSize) {
|
|
case 4:
|
|
kernel = &HistogramTask::kernelP1U4;
|
|
break;
|
|
case 3:
|
|
kernel = &HistogramTask::kernelP1U3;
|
|
break;
|
|
case 2:
|
|
kernel = &HistogramTask::kernelP1U2;
|
|
break;
|
|
case 1:
|
|
kernel = &HistogramTask::kernelP1U1;
|
|
break;
|
|
default:
|
|
ALOGE("Bad vector size %zd", mVectorSize);
|
|
return;
|
|
}
|
|
|
|
int* sums = &mSums[256 * paddedSize(mVectorSize) * threadIndex];
|
|
|
|
for (size_t y = startY; y < endY; y++) {
|
|
const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
|
|
std::invoke(kernel, this, inPtr, sums, startX, endX);
|
|
}
|
|
}
|
|
|
|
void HistogramTask::kernelP1U4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
|
|
for (uint32_t x = xstart; x < xend; x++) {
|
|
sums[(in[0] << 2)]++;
|
|
sums[(in[1] << 2) + 1]++;
|
|
sums[(in[2] << 2) + 2]++;
|
|
sums[(in[3] << 2) + 3]++;
|
|
in += 4;
|
|
}
|
|
}
|
|
|
|
void HistogramTask::kernelP1U3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
|
|
for (uint32_t x = xstart; x < xend; x++) {
|
|
sums[(in[0] << 2)]++;
|
|
sums[(in[1] << 2) + 1]++;
|
|
sums[(in[2] << 2) + 2]++;
|
|
in += 4;
|
|
}
|
|
}
|
|
|
|
void HistogramTask::kernelP1U2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
|
|
for (uint32_t x = xstart; x < xend; x++) {
|
|
sums[(in[0] << 1)]++;
|
|
sums[(in[1] << 1) + 1]++;
|
|
in += 2;
|
|
}
|
|
}
|
|
|
|
void HistogramTask::kernelP1U1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
|
|
for (uint32_t x = xstart; x < xend; x++) {
|
|
sums[in[0]]++;
|
|
in++;
|
|
}
|
|
}
|
|
|
|
void HistogramTask::collateSums(int* out) {
|
|
for (uint32_t ct = 0; ct < (256 * paddedSize(mVectorSize)); ct++) {
|
|
out[ct] = mSums[ct];
|
|
for (uint32_t t = 1; t < mThreadCount; t++) {
|
|
out[ct] += mSums[ct + (256 * paddedSize(mVectorSize) * t)];
|
|
}
|
|
}
|
|
}
|
|
|
|
HistogramDotTask::HistogramDotTask(const uchar* in, size_t sizeX, size_t sizeY, size_t vectorSize,
|
|
uint32_t threadCount, const float* coefficients,
|
|
const Restriction* restriction)
|
|
: Task{sizeX, sizeY, vectorSize, true, restriction}, mIn{in}, mSums(256 * threadCount, 0) {
|
|
mThreadCount = threadCount;
|
|
|
|
if (coefficients == nullptr) {
|
|
mDot[0] = 0.299f;
|
|
mDot[1] = 0.587f;
|
|
mDot[2] = 0.114f;
|
|
mDot[3] = 0;
|
|
} else {
|
|
memcpy(mDot, coefficients, 16);
|
|
}
|
|
mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
|
|
mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
|
|
mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
|
|
mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
|
|
}
|
|
|
|
void HistogramDotTask::processData(int threadIndex, size_t startX, size_t startY, size_t endX,
|
|
size_t endY) {
|
|
typedef void (HistogramDotTask::*KernelFunction)(const uchar*, int*, uint32_t, uint32_t);
|
|
|
|
KernelFunction kernel;
|
|
switch (mVectorSize) {
|
|
case 4:
|
|
kernel = &HistogramDotTask::kernelP1L4;
|
|
break;
|
|
case 3:
|
|
kernel = &HistogramDotTask::kernelP1L3;
|
|
break;
|
|
case 2:
|
|
kernel = &HistogramDotTask::kernelP1L2;
|
|
break;
|
|
case 1:
|
|
kernel = &HistogramDotTask::kernelP1L1;
|
|
break;
|
|
default:
|
|
ALOGI("Bad vector size %zd", mVectorSize);
|
|
return;
|
|
}
|
|
|
|
int* sums = &mSums[256 * threadIndex];
|
|
|
|
for (size_t y = startY; y < endY; y++) {
|
|
const uchar* inPtr = mIn + (mSizeX * y + startX) * paddedSize(mVectorSize);
|
|
std::invoke(kernel, this, inPtr, sums, startX, endX);
|
|
}
|
|
}
|
|
|
|
void HistogramDotTask::kernelP1L4(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
|
|
for (uint32_t x = xstart; x < xend; x++) {
|
|
int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]) + (mDotI[3] * in[3]);
|
|
sums[(t + 0x7f) >> 8]++;
|
|
in += 4;
|
|
}
|
|
}
|
|
|
|
void HistogramDotTask::kernelP1L3(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
|
|
for (uint32_t x = xstart; x < xend; x++) {
|
|
int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]) + (mDotI[2] * in[2]);
|
|
sums[(t + 0x7f) >> 8]++;
|
|
in += 4;
|
|
}
|
|
}
|
|
|
|
void HistogramDotTask::kernelP1L2(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
|
|
for (uint32_t x = xstart; x < xend; x++) {
|
|
int t = (mDotI[0] * in[0]) + (mDotI[1] * in[1]);
|
|
sums[(t + 0x7f) >> 8]++;
|
|
in += 2;
|
|
}
|
|
}
|
|
|
|
void HistogramDotTask::kernelP1L1(const uchar* in, int* sums, uint32_t xstart, uint32_t xend) {
|
|
for (uint32_t x = xstart; x < xend; x++) {
|
|
int t = (mDotI[0] * in[0]);
|
|
sums[(t + 0x7f) >> 8]++;
|
|
in++;
|
|
}
|
|
}
|
|
|
|
void HistogramDotTask::collateSums(int* out) {
|
|
for (uint32_t ct = 0; ct < 256; ct++) {
|
|
out[ct] = mSums[ct];
|
|
for (uint32_t t = 1; t < mThreadCount; t++) {
|
|
out[ct] += mSums[ct + (256 * t)];
|
|
}
|
|
}
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
void RenderScriptToolkit::histogram(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
|
|
size_t vectorSize, const Restriction* restriction) {
|
|
#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
|
|
if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
|
|
return;
|
|
}
|
|
if (vectorSize < 1 || vectorSize > 4) {
|
|
ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
HistogramTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(), restriction);
|
|
processor->doTask(&task);
|
|
task.collateSums(out);
|
|
}
|
|
|
|
void RenderScriptToolkit::histogramDot(const uint8_t* in, int32_t* out, size_t sizeX, size_t sizeY,
|
|
size_t vectorSize, const float* coefficients,
|
|
const Restriction* restriction) {
|
|
#ifdef ANDROID_RENDERSCRIPT_TOOLKIT_VALIDATE
|
|
if (!validRestriction(LOG_TAG, sizeX, sizeY, restriction)) {
|
|
return;
|
|
}
|
|
if (vectorSize < 1 || vectorSize > 4) {
|
|
ALOGE("The vectorSize should be between 1 and 4. %zu provided.", vectorSize);
|
|
return;
|
|
}
|
|
if (coefficients != nullptr) {
|
|
float sum = 0.0f;
|
|
for (size_t i = 0; i < vectorSize; i++) {
|
|
if (coefficients[i] < 0.0f) {
|
|
ALOGE("histogramDot coefficients should not be negative. Coefficient %zu was %f.",
|
|
i, coefficients[i]);
|
|
return;
|
|
}
|
|
sum += coefficients[i];
|
|
}
|
|
if (sum > 1.0f) {
|
|
ALOGE("histogramDot coefficients should add to 1 or less. Their sum is %f.", sum);
|
|
return;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
HistogramDotTask task(in, sizeX, sizeY, vectorSize, processor->getNumberOfThreads(),
|
|
coefficients, restriction);
|
|
processor->doTask(&task);
|
|
task.collateSums(out);
|
|
}
|
|
|
|
} // namespace renderscript
|
|
} // namespace android
|