391 lines
18 KiB
C++
391 lines
18 KiB
C++
/*
|
|
* Copyright (C) 2017 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#define LOG_TAG "Operations"
|
|
|
|
#include "Pooling.h"
|
|
|
|
#include <vector>
|
|
|
|
#include "OperationResolver.h"
|
|
#include "Tracing.h"
|
|
#include "nnapi/Validation.h"
|
|
|
|
#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
|
|
#pragma clang diagnostic push
|
|
#pragma clang diagnostic ignored "-Wunused-parameter"
|
|
#pragma clang diagnostic ignored "-Wsign-compare"
|
|
#pragma clang diagnostic ignored "-Winvalid-partial-specialization"
|
|
#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
|
|
#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
|
|
#pragma clang diagnostic pop
|
|
|
|
#include "CpuOperationUtils.h"
|
|
#endif // NN_INCLUDE_CPU_IMPLEMENTATION
|
|
|
|
namespace android {
|
|
namespace nn {
|
|
|
|
namespace pooling {
|
|
|
|
#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
|
|
namespace {
|
|
|
|
struct PoolingParam {
|
|
int32_t padding_left, padding_right;
|
|
int32_t padding_top, padding_bottom;
|
|
int32_t stride_width, stride_height;
|
|
int32_t filter_width, filter_height;
|
|
int32_t activation;
|
|
bool useNchw = false;
|
|
|
|
bool initialize(const IOperationExecutionContext* context) {
|
|
uint32_t inCount = context->getNumInputs();
|
|
int32_t padding_implicit = 0;
|
|
if (inCount >= 10) {
|
|
padding_left = context->getInputValue<int32_t>(1);
|
|
padding_right = context->getInputValue<int32_t>(2);
|
|
padding_top = context->getInputValue<int32_t>(3);
|
|
padding_bottom = context->getInputValue<int32_t>(4);
|
|
stride_width = context->getInputValue<int32_t>(5);
|
|
stride_height = context->getInputValue<int32_t>(6);
|
|
filter_width = context->getInputValue<int32_t>(7);
|
|
filter_height = context->getInputValue<int32_t>(8);
|
|
activation = context->getInputValue<int32_t>(9);
|
|
if (inCount == 11) {
|
|
useNchw = context->getInputValue<bool>(10);
|
|
}
|
|
} else {
|
|
padding_implicit = context->getInputValue<int32_t>(1);
|
|
stride_width = context->getInputValue<int32_t>(2);
|
|
stride_height = context->getInputValue<int32_t>(3);
|
|
filter_width = context->getInputValue<int32_t>(4);
|
|
filter_height = context->getInputValue<int32_t>(5);
|
|
activation = context->getInputValue<int32_t>(6);
|
|
if (inCount == 8) {
|
|
useNchw = context->getInputValue<bool>(7);
|
|
}
|
|
}
|
|
if (inCount <= 8) {
|
|
Shape inputShape = context->getInputShape(kInputTensor);
|
|
int32_t input_height = getSizeOfDimension(inputShape, useNchw ? 2 : 1);
|
|
int32_t input_width = getSizeOfDimension(inputShape, useNchw ? 3 : 2);
|
|
calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
|
|
&padding_left, &padding_right);
|
|
calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
|
|
&padding_top, &padding_bottom);
|
|
}
|
|
NN_RET_CHECK_GE(padding_left, 0);
|
|
NN_RET_CHECK_GE(padding_right, 0);
|
|
NN_RET_CHECK_GE(padding_top, 0);
|
|
NN_RET_CHECK_GE(padding_bottom, 0);
|
|
NN_RET_CHECK_GT(stride_width, 0);
|
|
NN_RET_CHECK_GT(stride_height, 0);
|
|
NN_RET_CHECK_GT(filter_width, 0);
|
|
NN_RET_CHECK_GT(filter_height, 0);
|
|
NN_RET_CHECK_GE(activation, 0);
|
|
NN_RET_CHECK_GT(filter_width, padding_left);
|
|
NN_RET_CHECK_GT(filter_width, padding_right);
|
|
NN_RET_CHECK_GT(filter_height, padding_top);
|
|
NN_RET_CHECK_GT(filter_height, padding_bottom);
|
|
return true;
|
|
}
|
|
|
|
tflite::PoolParams toTfliteParam(const Shape& output) const {
|
|
tflite::PoolParams params = {
|
|
.padding_values = {.width = static_cast<int16_t>(padding_left),
|
|
.height = static_cast<int16_t>(padding_top),
|
|
.width_offset = 0,
|
|
.height_offset = 0},
|
|
.stride_height = stride_height,
|
|
.stride_width = stride_width,
|
|
.filter_height = filter_height,
|
|
.filter_width = filter_width,
|
|
};
|
|
if (output.type == OperandType::TENSOR_QUANT8_ASYMM) {
|
|
int32_t output_activation_min = 0;
|
|
int32_t output_activation_max = 0;
|
|
CalculateActivationRangeUint8(activation, output, &output_activation_min,
|
|
&output_activation_max);
|
|
params.quantized_activation_min = output_activation_min;
|
|
params.quantized_activation_max = output_activation_max;
|
|
} else if (output.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
|
|
int32_t output_activation_min = 0;
|
|
int32_t output_activation_max = 0;
|
|
CalculateActivationRangeInt8(activation, output, &output_activation_min,
|
|
&output_activation_max);
|
|
params.quantized_activation_min = output_activation_min;
|
|
params.quantized_activation_max = output_activation_max;
|
|
} else {
|
|
float output_activation_min, output_activation_max;
|
|
CalculateActivationRangeFloat(activation, &output_activation_min,
|
|
&output_activation_max);
|
|
params.float_activation_min = output_activation_min;
|
|
params.float_activation_max = output_activation_max;
|
|
}
|
|
return params;
|
|
}
|
|
};
|
|
|
|
bool averagePoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
float* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("averagePoolFloat32");
|
|
auto op_params = param.toTfliteParam(outputShape);
|
|
NNTRACE_COMP_SWITCH("optimized_ops::AveragePool");
|
|
tflite::optimized_ops::AveragePool(op_params, convertShapeToTflshape(inputShape), inputData,
|
|
convertShapeToTflshape(outputShape), outputData);
|
|
return true;
|
|
}
|
|
|
|
bool averagePoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
_Float16* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("averagePoolFloat16");
|
|
std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
|
|
std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
|
|
|
|
convertFloat16ToFloat32(inputData, &inputDataFloat32);
|
|
averagePoolNhwc(inputDataFloat32.data(), inputShape, param, outputDataFloat32.data(),
|
|
outputShape);
|
|
convertFloat32ToFloat16(outputDataFloat32, outputData);
|
|
return true;
|
|
}
|
|
|
|
bool averagePoolNhwc(const uint8_t* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
uint8_t* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("averagePoolQuant8");
|
|
auto op_params = param.toTfliteParam(outputShape);
|
|
NNTRACE_COMP_SWITCH("optimized_ops::AveragePool");
|
|
tflite::optimized_ops::AveragePool(op_params, convertShapeToTflshape(inputShape), inputData,
|
|
convertShapeToTflshape(outputShape), outputData);
|
|
return true;
|
|
}
|
|
|
|
bool averagePoolNhwc(const int8_t* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
int8_t* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("averagePoolQuant8Signed");
|
|
auto op_params = param.toTfliteParam(outputShape);
|
|
NNTRACE_COMP_SWITCH("optimized_integer_ops::AveragePool");
|
|
// We are using reference implementation of the AveragePool op because the
|
|
// optimized version fails to pass some of the quantization coupling tests.
|
|
tflite::reference_integer_ops::AveragePool(op_params, convertShapeToTflshape(inputShape),
|
|
inputData, convertShapeToTflshape(outputShape),
|
|
outputData);
|
|
return true;
|
|
}
|
|
|
|
bool l2PoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
float* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("l2PoolFloat32");
|
|
auto op_params = param.toTfliteParam(outputShape);
|
|
NNTRACE_COMP_SWITCH("optimized_ops::L2Pool");
|
|
tflite::optimized_ops::L2Pool(op_params, convertShapeToTflshape(inputShape), inputData,
|
|
convertShapeToTflshape(outputShape), outputData);
|
|
return true;
|
|
}
|
|
|
|
bool l2PoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
_Float16* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("l2PoolFloat16");
|
|
std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
|
|
std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
|
|
|
|
convertFloat16ToFloat32(inputData, &inputDataFloat32);
|
|
l2PoolNhwc(inputDataFloat32.data(), inputShape, param, outputDataFloat32.data(), outputShape);
|
|
convertFloat32ToFloat16(outputDataFloat32, outputData);
|
|
return true;
|
|
}
|
|
|
|
bool maxPoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
float* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("maxPoolFloat32");
|
|
auto op_params = param.toTfliteParam(outputShape);
|
|
NNTRACE_COMP_SWITCH("optimized_ops::MaxPool");
|
|
tflite::optimized_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
|
|
convertShapeToTflshape(outputShape), outputData);
|
|
return true;
|
|
}
|
|
|
|
bool maxPoolNhwc(const uint8_t* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
uint8_t* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("maxPoolQuant8");
|
|
auto op_params = param.toTfliteParam(outputShape);
|
|
NNTRACE_COMP_SWITCH("optimized_ops::MaxPool");
|
|
tflite::optimized_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
|
|
convertShapeToTflshape(outputShape), outputData);
|
|
return true;
|
|
}
|
|
|
|
bool maxPoolNhwc(const int8_t* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
int8_t* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("maxPoolQuant8Signed");
|
|
auto op_params = param.toTfliteParam(outputShape);
|
|
NNTRACE_COMP_SWITCH("optimized_integer_ops::MaxPool");
|
|
// We are using reference implementation of the MaxPool op because the
|
|
// optimized version fails to pass some of the quantization coupling tests.
|
|
tflite::reference_integer_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
|
|
convertShapeToTflshape(outputShape), outputData);
|
|
return true;
|
|
}
|
|
|
|
bool maxPoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
_Float16* outputData, const Shape& outputShape) {
|
|
NNTRACE_TRANS("maxPoolFloat16");
|
|
std::vector<float> inputData_float32(getNumberOfElements(inputShape));
|
|
std::vector<float> outputData_float32(getNumberOfElements(outputShape));
|
|
|
|
convertFloat16ToFloat32(inputData, &inputData_float32);
|
|
maxPoolNhwc(inputData_float32.data(), inputShape, param, outputData_float32.data(),
|
|
outputShape);
|
|
convertFloat32ToFloat16(outputData_float32, outputData);
|
|
return true;
|
|
}
|
|
|
|
template <typename T>
|
|
bool averagePool(const T* inputData, const Shape& inputShape, const PoolingParam& param,
|
|
T* outputData, const Shape& outputShape) {
|
|
InputWithLayout<T> input(param.useNchw);
|
|
OutputWithLayout<T> output(param.useNchw);
|
|
NN_RET_CHECK(input.initialize(inputData, inputShape));
|
|
NN_RET_CHECK(output.initialize(outputData, outputShape));
|
|
NN_RET_CHECK(averagePoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
|
|
output.getNhwcBuffer(), output.getNhwcShape()));
|
|
NN_RET_CHECK(output.commit());
|
|
return true;
|
|
}
|
|
|
|
template <typename T>
|
|
bool l2Pool(const T* inputData, const Shape& inputShape, const PoolingParam& param, T* outputData,
|
|
const Shape& outputShape) {
|
|
InputWithLayout<T> input(param.useNchw);
|
|
OutputWithLayout<T> output(param.useNchw);
|
|
NN_RET_CHECK(input.initialize(inputData, inputShape));
|
|
NN_RET_CHECK(output.initialize(outputData, outputShape));
|
|
NN_RET_CHECK(l2PoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
|
|
output.getNhwcBuffer(), output.getNhwcShape()));
|
|
NN_RET_CHECK(output.commit());
|
|
return true;
|
|
}
|
|
|
|
template <typename T>
|
|
bool maxPool(const T* inputData, const Shape& inputShape, const PoolingParam& param, T* outputData,
|
|
const Shape& outputShape) {
|
|
InputWithLayout<T> input(param.useNchw);
|
|
OutputWithLayout<T> output(param.useNchw);
|
|
NN_RET_CHECK(input.initialize(inputData, inputShape));
|
|
NN_RET_CHECK(output.initialize(outputData, outputShape));
|
|
NN_RET_CHECK(maxPoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
|
|
output.getNhwcBuffer(), output.getNhwcShape()));
|
|
NN_RET_CHECK(output.commit());
|
|
return true;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
bool prepare(IOperationExecutionContext* context) {
|
|
Shape input = context->getInputShape(kInputTensor);
|
|
NN_RET_CHECK_EQ(getNumberOfDimensions(input), 4u);
|
|
|
|
PoolingParam param;
|
|
NN_RET_CHECK(param.initialize(context));
|
|
|
|
// Only batches can be zero.
|
|
uint32_t batches = getSizeOfDimension(input, 0);
|
|
uint32_t height = getSizeOfDimension(input, param.useNchw ? 2 : 1);
|
|
uint32_t width = getSizeOfDimension(input, param.useNchw ? 3 : 2);
|
|
uint32_t channels = getSizeOfDimension(input, param.useNchw ? 1 : 3);
|
|
NN_RET_CHECK_GT(height, 0u);
|
|
NN_RET_CHECK_GT(width, 0u);
|
|
NN_RET_CHECK_GT(channels, 0u);
|
|
|
|
uint32_t outWidth = computeOutSize(width, param.filter_width, param.stride_width,
|
|
param.padding_left, param.padding_right);
|
|
uint32_t outHeight = computeOutSize(height, param.filter_height, param.stride_height,
|
|
param.padding_top, param.padding_bottom);
|
|
|
|
Shape output = input;
|
|
if (param.useNchw) {
|
|
output.dimensions = {batches, channels, outHeight, outWidth};
|
|
} else {
|
|
output.dimensions = {batches, outHeight, outWidth, channels};
|
|
}
|
|
return context->setOutputShape(kOutputTensor, output);
|
|
}
|
|
|
|
#define POOLING_DISPATCH_INPUT_TYPE(name, type, cppType) \
|
|
case OperandType::type: \
|
|
return name(context->getInputBuffer<cppType>(kInputTensor), \
|
|
context->getInputShape(kInputTensor), param, \
|
|
context->getOutputBuffer<cppType>(kOutputTensor), \
|
|
context->getOutputShape(kOutputTensor))
|
|
|
|
bool executeAveragePool(IOperationExecutionContext* context) {
|
|
// Bypass execution in the case of zero-sized input.
|
|
if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
|
|
PoolingParam param;
|
|
NN_RET_CHECK(param.initialize(context));
|
|
switch (context->getInputType(kInputTensor)) {
|
|
POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_FLOAT32, float);
|
|
POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_FLOAT16, _Float16);
|
|
POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_QUANT8_ASYMM, uint8_t);
|
|
POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_QUANT8_ASYMM_SIGNED, int8_t);
|
|
default:
|
|
NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation AVERAGE_POOL_2D";
|
|
}
|
|
}
|
|
|
|
bool executeL2Pool(IOperationExecutionContext* context) {
|
|
// Bypass execution in the case of zero-sized input.
|
|
if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
|
|
PoolingParam param;
|
|
NN_RET_CHECK(param.initialize(context));
|
|
switch (context->getInputType(kInputTensor)) {
|
|
POOLING_DISPATCH_INPUT_TYPE(l2Pool, TENSOR_FLOAT32, float);
|
|
POOLING_DISPATCH_INPUT_TYPE(l2Pool, TENSOR_FLOAT16, _Float16);
|
|
default:
|
|
NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation L2_POOL_2D";
|
|
}
|
|
}
|
|
|
|
bool executeMaxPool(IOperationExecutionContext* context) {
|
|
// Bypass execution in the case of zero-sized input.
|
|
if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
|
|
PoolingParam param;
|
|
NN_RET_CHECK(param.initialize(context));
|
|
switch (context->getInputType(kInputTensor)) {
|
|
POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_FLOAT32, float);
|
|
POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_FLOAT16, _Float16);
|
|
POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_QUANT8_ASYMM, uint8_t);
|
|
POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_QUANT8_ASYMM_SIGNED, int8_t);
|
|
default:
|
|
NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation MAX_POOL_2D";
|
|
}
|
|
}
|
|
#endif // NN_INCLUDE_CPU_IMPLEMENTATION
|
|
|
|
#undef POOLING_DISPATCH_INPUT_TYPE
|
|
|
|
} // namespace pooling
|
|
|
|
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(AVERAGE_POOL_2D, pooling::prepare,
|
|
pooling::executeAveragePool, .allowZeroSizedInput = true);
|
|
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(L2_POOL_2D, pooling::prepare, pooling::executeL2Pool,
|
|
.allowZeroSizedInput = true);
|
|
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(MAX_POOL_2D, pooling::prepare, pooling::executeMaxPool,
|
|
.allowZeroSizedInput = true);
|
|
|
|
} // namespace nn
|
|
} // namespace android
|