277 lines
12 KiB
C++
277 lines
12 KiB
C++
/*
|
|
* Copyright (C) 2021 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifdef NN_EXPERIMENTAL_FEATURE
|
|
|
|
#include "Densify.h"
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <functional>
|
|
#include <iostream>
|
|
#include <numeric>
|
|
#include <vector>
|
|
|
|
#include "OperationResolver.h"
|
|
#include "OperationsExecutionUtils.h"
|
|
#include "OperationsValidationUtils.h"
|
|
#include "Tracing.h"
|
|
#include "nnapi/OperandTypes.h"
|
|
#include "nnapi/TypeUtils.h"
|
|
#include "nnapi/Validation.h"
|
|
|
|
#define LOG_TAG "Operations"
|
|
|
|
namespace android {
|
|
namespace nn {
|
|
namespace densify_op {
|
|
|
|
/**
|
|
* getFlattenedIndex:
|
|
* Gets the index of destData where indices points to. Uses shape and origRank
|
|
* for calculations.
|
|
*/
|
|
uint64_t getFlattenedIndex(const std::vector<int32_t>& indices, const std::vector<uint32_t>& shape,
|
|
const int origRank) {
|
|
uint64_t index = 0;
|
|
int subElems = 1;
|
|
// origRank = size of destDims
|
|
for (int i = origRank - 1; i >= 0; i--) {
|
|
index += uint64_t(indices[i] * subElems);
|
|
subElems *= shape[i];
|
|
}
|
|
return index;
|
|
}
|
|
|
|
/**
|
|
* populate (Recursive Function):
|
|
* Used to populate the destData with elements from srcData one value at a time.
|
|
* Inputs:
|
|
* * srcData = input data of non-zero values.
|
|
* * indices = used to determine the index in destData where we write srcData to. Uses block
|
|
* dimension.
|
|
* * level = used to keep track of recursion level. Each recursive instance exits when level == size
|
|
* of traversal order.
|
|
* * prevIdx = used to keep placement in array segments and srcData.
|
|
* * destData = dense output data. Input being written to.
|
|
* * destDims = shape of the output tensor. Used to calculate the flattened idx.
|
|
* * dimFormat = dimension format for each entry in traversal order. The format is either DENSE
|
|
* (dimFormat[i] == 0) or SPARSE_CSR (dimFormat[i] == 1). Format is significant to determine how
|
|
* recursive iterations will occur and what metadata is stored in dimMetadata.
|
|
* * traversalOrder = contains n+k elements. The first n elements are a permutation of the dense
|
|
* tensor shape. The last k elements are a permutation of the block dimensions. Used to determine
|
|
* order of traversal paths.
|
|
* * blockSize = dense size of blocks. The last k elements of dimensions.
|
|
* * blockMap = Used to determine how the block dimension maps to the original tensor dimension.
|
|
* * dimMetadata = metadata varies depending on dimFormat values. If format is DENSE,
|
|
* dimMetadata[i*2][0] is the total number of elements in the dense tensor on the ith traversal
|
|
* path, and recursive iterations are through a standard for loop from 0 to dimMetadata[i*2][0].
|
|
* If format is SPARSE_CSR, dimMetadata[i*2] is a vector of array segments and
|
|
* dimMetadata[i*2+1] is a vector of array indices. The next recursive iterations will be
|
|
* looping through the array segments vector (since array segments are the same as row pointers in
|
|
* CSR format, the ith entry should never be greater than the ith+1 entry) and modifying the input
|
|
* indices with elements from the array indices vector.
|
|
* * origRank = the size of destDims. Used for calculating flattened index of indices.
|
|
*/
|
|
template <typename T>
|
|
void populate(const T* srcData, std::vector<int32_t>* indices, uint32_t level, uint32_t prevIdx,
|
|
T* destData, const std::vector<uint32_t>& destDims,
|
|
const std::vector<int32_t>& dimFormat, const int32_t* traversalOrder,
|
|
const std::vector<int32_t>& blockSize, const int32_t* blockMap,
|
|
const std::vector<std::vector<int32_t>>& dimMetadata, const int origRank) {
|
|
if (level == (*indices).size()) { // level == size of traversal order
|
|
std::vector<int> origIdx(origRank);
|
|
size_t i = 0;
|
|
// Calculating origIdx using dense tensor dimensions
|
|
for (; i < origIdx.size(); i++) {
|
|
int origDim = traversalOrder[i];
|
|
origIdx[origDim] = (*indices)[i];
|
|
}
|
|
// Modifying origIdx using block dimensions
|
|
for (; i < (*indices).size(); i++) {
|
|
const int blockIdx = traversalOrder[i] - origRank;
|
|
const int origDim = blockMap[blockIdx];
|
|
origIdx[origDim] = origIdx[origDim] * blockSize[blockIdx] + (*indices)[i];
|
|
}
|
|
// Writing srcData to destData
|
|
destData[getFlattenedIndex(origIdx, destDims, origRank)] = srcData[prevIdx];
|
|
return;
|
|
}
|
|
const int metadataIdx = 2 * level;
|
|
if (dimFormat[level] == DENSE) { // DENSE dimension format
|
|
const int shapeOfLevel = dimMetadata[metadataIdx].front();
|
|
for (int i = 0; i < shapeOfLevel; i++) {
|
|
(*indices)[level] = i;
|
|
populate(srcData, indices, level + 1, prevIdx * shapeOfLevel + i, destData, destDims,
|
|
dimFormat, traversalOrder, blockSize, blockMap, dimMetadata, origRank);
|
|
}
|
|
} else { // SPARSE_CSR dimension format
|
|
const auto& arraySegments = dimMetadata[metadataIdx];
|
|
const auto& arrayIndices = dimMetadata[metadataIdx + 1];
|
|
for (int i = arraySegments[prevIdx]; i < arraySegments[prevIdx + 1]; i++) {
|
|
(*indices)[level] = arrayIndices[i];
|
|
populate(srcData, indices, level + 1, i, destData, destDims, dimFormat, traversalOrder,
|
|
blockSize, blockMap, dimMetadata, origRank);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* arrToVector:
|
|
* Converts a T array into an T vector.
|
|
*/
|
|
template <typename T>
|
|
std::vector<T> arrToVector(const T* arr, uint32_t size) {
|
|
return arr == nullptr ? std::vector<T>() : std::vector<T>(arr, arr + size);
|
|
}
|
|
|
|
template <typename T>
|
|
inline bool densify(IOperationExecutionContext* context) {
|
|
// Getting all inputs
|
|
std::vector<Shape> inputShapes;
|
|
const uint32_t inputCount = context->getNumInputs();
|
|
inputShapes.reserve(inputCount);
|
|
const T* srcData = context->getInputBuffer<T>(kInputTensor);
|
|
inputShapes.push_back(context->getInputShape(kInputTensor));
|
|
const int32_t* traversalOrder = context->getInputBuffer<int32_t>(kInputTravOrder);
|
|
inputShapes.push_back(context->getInputShape(kInputTravOrder));
|
|
const int32_t* blockMap = context->getInputBuffer<int32_t>(kInputBlockMap);
|
|
inputShapes.push_back(context->getInputShape(kInputBlockMap));
|
|
const int32_t* dimFormatPtr = context->getInputBuffer<int32_t>(kInputDimFormat);
|
|
inputShapes.push_back(context->getInputShape(kInputDimFormat));
|
|
const int32_t* dimensionsPtr = context->getInputBuffer<int32_t>(kInputDimensions);
|
|
inputShapes.push_back(context->getInputShape(kInputDimensions));
|
|
|
|
std::vector<const int32_t*> dimMetadataPtrs;
|
|
for (uint32_t i = kInputArrSeg; i < inputCount; i++) {
|
|
inputShapes.push_back(context->getInputShape(i));
|
|
const int32_t* metadata = context->getInputBuffer<int32_t>(i);
|
|
dimMetadataPtrs.push_back(metadata);
|
|
}
|
|
Shape destShape = context->getOutputShape(kOutputTensor);
|
|
|
|
// Organizing dimFormat, dimensions, dimMetadata into vectors
|
|
std::vector<int32_t> dimFormat(
|
|
inputShapes[kInputDimFormat].dimensions.front()); // size of dimFormatPtr
|
|
std::vector<int32_t> dimensions(dimFormat.size());
|
|
std::vector<std::vector<int32_t>> dimMetadata(2 * dimFormat.size());
|
|
for (size_t i = 0; i < dimFormat.size(); i++) {
|
|
dimFormat[i] = dimFormatPtr[i];
|
|
dimensions[i] = dimensionsPtr[i];
|
|
if (dimFormat[i] == 0) {
|
|
dimMetadata[i * 2] = {dimensions[i]};
|
|
} else {
|
|
dimMetadata[i * 2] = // array segments
|
|
arrToVector(dimMetadataPtrs[i * 2],
|
|
inputShapes[i * 2 + kInputArrSeg].dimensions.front());
|
|
dimMetadata[i * 2 + 1] = // array indices
|
|
arrToVector(dimMetadataPtrs[i * 2 + 1],
|
|
inputShapes[i * 2 + kInputArrIdx].dimensions.front());
|
|
}
|
|
}
|
|
|
|
// Creating blockSize vector
|
|
const int origRank = destShape.dimensions.size();
|
|
std::vector<int32_t> blockSize(
|
|
inputShapes[kInputBlockMap].dimensions.front()); // size of block map
|
|
for (uint32_t i = 0; i < inputShapes[kInputBlockMap].dimensions.front(); i++) {
|
|
const int32_t origDim = traversalOrder[origRank + i];
|
|
blockSize[i] = dimensions[origDim];
|
|
}
|
|
|
|
// Calculating the number of output entries
|
|
const size_t denseTotal =
|
|
std::accumulate(destShape.dimensions.begin(), destShape.dimensions.end(),
|
|
static_cast<size_t>(1), std::multiplies<>{});
|
|
T zeroPoint = T();
|
|
if (const OperandType type = inputShapes.front().type;
|
|
type == OperandType::TENSOR_QUANT8_ASYMM ||
|
|
type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED ||
|
|
type == OperandType::TENSOR_QUANT16_ASYMM) {
|
|
zeroPoint = static_cast<T>(inputShapes.front().offset);
|
|
}
|
|
|
|
T* destData = context->getOutputBuffer<T>(kOutputTensor);
|
|
for (size_t i = 0; i < denseTotal; i++) {
|
|
destData[i] = zeroPoint;
|
|
}
|
|
|
|
std::vector<int32_t> indices(
|
|
inputShapes[kInputTravOrder].dimensions.front()); // size of traversal order
|
|
populate(srcData, &indices, 0u, 0u, destData, destShape.dimensions, dimFormat, traversalOrder,
|
|
blockSize, blockMap, dimMetadata, origRank);
|
|
return true;
|
|
}
|
|
|
|
bool prepare(IOperationExecutionContext* context) {
|
|
// Setting OutputShape
|
|
Shape destShape = context->getInputShape(kInputTensor);
|
|
|
|
const int32_t* traversalOrder = context->getInputBuffer<int32_t>(kInputTravOrder);
|
|
const int32_t* blockMap = context->getInputBuffer<int32_t>(kInputBlockMap);
|
|
const int32_t* dimensions = context->getInputBuffer<int32_t>(kInputDimensions);
|
|
Shape dimensionsShape = context->getInputShape(kInputDimensions);
|
|
Shape blockMapShape = context->getInputShape(kInputBlockMap);
|
|
const uint32_t origRank = dimensionsShape.dimensions.front() - blockMapShape.dimensions.front();
|
|
std::vector<uint32_t> destDims(origRank);
|
|
|
|
size_t i = 0;
|
|
for (; i < destDims.size(); i++) {
|
|
const int32_t origDim = traversalOrder[i];
|
|
destDims[origDim] = dimensions[i];
|
|
}
|
|
for (; i < dimensionsShape.dimensions.front(); i++) {
|
|
const int32_t traversalIdx = traversalOrder[i] - origRank;
|
|
const int32_t origDim = blockMap[traversalIdx];
|
|
destDims[origDim] *= dimensions[i];
|
|
}
|
|
destShape.dimensions = destDims;
|
|
return context->setOutputShape(kOutputTensor, destShape);
|
|
}
|
|
|
|
bool execute(IOperationExecutionContext* context) {
|
|
switch (context->getInputType(kInputTensor)) {
|
|
case OperandType::TENSOR_BOOL8:
|
|
return densify<bool8>(context);
|
|
case OperandType::TENSOR_FLOAT32:
|
|
return densify<float>(context);
|
|
case OperandType::TENSOR_FLOAT16:
|
|
return densify<_Float16>(context);
|
|
case OperandType::TENSOR_INT32:
|
|
return densify<int32_t>(context);
|
|
case OperandType::TENSOR_QUANT8_ASYMM:
|
|
return densify<uint8_t>(context);
|
|
case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
|
|
case OperandType::TENSOR_QUANT8_SYMM:
|
|
return densify<int8_t>(context);
|
|
case OperandType::TENSOR_QUANT16_SYMM:
|
|
return densify<int16_t>(context);
|
|
case OperandType::TENSOR_QUANT16_ASYMM:
|
|
return densify<uint16_t>(context);
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
} // namespace densify_op
|
|
|
|
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(DENSIFY, densify_op::prepare, densify_op::execute,
|
|
.allowOmittedOperand = true);
|
|
|
|
} // namespace nn
|
|
} // namespace android
|
|
|
|
#endif // NN_EXPERIMENTAL_FEATURE
|