/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Manager"

#include "Manager.h"

#include <CpuExecutor.h>
#include <LegacyUtils.h>
#include <MetaModel.h>
#include <Tracing.h>
#include <android-base/properties.h>
#include <nnapi/IBurst.h>
#include <nnapi/IDevice.h>
#include <nnapi/IExecution.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/SharedMemory.h>
#include <nnapi/TypeUtils.h>
#include <nnapi/Types.h>
#include <nnapi/Validation.h>

#include <algorithm>
#include <functional>
#include <iterator>
#include <map>
#include <memory>
#include <regex>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "ExecutionCallback.h"
#include "Memory.h"
#include "ModelArgumentInfo.h"
#include "ServerFlag.h"
#include "TypeManager.h"

#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
#include <build/version.h>
#include <cutils/native_handle.h>
#include <nnapi/hal/1.3/Buffer.h>
#include <nnapi/hal/Service.h>
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD

#ifdef NN_EXPERIMENTAL_FEATURE
#include "NeuralNetworksExperimentalFeatures.h"
#endif  // NN_EXPERIMENTAL_FEATURE

namespace android {
namespace nn {
namespace {

Version getRuntimeFeatureLevelVersionHelper() {
#if defined(NN_EXPERIMENTAL_FEATURE) && defined(NN_COMPATIBILITY_LIBRARY_BUILD)
#error "NN_EXPERIMENTAL_FEATURE is not supported when NN_COMPATIBILITY_LIBRARY_BUILD is defined"
#elif defined(NN_EXPERIMENTAL_FEATURE)
    auto version = kVersionFeatureLevelExperimental;
    // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
    // features that are only available in the runtime.
    version.runtimeOnlyFeatures = true;
#elif defined(NN_COMPATIBILITY_LIBRARY_BUILD)
    auto version = serverFeatureLevelToVersion(kMaxFeatureLevelNum);
#else   // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
    auto version = serverFeatureLevelToVersion(getServerFeatureLevelFlag());
    // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
    // features that are only available in the runtime.
    version.runtimeOnlyFeatures = true;
#endif  // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
    return version;
}

Version getRuntimeFeatureLevelVersion() {
    static const Version version = getRuntimeFeatureLevelVersionHelper();
    return version;
}

bool getWhetherPlatformTelemetryIsEnabled() {
#if !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
    return getServerTelemetryEnableFlag();
#else   // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
    return false;
#endif  // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
}

}  // namespace

// A Device with an actual underlying driver.
class DriverDevice : public Device {
   public:
    // Create a DriverDevice from a SharedDevice handle to the underlying driver.
    // Returns nullptr on failure.
    static std::shared_ptr<DriverDevice> create(SharedDevice device);

    // Prefer using DriverDevice::create
    explicit DriverDevice(SharedDevice device);

    const std::string& getName() const override { return kInterface->getName(); }
    const std::string& getVersionString() const override { return kInterface->getVersionString(); }
    Version getFeatureLevel() const override { return kInterface->getFeatureLevel(); }
    int32_t getType() const override { return static_cast<int32_t>(kInterface->getType()); }
    const std::vector<Extension>& getSupportedExtensions() const override {
        return kInterface->getSupportedExtensions();
    }
    std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
    const Capabilities& getCapabilities() const override { return kInterface->getCapabilities(); }
    Capabilities::PerformanceInfo getPerformance(OperandType type) const override {
        return getCapabilities().operandPerformance.lookup(type);
    }
    Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
        return getCapabilities().relaxedFloat32toFloat16PerformanceScalar;
    }
    Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
        return getCapabilities().relaxedFloat32toFloat16PerformanceTensor;
    }
    Capabilities::PerformanceInfo getIfPerformance() const override {
        return getCapabilities().ifPerformance;
    }
    Capabilities::PerformanceInfo getWhilePerformance() const override {
        return getCapabilities().whilePerformance;
    }
    std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
        return kInterface->getNumberOfCacheFilesNeeded();
    }
    bool isCachingSupported() const override {
        // Caching is supported if either of numModelCache or numDataCache is greater than 0.
        const auto [numModelCacheFiles, numDataCacheFiles] = getNumberOfCacheFilesNeeded();
        return numModelCacheFiles > 0 || numDataCacheFiles > 0;
    }
    int wait() const override {
        auto result = kInterface->wait();
        if (!result.ok()) {
            LOG(ERROR) << "DriverDevice::wait error: " << result.error().message;
            return convertErrorStatusToResultCode(result.error().code);
        }
        return ANEURALNETWORKS_NO_ERROR;
    }

    std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
            const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
            const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
            const std::optional<CacheToken>& maybeToken,
            const std::vector<TokenValuePair>& metaData,
            const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;

    std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
                                                            OperandType) const override;

   private:
    const SharedDevice kInterface;

    GeneralResult<std::vector<bool>> getSupportedOperationsImpl(const MetaModel& metaModel) const;
    GeneralResult<SharedPreparedModel> prepareModelFromCacheInternal(
            const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
            const CacheToken& token) const;

#ifdef NN_DEBUGGABLE
    // For debugging: behavior of IDevice::getSupportedOperations for SampleDriver.
    // 0 - all operations reported by IDevice::getSupportedOperations() supported
    // 1 - some operations reported by IDevice::getSupportedOperations() supported
    uint32_t mSupported = 0;
#endif  // NN_DEBUGGABLE
};

// A RuntimePreparedModel with an underlying IPreparedModel instance returned by the actual driver.
class DriverPreparedModel : public RuntimePreparedModel {
   public:
    DriverPreparedModel(const Device* device, const SharedPreparedModel& preparedModel)
        : mDevice(device), mPreparedModel(preparedModel) {
        CHECK(mDevice != nullptr);
        CHECK(mPreparedModel != nullptr);
    }

    const Device* getDevice() const override { return mDevice; }
    SharedPreparedModel getInterface() const override { return mPreparedModel; }

    std::tuple<int, std::vector<OutputShape>, Timing> execute(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const override;

    std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const OptionalDuration& timeoutDurationAfterFence,
            const std::vector<TokenValuePair>& metaData) const override;

    std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const override;

    GeneralResult<SharedBurst> configureExecutionBurst() const override {
        return mPreparedModel->configureExecutionBurst();
    }

    MemoryPreference getMemoryPreference() const override {
        if (isCompliantVersion(kVersionFeatureLevel5, mDevice->getFeatureLevel())) {
            return {kDefaultRequestMemoryAlignment, kDefaultRequestMemoryPadding};
        } else {
            // We are not able to pass memory padding information to HIDL drivers, so return the
            // minimum padding.
            return {kDefaultRequestMemoryAlignment, kMinMemoryPadding};
        }
    }

   private:
    const Device* mDevice;
    const SharedPreparedModel mPreparedModel;
};

class DriverExecution : public RuntimeExecution {
   public:
    DriverExecution(SharedExecution execution, Request request,
                    std::vector<const RuntimeMemory*> memories, MeasureTiming measure,
                    OptionalDuration loopTimeoutDuration, Version deviceFeatureLevel,
                    const std::vector<TokenValuePair>& metaData)
        : kExecution(std::move(execution)),
          kRequest(std::move(request)),
          kMemories(std::move(memories)),
          kMeasure(measure),
          kLoopTimeoutDuration(std::move(loopTimeoutDuration)),
          kDeviceFeatureLevel(deviceFeatureLevel),
          kMetaData(metaData) {
        CHECK(kExecution != nullptr);
    }

    std::tuple<int, std::vector<OutputShape>, Timing> compute(
            const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;

    std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
            const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
            const OptionalDuration& timeoutDurationAfterFence) const override;

   private:
    const SharedExecution kExecution;

    // For burst execution.
    const Request kRequest;
    const std::vector<const RuntimeMemory*> kMemories;
    const MeasureTiming kMeasure;
    const OptionalDuration kLoopTimeoutDuration;
    mutable std::map<const IBurst*, SharedExecution> mCachedBurstExecutions;

    // For fenced execution.
    const Version kDeviceFeatureLevel;

    // Execution metadata.
    std::vector<TokenValuePair> kMetaData;
};

DriverDevice::DriverDevice(SharedDevice device) : kInterface(std::move(device)) {
    CHECK(kInterface != nullptr);
#ifdef NN_DEBUGGABLE
    static const char samplePrefix[] = "sample";
    if (getName().substr(0, sizeof(samplePrefix) - 1) == samplePrefix) {
        mSupported = getProp("debug.nn.sample.supported");
    }
#endif  // NN_DEBUGGABLE
}

std::shared_ptr<DriverDevice> DriverDevice::create(SharedDevice device) {
    if (device == nullptr) {
        LOG(ERROR) << "DriverDevice::create called with nullptr";
        return nullptr;
    }

    return std::make_shared<DriverDevice>(std::move(device));
}

int64_t DeviceManager::versionToFeatureLevel(Version::Level versionLevel) {
    switch (versionLevel) {
        case Version::Level::FEATURE_LEVEL_1:
            return ANEURALNETWORKS_FEATURE_LEVEL_1;
        case Version::Level::FEATURE_LEVEL_2:
            return ANEURALNETWORKS_FEATURE_LEVEL_2;
        case Version::Level::FEATURE_LEVEL_3:
            return ANEURALNETWORKS_FEATURE_LEVEL_3;
        case Version::Level::FEATURE_LEVEL_4:
            return ANEURALNETWORKS_FEATURE_LEVEL_4;
        case Version::Level::FEATURE_LEVEL_5:
            return ANEURALNETWORKS_FEATURE_LEVEL_5;
        case Version::Level::FEATURE_LEVEL_6:
            return ANEURALNETWORKS_FEATURE_LEVEL_6;
        case Version::Level::FEATURE_LEVEL_7:
            return ANEURALNETWORKS_FEATURE_LEVEL_7;
        case Version::Level::FEATURE_LEVEL_8:
            return ANEURALNETWORKS_FEATURE_LEVEL_8;
#ifdef NN_EXPERIMENTAL_FEATURE
        case Version::Level::FEATURE_LEVEL_EXPERIMENTAL:
            return ANEURALNETWORKS_FEATURE_LEVEL_EXPERIMENTAL;
#endif  // NN_EXPERIMENTAL_FEATURE
    }
    LOG(FATAL) << "Unrecognized version " << versionLevel;
    return -1;
}

GeneralResult<std::vector<bool>> DriverDevice::getSupportedOperationsImpl(
        const MetaModel& metaModel) const {
    const auto featureLevel = kInterface->getFeatureLevel();
    const auto slice = metaModel.getSlice(featureLevel);
    if (!slice.has_value()) {
        return NN_ERROR() << "getSlice(" << featureLevel << ") failed";
    }

    const auto& [sliceModel, slicedModelOperationIndexToModelOperationIndex] = *slice;
    const std::vector<bool> supported = NN_TRY(kInterface->getSupportedOperations(sliceModel));
    const uint32_t slicedOperationCount = sliceModel.main.operations.size();
    if (supported.size() != slicedOperationCount) {
        return NN_ERROR() << "IDevice::getSupportedOperations returned a vector of length "
                          << supported.size() << " when expecting " << slicedOperationCount;
    }

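    // Map the driver's results, which are indexed by sliced-model operation index, back onto the
    // original model's operation indices. Operations that were not part of the slice stay false.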
    const Model& model = metaModel.getModel();
    const uint32_t operationCount = model.main.operations.size();
    std::vector<bool> remappedSupported(operationCount, false);
    for (size_t i = 0; i < supported.size(); ++i) {
        if (supported[i]) {
            remappedSupported[slicedModelOperationIndexToModelOperationIndex(i)] = true;
        }
    }
    return remappedSupported;
}

std::vector<bool> DriverDevice::getSupportedOperations(const MetaModel& metaModel) const {
    const Model& model = metaModel.getModel();

    auto result = getSupportedOperationsImpl(metaModel);
    if (!result.ok()) {
        LOG(ERROR) << "getSupportedOperations failed with code " << result.error().code << ": "
                   << result.error().message;
        // Set the supported operation vectors to all false, so we won't use this driver.
        return std::vector<bool>(model.main.operations.size(), false);
    }

    std::vector<bool>& supportedOperations = result.value();
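    // Debug-only behavior: when the "debug.nn.sample.supported" property is set to 1 for a sample
    // driver, deterministically drop some of the operations the driver claims to support (based on
    // a hash of the operation type and its operands) to exercise model partitioning.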
#ifdef NN_DEBUGGABLE
    if (mSupported != 1) {
        return supportedOperations;
    }

    const uint32_t baseAccumulator = std::hash<std::string>{}(getName());
    for (size_t operationIndex = 0; operationIndex < supportedOperations.size(); operationIndex++) {
        if (!supportedOperations[operationIndex]) {
            continue;
        }

        uint32_t accumulator = baseAccumulator;
        const Operation& operation = model.main.operations[operationIndex];
        accumulator ^= static_cast<uint32_t>(operation.type);
        auto accumulateOperands = [&model, &accumulator](const std::vector<uint32_t>& operands) {
            for (uint32_t operandIndex : operands) {
                const Operand& operand = model.main.operands[operandIndex];
                accumulator ^= static_cast<uint32_t>(operand.type);
                accumulator ^= operand.dimensions.size();
                for (const Dimension& dimension : operand.dimensions) {
                    accumulator ^= dimension;
                    if (operand.lifetime == Operand::LifeTime::CONSTANT_COPY ||
                        operand.lifetime == Operand::LifeTime::CONSTANT_REFERENCE ||
                        operand.lifetime == Operand::LifeTime::POINTER) {
                        accumulator ^= 1;
                    }
                }
            }
        };
        accumulateOperands(operation.inputs);
        accumulateOperands(operation.outputs);
        if (accumulator & 1) {
            supportedOperations[operationIndex] = false;
        }
    }
#endif  // NN_DEBUGGABLE

    return supportedOperations;
}

// Opens a cache file for reading and writing and returns a shared handle.
static GeneralResult<SharedHandle> createCacheHandle(const std::string& filename,
                                                     bool createIfNotExist) {
    auto fd = base::unique_fd(open(filename.c_str(), createIfNotExist ? (O_RDWR | O_CREAT) : O_RDWR,
                                   S_IRUSR | S_IWUSR));
    if (!fd.ok()) {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
               << "Failed to " << (createIfNotExist ? "open or create" : "open") << " cache file "
               << filename;
    }
    return std::make_shared<const Handle>(std::move(fd));
}

// Opens a list of cache files and returns a vector of shared handles. The files
// are always opened with both read and write permissions.
static GeneralResult<std::vector<SharedHandle>> createCacheHandleVec(
        uint32_t numCacheFiles, const std::string& baseFilename, bool createIfNotExist) {
    CHECK(numCacheFiles <= kMaxNumberOfCacheFiles);
    std::vector<SharedHandle> handles;
    handles.reserve(numCacheFiles);
    for (uint32_t i = 0; i < numCacheFiles; i++) {
        std::string filename = baseFilename + std::to_string(i);
        VLOG(COMPILATION) << "Cache " << i << ": " << filename;
        handles.push_back(NN_TRY(createCacheHandle(filename, createIfNotExist)));
    }
    return handles;
}

// Maps a token to cache file names and returns a pair of vectors of shared
// handles to the opened files.
static GeneralResult<CacheHandles> getCacheHandles(
        const CacheInfo& cacheInfo, const CacheToken& token,
        const std::pair<uint32_t, uint32_t>& numCacheFiles, bool createIfNotExist) {
    if (const auto* cacheHandles = std::get_if<CacheHandles>(&cacheInfo.variant)) {
        if (cacheHandles->modelCache.size() != numCacheFiles.first) {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                   << "Expected " << numCacheFiles.first << " model cache handles, got "
                   << cacheHandles->modelCache.size();
        }
        if (cacheHandles->dataCache.size() != numCacheFiles.second) {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                   << "Expected " << numCacheFiles.second << " data cache handles, got "
                   << cacheHandles->dataCache.size();
        }
        return *cacheHandles;
    }

    // The filename includes kByteSizeOfCacheToken * 2 characters for token,
    // and 1 character for model/data cache identifier.
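    // Each token byte is encoded as two characters in the range 'A'..'P' (low nibble first), which
    // keeps the name filesystem-safe; the trailing character is later set to '1' for the model
    // cache and '2' for the data cache.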
    std::string filename(kByteSizeOfCacheToken * 2 + 1, '0');
    for (uint32_t i = 0; i < kByteSizeOfCacheToken; i++) {
        filename[i * 2] = 'A' + (token[i] & 0x0F);
        filename[i * 2 + 1] = 'A' + (token[i] >> 4);
    }

    const auto& cacheDir = std::get<CacheDir>(cacheInfo.variant);
    CHECK(cacheDir.empty() || cacheDir.back() == '/');
    std::string cacheFileName = cacheDir + filename;
    const uint32_t cacheTypeIdentifierIndex = cacheDir.size() + kByteSizeOfCacheToken * 2;

    cacheFileName[cacheTypeIdentifierIndex] = '1';
    std::vector<SharedHandle> modelCache =
            NN_TRY(createCacheHandleVec(numCacheFiles.first, cacheFileName, createIfNotExist));

    cacheFileName[cacheTypeIdentifierIndex] = '2';
    std::vector<SharedHandle> dataCache =
            NN_TRY(createCacheHandleVec(numCacheFiles.second, cacheFileName, createIfNotExist));

    return CacheHandles{
            .modelCache = std::move(modelCache),
            .dataCache = std::move(dataCache),
    };
}

GeneralResult<SharedPreparedModel> DriverDevice::prepareModelFromCacheInternal(
        const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
        const CacheToken& token) const {
    // Get cache files if they exist, otherwise return from the function early.
    auto cache = NN_TRY(getCacheHandles(cacheInfo, token, kInterface->getNumberOfCacheFilesNeeded(),
                                        /*createIfNotExist=*/false));
    return kInterface->prepareModelFromCache(deadline, cache.modelCache, cache.dataCache, token);
}

std::pair<int, std::shared_ptr<RuntimePreparedModel>> DriverDevice::prepareModel(
        const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
        const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
        const std::optional<CacheToken>& maybeToken, const std::vector<TokenValuePair>& metaData,
        const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const {
    // Attempt to compile from cache if token is present.
    if (maybeToken.has_value()) {
        auto result = prepareModelFromCacheInternal(deadline, cacheInfo, *maybeToken);
        if (result.has_value()) {
            LOG(INFO) << "prepareModelFromCache: successfully prepared model from cache";
            return {ANEURALNETWORKS_NO_ERROR,
                    std::make_shared<DriverPreparedModel>(this, std::move(result).value())};
        } else {
            LOG(ERROR) << "prepareModelFromCache failure (" << result.error().code
                       << "): " << result.error().message;
        }
    }

    // Get cache files if they exist, otherwise create them.
    CacheHandles cache;
    if (maybeToken.has_value()) {
        auto result =
                getCacheHandles(cacheInfo, *maybeToken, kInterface->getNumberOfCacheFilesNeeded(),
                                /*createIfNotExist=*/true);
        if (result.has_value()) {
            cache = std::move(result).value();
        } else {
            LOG(ERROR) << "getCacheHandles failure (" << result.error().code
                       << "): " << result.error().message;
        }
    }

    // Get the token if it exists, otherwise get a null token.
    static constexpr CacheToken kNullToken = {};
    const CacheToken token = maybeToken.value_or(kNullToken);

    // Fall back to full compilation (possibly with token) if
    // prepareModelFromCache could not be used or failed.
    const Model model = makeModel();
    auto result =
            kInterface->prepareModel(model, preference, priority, deadline, cache.modelCache,
                                     cache.dataCache, token, metaData, extensionNameAndPrefix);
    if (!result.ok()) {
        LOG(ERROR) << "IDevice::prepareModel() error: " << result.error().message;
        return {convertErrorStatusToResultCode(result.error().code), nullptr};
    }
    SharedPreparedModel preparedModel = std::move(result).value();
    CHECK(preparedModel != nullptr)
            << "IDevice::prepareModel() returned nullptr without error code";
    return {ANEURALNETWORKS_NO_ERROR,
            std::make_shared<DriverPreparedModel>(this, std::move(preparedModel))};
}

std::pair<int, std::unique_ptr<RuntimeMemory>> DriverDevice::allocate(const MemoryDescriptor& desc,
                                                                      OperandType) const {
    const BufferDesc bufferDesc = {.dimensions = desc.dimensions};
    std::vector<SharedPreparedModel> preparedModels(desc.preparedModels.size());
    std::transform(desc.preparedModels.begin(), desc.preparedModels.end(), preparedModels.begin(),
                   [](const auto* preparedModel) {
                       const auto versionedPreparedModel = preparedModel->getInterface();
                       CHECK(versionedPreparedModel != nullptr);
                       return versionedPreparedModel;
                   });
    auto result =
            kInterface->allocate(bufferDesc, preparedModels, desc.inputRoles, desc.outputRoles);
    if (!result.ok()) {
        LOG(ERROR) << "DriverDevice::allocate -- memory allocation on device " << getName()
                   << " failed!";
        return {convertErrorStatusToResultCode(result.error().code), nullptr};
    }
    return MemoryFromDevice::create(std::move(result).value());
}

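// Converts the runtime's argument and memory bookkeeping into a canonical Request: each
// ModelArgumentInfo becomes a RequestArgument, and each RuntimeMemory contributes a memory pool.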
static Request createDriverRequest(const std::vector<ModelArgumentInfo>& inputs,
                                   const std::vector<ModelArgumentInfo>& outputs,
                                   const std::vector<const RuntimeMemory*>& memories) {
    Request request;
    request.inputs.reserve(inputs.size());
    std::transform(inputs.begin(), inputs.end(), std::back_inserter(request.inputs),
                   [](const auto& input) { return input.createRequestArgument(); });
    request.outputs.reserve(outputs.size());
    std::transform(outputs.begin(), outputs.end(), std::back_inserter(request.outputs),
                   [](const auto& output) { return output.createRequestArgument(); });
    request.pools.reserve(memories.size());
    std::transform(memories.begin(), memories.end(), std::back_inserter(request.pools),
                   [](const RuntimeMemory* memory) { return memory->getMemoryPool(); });
    return request;
}

// Perform computation on an actual device driver.
//
// Because HIDL cannot take raw pointers, two separate memory pools will be allocated for inputs and
// outputs specified by pointers. The input pointer data will be copied to the input pool prior to
// execution, and the output pointer data will be copied out from the output pool after the
// execution.
std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
        MeasureTiming measure, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& metaData) const {
    NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");

    auto request = createDriverRequest(inputs, outputs, memories);

    NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::execute::execute");

    ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> result;

    // compute using burst if present, otherwise compute from IPreparedModel
    const bool burstCompute = (burstController != nullptr);
    if (burstCompute) {
        for (const RuntimeMemory* memory : memories) {
            const auto pool = memory->getMemoryPool();
            if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
                auto cacheHold = burstController->cacheMemory(*maybeMemory);
                memory->hold(cacheHold);
            }
        }

        VLOG(EXECUTION) << "Before burstController->execute() " << SHOW_IF_DEBUG(request);
        result = burstController->execute(request, measure, deadline, loopTimeoutDuration, metaData,
                                          TypeManager::get()->getExtensionNameAndPrefix(metaData));
    } else {
        result = mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
                                         TypeManager::get()->getExtensionNameAndPrefix(metaData));
    }

    int n = ANEURALNETWORKS_OP_FAILED;
    std::vector<OutputShape> outputShapes;
    Timing timing;

    if (result.ok()) {
        n = ANEURALNETWORKS_NO_ERROR;
        std::tie(outputShapes, timing) = std::move(result).value();
    } else {
        auto [message, code, returnedOutputShapes] = std::move(result).error();
        n = convertErrorStatusToResultCode(code);
        VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
        LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
                   << "::execute(...) error: " << message;
        if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
            outputShapes = std::move(returnedOutputShapes);
        }
        return {n, std::move(outputShapes), timing};
    }

    VLOG(EXECUTION) << "DriverPreparedModel::execute completed";
    return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
}

std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverPreparedModel::executeFenced(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
        MeasureTiming measure, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration,
        const OptionalDuration& timeoutDurationAfterFence,
        const std::vector<TokenValuePair>& metaData) const {
    NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::executeFenced");
    CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));

    auto request = createDriverRequest(inputs, outputs, memories);

    NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::executeFenced");

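    // Duplicate the incoming sync fence FDs so this execution owns its copies and the caller's
    // descriptors remain untouched.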
    std::vector<SyncFence> waitForHandles;
    waitForHandles.reserve(waitFor.size());
    for (int fd : waitFor) {
        int dupFd = dup(fd);
        if (dupFd < 0) {
            LOG(ERROR) << "Unable to dup the file descriptor";
            return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
        }
        waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
    }

    SyncFence syncFence = SyncFence::createAsSignaled();
    ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
    Timing timing = {};
    if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, mDevice->getFeatureLevel())) {
        auto result = mPreparedModel->executeFenced(
                request, waitForHandles, measure, deadline, loopTimeoutDuration,
                timeoutDurationAfterFence, metaData,
                TypeManager::get()->getExtensionNameAndPrefix(metaData));
        if (!result.ok()) {
            LOG(ERROR) << "IPreparedModel::executeFenced() error: " << result.error().message;
            VLOG(EXECUTION) << "**executeFenced failed**";
            return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
        }
        std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
    } else {
        // Fall back to synchronous execution if executeFenced is not supported.
        // First wait for all sync fences to be ready.
        LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
        for (const auto& fence : waitForHandles) {
            if (!fence.hasFd() || fence.getFd() < 0) {
                return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
            }
            auto r = fence.syncWait({/* no timeout */});
            if (r != SyncFence::FenceState::SIGNALED) {
                LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
        auto result =
                mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
                                        TypeManager::get()->getExtensionNameAndPrefix(metaData));
        if (!result.ok()) {
            LOG(ERROR) << "IPreparedModel::execute() error: " << result.error().message;
            return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
        }
        std::tie(std::ignore, timing) = result.value();
    }

    int syncFenceFd = -1;
    if (syncFence.hasFd()) {
        syncFenceFd = dup(syncFence.getFd());
        if (syncFenceFd < 0) {
            LOG(ERROR) << "Failed to dup the file descriptor";
            return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
        }
    }

    VLOG(EXECUTION) << "DriverPreparedModel::executeFenced completed";
    return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
}

std::pair<int, std::shared_ptr<RuntimeExecution>> DriverPreparedModel::createReusableExecution(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& metaData) const {
    NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::createReusableExecution");

    auto request = createDriverRequest(inputs, outputs, memories);
    auto result = mPreparedModel->createReusableExecution(
            request, measure, loopTimeoutDuration, metaData,
            TypeManager::get()->getExtensionNameAndPrefix(metaData));
    if (!result.ok()) {
        LOG(ERROR) << "IPreparedModel::createReusableExecution() error: " << result.error().message;
        const int n = convertErrorStatusToResultCode(result.error().code);
        return {n, nullptr};
    }
    auto execution = std::make_shared<DriverExecution>(
            std::move(result).value(), std::move(request), memories, measure, loopTimeoutDuration,
            mDevice->getFeatureLevel(), metaData);
    return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
}

std::tuple<int, std::vector<OutputShape>, Timing> DriverExecution::compute(
        const SharedBurst& burstController, const OptionalTimePoint& deadline) const {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::compute");

    // compute using burst if present, otherwise compute from IPreparedModel
    SharedExecution execution;
    const bool burstCompute = (burstController != nullptr);
    if (burstCompute) {
        // create a reusable burst execution if the controller has not been seen before
        auto burstExecution = mCachedBurstExecutions.find(burstController.get());
        if (burstExecution == mCachedBurstExecutions.end()) {
            for (const RuntimeMemory* memory : kMemories) {
                const auto pool = memory->getMemoryPool();
                if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
                    auto cacheHold = burstController->cacheMemory(*maybeMemory);
                    memory->hold(cacheHold);
                }
            }
            auto createResult = burstController->createReusableExecution(
                    kRequest, kMeasure, kLoopTimeoutDuration, kMetaData,
                    TypeManager::get()->getExtensionNameAndPrefix(kMetaData));
            if (!createResult.ok()) {
                LOG(ERROR) << "IBurst::createReusableExecution() error: "
                           << createResult.error().message;
                const int n = convertErrorStatusToResultCode(createResult.error().code);
                return {n, {}, {}};
            }
            execution = std::move(createResult).value();
            mCachedBurstExecutions.emplace(burstController.get(), execution);
        } else {
            execution = burstExecution->second;
        }
        VLOG(EXECUTION) << "Before mBurstExecution->compute() " << SHOW_IF_DEBUG(kRequest);
    } else {
        execution = kExecution;
    }

    CHECK(execution != nullptr);
    auto result = execution->compute(deadline);
    if (!result.ok()) {
        auto [message, code, returnedOutputShapes] = std::move(result).error();
        int n = convertErrorStatusToResultCode(code);
        VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
        LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
                   << "::execute(...) error: " << message;
        if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
            return {n, std::move(returnedOutputShapes), {}};
        }
        return {n, {}, {}};
    }

    VLOG(EXECUTION) << "DriverExecution::compute completed";
    auto [outputShapes, timing] = std::move(result).value();
    return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
}

std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverExecution::computeFenced(
        const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
        const OptionalDuration& timeoutDurationAfterFence) const {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::computeFenced");
    CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));

    std::vector<SyncFence> waitForHandles;
    waitForHandles.reserve(waitFor.size());
    for (int fd : waitFor) {
        int dupFd = dup(fd);
        if (dupFd < 0) {
            LOG(ERROR) << "Unable to dup the file descriptor";
            return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
        }
        waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
    }

    SyncFence syncFence = SyncFence::createAsSignaled();
    ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
    Timing timing = {};
    if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, kDeviceFeatureLevel)) {
        auto result =
                kExecution->computeFenced(waitForHandles, deadline, timeoutDurationAfterFence);
        if (!result.ok()) {
            LOG(ERROR) << "IExecution::computeFenced() error: " << result.error().message;
            VLOG(EXECUTION) << "**computeFenced failed**";
            return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
        }
        std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
    } else {
        // Fall back to synchronous execution if computeFenced is not supported.
        // First wait for all sync fences to be ready.
        LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
        for (const auto& fence : waitForHandles) {
            if (!fence.hasFd() || fence.getFd() < 0) {
                return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
            }
            auto r = fence.syncWait({/* no timeout */});
            if (r != SyncFence::FenceState::SIGNALED) {
                LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
        auto result = kExecution->compute(deadline);
        if (!result.ok()) {
            LOG(ERROR) << "IExecution::compute() error: " << result.error().message;
            return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
        }
        std::tie(std::ignore, timing) = result.value();
    }

    int syncFenceFd = -1;
    if (syncFence.hasFd()) {
        syncFenceFd = dup(syncFence.getFd());
        if (syncFenceFd < 0) {
            LOG(ERROR) << "Failed to dup the file descriptor";
            return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
        }
    }

    VLOG(EXECUTION) << "DriverExecution::computeFenced completed";
    return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
}

static Capabilities createCpuCapabilities() {
    constexpr Capabilities::PerformanceInfo kPerf = {.execTime = 1.0f, .powerUsage = 1.0f};
    return makeCapabilities(kPerf, kPerf, kPerf);
}

// A special abstracted device for the CPU. Only one instance of this class will exist.
// Use get() to retrieve it.
class CpuDevice : public Device {
   public:
    // Returns the singleton CPU fallback device.
    static std::shared_ptr<CpuDevice> get() {
        static std::shared_ptr<CpuDevice> instance(new CpuDevice);
        return instance;
    }

    const std::string& getName() const override { return kName; }
    const std::string& getVersionString() const override { return kVersionString; }
    Version getFeatureLevel() const override { return kVersion; }
    int32_t getType() const override { return ANEURALNETWORKS_DEVICE_CPU; }
    const std::vector<Extension>& getSupportedExtensions() const override {
        return kSupportedExtensions;
    }
    std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
    const Capabilities& getCapabilities() const override { return kCapabilities; }
    Capabilities::PerformanceInfo getPerformance(OperandType) const override {
        return kPerformance;
    }
    Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
        return kPerformance;
    }
    Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
        return kPerformance;
    }
    Capabilities::PerformanceInfo getIfPerformance() const override { return kPerformance; }
    Capabilities::PerformanceInfo getWhilePerformance() const override { return kPerformance; }
    std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
        return {/*numModelCache=*/0, /*numDataCache=*/0};
    }
    bool isCachingSupported() const override { return false; }
    int wait() const override { return ANEURALNETWORKS_NO_ERROR; }

    std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
            const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
            const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
            const std::optional<CacheToken>& maybeToken,
            const std::vector<TokenValuePair>& metaData,
            const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;

    std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
                                                            OperandType type) const override;

   private:
    CpuDevice() = default;
    const Version kVersion = getRuntimeFeatureLevelVersion();
    const std::string kName = "nnapi-reference";
#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
    const std::string kVersionString = build::GetBuildNumber();
#else
    const std::string kVersionString = "UNKNOWN";
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD
    // Since the performance is a ratio compared to the CPU performance,
    // by definition the performance of the CPU is 1.0.
    const Capabilities::PerformanceInfo kPerformance = {.execTime = 1.0f, .powerUsage = 1.0f};
    const Capabilities kCapabilities = createCpuCapabilities();
    const std::vector<Extension> kSupportedExtensions{/* No extensions. */};
};

// A special abstracted RuntimePreparedModel for the CPU, constructed by CpuDevice.
class CpuPreparedModel : public RuntimePreparedModel {
   public:
    // Factory method for CpuPreparedModel. Returns ANEURALNETWORKS_NO_ERROR and
    // a prepared model object if successfully created. Returns an error code
    // and nullptr otherwise.
    static std::pair<int, std::shared_ptr<RuntimePreparedModel>> create(Model model);

    const Device* getDevice() const override { return CpuDevice::get().get(); }
    SharedPreparedModel getInterface() const override { return nullptr; }

    std::tuple<int, std::vector<OutputShape>, Timing> execute(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const override;

    GeneralResult<SharedBurst> configureExecutionBurst() const override { return nullptr; }

    std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const OptionalDuration& timeoutDurationAfterFence,
            const std::vector<TokenValuePair>& metaData) const override;

    std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const override;

    MemoryPreference getMemoryPreference() const override {
        return {kPreferredAlignment, kPreferredPadding};
    }

    // Prefer to use CpuPreparedModel::create.
    CpuPreparedModel(Model model, std::vector<RunTimePoolInfo> poolInfos)
        : mModel(std::move(model)), mModelPoolInfos(std::move(poolInfos)) {}

    const Model& getModel() const { return mModel; }
    const std::vector<RunTimePoolInfo>& getModelPoolInfos() const { return mModelPoolInfos; }

   private:
    // TFLite kernels prefer 64 bytes for padding and alignment.
    static constexpr uint32_t kPreferredAlignment = 64;
    static constexpr uint32_t kPreferredPadding = 64;

    const Model mModel;
    const std::vector<RunTimePoolInfo> mModelPoolInfos;
};

class CpuExecution : public RuntimeExecution {
   public:
    CpuExecution(const CpuPreparedModel& preparedModel, Request request,
                 std::vector<RunTimePoolInfo> requestPoolInfos,
                 OptionalDuration loopTimeoutDuration)
        : kPreparedModel(preparedModel),
          kRequest(std::move(request)),
          kRequestPoolInfos(std::move(requestPoolInfos)),
          kLoopTimeoutDuration(std::move(loopTimeoutDuration)) {}

    std::tuple<int, std::vector<OutputShape>, Timing> compute(
            const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;

    std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
            const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
            const OptionalDuration& timeoutDurationAfterFence) const override;

   private:
    const CpuPreparedModel& kPreparedModel;
    Request kRequest;
    std::vector<RunTimePoolInfo> kRequestPoolInfos;
    const OptionalDuration kLoopTimeoutDuration;
};

std::vector<bool> CpuDevice::getSupportedOperations(const MetaModel& metaModel) const {
    const Model& model = metaModel.getModel();
    const size_t count = model.main.operations.size();
    std::vector<bool> result(count, false);
    for (size_t i = 0; i < count; i++) {
        // TODO(b/119870033): Decide whether and how post-P operations would be supported on CPU.
        //                    We may want to use the slicer for CpuDevice just as we do for
        //                    DriverDevice.
        OperationType operationType = model.main.operations[i].type;
        result[i] = !isExtension(operationType) && operationType != OperationType::OEM_OPERATION;
    }
    return result;
}

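// Validates the given canonical object and checks that the feature level it requires is not newer
// than what this runtime supports.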
| template <typename Type>
 | |
| static Result<void> validateAndCheckCompliance(const Type& object) {
 | |
|     const auto version = NN_TRY(validate(object));
 | |
|     if (!isCompliantVersion(version, DeviceManager::get()->getRuntimeVersion())) {
 | |
|         return NN_ERROR() << "Object than is newer what is allowed. Version needed: " << version
 | |
|                           << ", current runtime version supported: "
 | |
|                           << DeviceManager::get()->getRuntimeVersion();
 | |
|     }
 | |
|     return {};
 | |
| }
 | |
| 
 | |
| std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuDevice::prepareModel(
 | |
|         const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
 | |
|         const OptionalTimePoint& deadline, const CacheInfo& /*cacheInfo*/,
 | |
|         const std::optional<CacheToken>& maybeToken,
 | |
|         const std::vector<TokenValuePair>& /*metaData*/,
 | |
|         const std::vector<ExtensionNameAndPrefix>& /*extensionNameAndPrefix*/) const {
 | |
|     CHECK(!maybeToken.has_value())
 | |
|             << "Should never call prepareModel with cache information on CpuDevice";
 | |
| 
 | |
|     const Model model = makeModel();
 | |
|     if (auto result = validateAndCheckCompliance(model); !result.ok()) {
 | |
|         LOG(ERROR) << "Invalid Model: " << result.error();
 | |
|         return {ANEURALNETWORKS_OP_FAILED, nullptr};
 | |
|     }
 | |
|     if (auto result = validateAndCheckCompliance(preference); !result.ok()) {
 | |
|         LOG(ERROR) << "Invalid ExecutionPreference: " << result.error();
 | |
|         return {ANEURALNETWORKS_OP_FAILED, nullptr};
 | |
|     }
 | |
|     if (auto result = validateAndCheckCompliance(priority); !result.ok()) {
 | |
|         LOG(ERROR) << "Invalid Priority: " << result.error();
 | |
|         return {ANEURALNETWORKS_OP_FAILED, nullptr};
 | |
|     }
 | |
|     if (hasDeadlinePassed(deadline)) {
 | |
|         return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, nullptr};
 | |
|     }
 | |
| 
 | |
|     return CpuPreparedModel::create(model);
 | |
| }
 | |
| 
 | |
std::pair<int, std::unique_ptr<RuntimeMemory>> CpuDevice::allocate(const MemoryDescriptor& desc,
                                                                   OperandType type) const {
    uint32_t size = TypeManager::get()->getSizeOfData(type, desc.dimensions);
    if (size == 0) {
        LOG(ERROR) << "CpuDevice::allocate -- does not support unknown dimensions.";
        return {ANEURALNETWORKS_OP_FAILED, nullptr};
    }
    return MemoryAshmem::create(size);
}

std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuPreparedModel::create(Model model) {
    std::vector<RunTimePoolInfo> poolInfos;
    if (!setRunTimePoolInfosFromCanonicalMemories(&poolInfos, model.pools)) {
        return {ANEURALNETWORKS_UNMAPPABLE, nullptr};
    }

    std::shared_ptr<RuntimePreparedModel> preparedModel =
            std::make_shared<CpuPreparedModel>(std::move(model), std::move(poolInfos));
    return {ANEURALNETWORKS_NO_ERROR, std::move(preparedModel)};
}

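// Runs `model` against `request` on a CpuExecutor, honoring the optional loop timeout and
// deadline, and returns the resulting error code, the output shapes, and (empty) timing
// information.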
static std::tuple<int, std::vector<OutputShape>, Timing> computeOnCpu(
        const Model& model, const Request& request,
        const std::vector<RunTimePoolInfo>& modelPoolInfos,
        const std::vector<RunTimePoolInfo>& requestPoolInfos, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration) {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
    CpuExecutor executor;
    if (loopTimeoutDuration.has_value()) {
        executor.setLoopTimeout(loopTimeoutDuration->count());
    }
    if (deadline.has_value()) {
        executor.setDeadline(*deadline);
    }
    int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
    const auto& outputShapes = executor.getOutputShapes();
    return {err, outputShapes, {}};
}

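// Fenced execution on the CPU fallback: waits for all provided sync fences to signal, tightens
// the deadline with `duration` if that is sooner, then performs a regular synchronous execute().
// Because the computation completes before returning, no sync fence (-1) and no callback are
// handed back to the caller.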
std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuPreparedModel::executeFenced(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
        MeasureTiming measure, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration, const OptionalDuration& duration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    VLOG(EXECUTION)
            << "CpuPreparedModel::executeFenced wait for sync fences to signal before execution";
    for (int syncFd : waitFor) {
        if (syncFd > 0) {
            auto r = syncWait(syncFd, -1);
            if (r != FenceState::SIGNALED) {
                LOG(ERROR) << "sync wait failed, fd: " << syncFd;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
    }

    // Update deadline if the timeout duration is closer than the deadline.
    auto closestDeadline = deadline;
    if (duration.has_value()) {
        const auto timeoutDurationDeadline = makeDeadline(*duration);
        if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
            closestDeadline = timeoutDurationDeadline;
        }
    }

    const auto [result, outputShapes, timing] = execute(inputs, outputs, memories, nullptr, measure,
                                                        closestDeadline, loopTimeoutDuration, {});
    return {result, -1, nullptr, timing};
}

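// Builds a canonical Request plus the RunTimePoolInfos the CPU executor needs: each RuntimeMemory
// becomes one pool, and every input/output passed by pointer gets an additional pool that wraps
// the caller's buffer directly, so no data has to be copied.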
static std::tuple<int, Request, std::vector<RunTimePoolInfo>> createCpuRequest(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories) {
    std::vector<RunTimePoolInfo> requestPoolInfos;
    requestPoolInfos.reserve(memories.size());
    for (const RuntimeMemory* mem : memories) {
        if (std::optional<RunTimePoolInfo> poolInfo = mem->getRunTimePoolInfo()) {
            requestPoolInfos.emplace_back(*poolInfo);
        } else {
            return {ANEURALNETWORKS_UNMAPPABLE, {}, {}};
        }
    }
    // Create one pool for each input/output passed by pointer, so the executor can read from and
    // write to the caller's buffers directly.
    auto fixPointerArguments =
            [&requestPoolInfos](const std::vector<ModelArgumentInfo>& argumentInfos) {
                std::vector<DataLocation> ptrArgsLocations;
                for (const ModelArgumentInfo& argumentInfo : argumentInfos) {
                    if (argumentInfo.state() == ModelArgumentInfo::POINTER) {
                        ptrArgsLocations.push_back(
                                {.poolIndex = static_cast<uint32_t>(requestPoolInfos.size()),
                                 .offset = 0,
                                 .length = argumentInfo.length(),
                                 .padding = argumentInfo.padding()});
                        requestPoolInfos.emplace_back(RunTimePoolInfo::createFromExistingBuffer(
                                static_cast<uint8_t*>(argumentInfo.buffer())));
                    }
                }
                return ptrArgsLocations;
            };
    const std::vector<DataLocation> inputPtrArgsLocations = fixPointerArguments(inputs);
    const std::vector<DataLocation> outputPtrArgsLocations = fixPointerArguments(outputs);

    Request request;
    request.inputs = createRequestArguments(inputs, inputPtrArgsLocations);
    request.outputs = createRequestArguments(outputs, outputPtrArgsLocations);
    return {ANEURALNETWORKS_NO_ERROR, std::move(request), std::move(requestPoolInfos)};
}

// Perform computation on the NNAPI CPU reference implementation.
//
// Unlike DriverPreparedModel::execute, the NNAPI CPU reference executor lives in the
// same process as the NNAPI runtime and can take raw pointers. Each pointer-based input/output
// gets its own pool backed by the caller's buffer, so no data has to be copied.
//
// Chooses between sync/async execution according to DeviceManager::mSyncExecCpu.
std::tuple<int, std::vector<OutputShape>, Timing> CpuPreparedModel::execute(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const SharedBurst& /*burstController*/,
        MeasureTiming /*measure*/, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    if (hasDeadlinePassed(deadline)) {
        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
    }

    int nCreateRequest;
    Request request;
    std::vector<RunTimePoolInfo> requestPoolInfos;
    std::tie(nCreateRequest, request, requestPoolInfos) =
            createCpuRequest(inputs, outputs, memories);
    if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
        return {nCreateRequest, {}, {}};
    }

    if (!DeviceManager::get()->syncExecCpu()) {
        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        //              of spinning up a new thread.
        std::tuple<int, std::vector<OutputShape>, Timing> result = {};
        std::thread([this, &request, &requestPoolInfos, &deadline, &loopTimeoutDuration, &result] {
            result = computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                                  loopTimeoutDuration);
        }).join();
        return result;
    }

    return computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                        loopTimeoutDuration);
}

std::pair<int, std::shared_ptr<RuntimeExecution>> CpuPreparedModel::createReusableExecution(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, MeasureTiming /*measure*/,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    auto [nCreateRequest, request, requestPoolInfos] = createCpuRequest(inputs, outputs, memories);
    if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
        return {nCreateRequest, nullptr};
    }
    auto execution = std::make_shared<CpuExecution>(
            *this, std::move(request), std::move(requestPoolInfos), loopTimeoutDuration);
    return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
}

std::tuple<int, std::vector<OutputShape>, Timing> CpuExecution::compute(
        const SharedBurst& /*burstController*/, const OptionalTimePoint& deadline) const {
    if (hasDeadlinePassed(deadline)) {
        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
    }

    if (!DeviceManager::get()->syncExecCpu()) {
        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        //              of spinning up a new thread.
        std::tuple<int, std::vector<OutputShape>, Timing> result = {};
        std::thread([this, &deadline, &result] {
            result = computeOnCpu(kPreparedModel.getModel(), kRequest,
                                  kPreparedModel.getModelPoolInfos(), kRequestPoolInfos, deadline,
                                  kLoopTimeoutDuration);
        }).join();
        return result;
    }

    return computeOnCpu(kPreparedModel.getModel(), kRequest, kPreparedModel.getModelPoolInfos(),
                        kRequestPoolInfos, deadline, kLoopTimeoutDuration);
}

std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuExecution::computeFenced(
        const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
        const OptionalDuration& duration) const {
    VLOG(EXECUTION)
            << "CpuExecution::computeFenced wait for sync fences to signal before execution";
    for (int syncFd : waitFor) {
        if (syncFd > 0) {
            auto r = syncWait(syncFd, -1);
            if (r != FenceState::SIGNALED) {
                LOG(ERROR) << "sync wait failed, fd: " << syncFd;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
    }

    // Update deadline if the timeout duration is closer than the deadline.
    auto closestDeadline = deadline;
    if (duration.has_value()) {
        const auto timeoutDurationDeadline = makeDeadline(*duration);
        if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
            closestDeadline = timeoutDurationDeadline;
        }
    }

    const auto [result, outputShapes, timing] = compute(nullptr, closestDeadline);
    return {result, -1, nullptr, timing};
}

int64_t DeviceManager::getRuntimeFeatureLevel() const {
    return versionToFeatureLevel(mRuntimeVersion.level);
}

DeviceManager* DeviceManager::get() {
    static DeviceManager manager;
    return &manager;
}

std::shared_ptr<Device> DeviceManager::getCpuDevice() {
    return CpuDevice::get();
}

std::shared_ptr<Device> DeviceManager::forTest_makeDriverDevice(const SharedDevice& device) {
    VLOG(MANAGER) << "forTest_makeDriverDevice(" << device->getName() << ")";
    const auto driverDevice = DriverDevice::create(device);
    CHECK(driverDevice != nullptr);
    return driverDevice;
}

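// Enumerates the available NNAPI driver devices and wraps each one in a DriverDevice. The regular
// Android build queries the HAL service layer with maxFeatureLevelAllowed; the
// NN_COMPATIBILITY_LIBRARY_BUILD variant uses its own getDevices() instead, and non-Android
// builds return no drivers.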
#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
        [[maybe_unused]] Version::Level maxFeatureLevelAllowed) {
#ifdef __ANDROID__
    auto devices = hardware::neuralnetworks::service::getDevices(maxFeatureLevelAllowed);

    std::vector<std::shared_ptr<DriverDevice>> driverDevices;
    driverDevices.reserve(devices.size());
    for (auto& device : devices) {
        driverDevices.push_back(DriverDevice::create(std::move(device)));
    }
    return driverDevices;
#else   // __ANDROID__
    return {};
#endif  // __ANDROID__
}
#else
std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
        Version::Level /*maxFeatureLevelAllowed*/) {
    auto devices = getDevices();
    std::vector<std::shared_ptr<DriverDevice>> driverDevices;
    driverDevices.reserve(devices.size());
    for (auto& device : devices) {
        driverDevices.push_back(DriverDevice::create(std::move(device)));
    }
    return driverDevices;
}
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD

void DeviceManager::findAvailableDevices() {
    VLOG(MANAGER) << "findAvailableDevices";

#ifdef NN_DEBUGGABLE
    // debug.nn.enabled-devices defines a regex pattern. Of all available driver devices, only the
    // ones whose names match the pattern are enabled; driver devices with non-matching names are
    // ignored. If this property is not set, all available driver devices are enabled by default.
    // This filter only applies to driver devices; nnapi-reference is always enabled.
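    // For example (the device names here are hypothetical), on a debuggable build
    //   adb shell setprop debug.nn.enabled-devices "example-npu|example-dsp"
    // would register only the drivers named "example-npu" and "example-dsp"; the CPU fallback
    // registered below is unaffected by this filter.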
    std::string patternStr = base::GetProperty("debug.nn.enabled-devices", ".*");
    LOG(INFO) << "Enabled devices: " << patternStr;
    const std::regex pattern(patternStr);
#endif  // NN_DEBUGGABLE

    // register driver devices
    auto driverDevices = getDriverDevices(mRuntimeVersion.level);
    for (auto& driverDevice : driverDevices) {
#ifdef NN_DEBUGGABLE
        if (!std::regex_match(driverDevice->getName(), pattern)) {
            LOG(INFO) << "Ignored interface " << driverDevice->getName()
                      << " (version = " << driverDevice->getVersionString() << ")";
            continue;
        }
#endif  // NN_DEBUGGABLE
        LOG(INFO) << "Found interface " << driverDevice->getName()
                  << " (version = " << driverDevice->getVersionString() << ")";
        mDevices.push_back(std::move(driverDevice));
    }

#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
    // register CPU fallback device
    mDevices.push_back(CpuDevice::get());
    mDevicesCpuOnly.push_back(CpuDevice::get());
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD
}

void DeviceManager::registerDevice(const SharedDevice& device) {
    if (auto driverDevice = DriverDevice::create(device)) {
        mDevices.push_back(std::move(driverDevice));
    }
}

DeviceManager::DeviceManager() {
    VLOG(MANAGER) << "DeviceManager::DeviceManager";
    mRuntimeVersion = getRuntimeFeatureLevelVersion();
    mIsPlatformTelemetryEnabled = getWhetherPlatformTelemetryIsEnabled();
    findAvailableDevices();
#ifdef NN_DEBUGGABLE
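    // Debug-only overrides, read from system properties on debuggable builds. For example,
    //   adb shell setprop debug.nn.syncexec-cpu 0
    // clears mSyncExecCpu, which makes CpuPreparedModel::execute and CpuExecution::compute take
    // the threaded path above instead of running synchronously on the caller's thread.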
    mStrictSlicing = (getProp("debug.nn.strict-slicing") != 0);
    mPartitioning = getProp("debug.nn.partition", kPartitioningDefault);
    mDebugNNCpuOnly = (getProp("debug.nn.cpuonly") != 0);
    mSyncExecCpu = (getProp("debug.nn.syncexec-cpu", 1) != 0);
    mSyncExecRuntime = (getProp("debug.nn.syncexec-runtime") != 0);
#endif  // NN_DEBUGGABLE
}

}  // namespace nn
}  // namespace android