197 lines
8.7 KiB
C++
197 lines
8.7 KiB
C++
/*
|
|
* Copyright (C) 2017 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
|
|
#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
|
|
|
|
#include "Callbacks.h"
|
|
#include "HalInterfaces.h"
|
|
#include "Memory.h"
|
|
#include "ModelBuilder.h"
|
|
#include "NeuralNetworks.h"
|
|
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback;
|
|
using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback;
|
|
|
|
namespace android {
|
|
namespace nn {
|
|
|
|
class CompilationBuilder;
|
|
class ExecutionPlan;
|
|
class Memory;
|
|
class ModelBuilder;
|
|
class StepExecutor;
|
|
|
|
// TODO move length out of DataLocation
|
|
struct ModelArgumentInfo {
|
|
// Whether the argument was specified as being in a Memory, as a pointer,
|
|
// has no value, or has not been specified.
|
|
// If POINTER then:
|
|
// locationAndLength.length is valid.
|
|
// dimensions is valid.
|
|
// buffer is valid
|
|
// If MEMORY then:
|
|
// locationAndLength.location.{poolIndex, offset, length} is valid.
|
|
// dimensions is valid.
|
|
enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED;
|
|
DataLocation locationAndLength;
|
|
std::vector<uint32_t> dimensions;
|
|
void* buffer;
|
|
|
|
int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer,
|
|
uint32_t length);
|
|
int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
|
|
uint32_t poolIndex, uint32_t offset, uint32_t length);
|
|
int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset);
|
|
int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
|
|
};
|
|
|
|
class ExecutionBuilder {
|
|
friend class StepExecutor;
|
|
public:
|
|
ExecutionBuilder(const CompilationBuilder* compilation);
|
|
|
|
int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
|
|
size_t length);
|
|
int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
|
|
const Memory* memory, size_t offset, size_t length);
|
|
int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
|
|
size_t length);
|
|
int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
|
|
const Memory* memory, size_t offset, size_t length);
|
|
int startCompute(sp<ExecutionCallback>* synchronizationCallback);
|
|
|
|
const ModelBuilder* getModel() const { return mModel; }
|
|
|
|
private:
|
|
const ModelBuilder* mModel;
|
|
const ExecutionPlan* mPlan;
|
|
|
|
// The information we'll send to the driver about the inputs and outputs.
|
|
// Note that we build this in two steps:
|
|
// 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
|
|
// If set from a pointer, don't set the location in the RequestArgument but store it
|
|
// instead in mInputBuffers or mOutputBuffers.
|
|
// 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
|
|
// the m*Buffers entries. Copy the input values into the shared memory.
|
|
// We do this to avoid creating a lot of shared memory objects if we have a lot of
|
|
// parameters specified via pointers. We also avoid copying in the case where
|
|
// some of the nodes will interpreted on the CPU anyway.
|
|
std::vector<ModelArgumentInfo> mInputs;
|
|
std::vector<ModelArgumentInfo> mOutputs;
|
|
MemoryTracker mMemories;
|
|
};
|
|
|
|
// class StepExecutor is used to execute a single "step" in a
|
|
// potentially multiple step execution process. The graph associated
|
|
// with that step is executed in its entirety on a single device (or
|
|
// on the CPU).
|
|
class StepExecutor {
|
|
public:
|
|
// executionBuilder
|
|
// Describes the full (possibly multiple-"step") execution.
|
|
// model
|
|
// The model to be executed by the executor. Possibly a
|
|
// submodel of the model from executionBuilder.
|
|
// driver, preparedModel
|
|
// The device on which to execute the "step", and the prepared
|
|
// model to execute on that device. (Both are nullptr in the
|
|
// case of CPU.)
|
|
StepExecutor(const ExecutionBuilder* executionBuilder,
|
|
const ModelBuilder* model,
|
|
sp<IDevice> driver, sp<IPreparedModel> preparedModel);
|
|
|
|
// Map inputs and outputs from ExecutionBuilder to StepExecutor,
|
|
// in the case where we have a single-"step" execution (i.e., the executor
|
|
// is executing the entire model from the ExecutionBuilder).
|
|
void mapInputsAndOutputsTrivially();
|
|
|
|
// Map inputs and outputs from ExecutionBuilder to StepExecutor,
|
|
// one at a time. Note that these are input/output indexes, not
|
|
// operand indexes.
|
|
void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
|
|
mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex],
|
|
&mInputs[executorIndex]);
|
|
}
|
|
void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
|
|
mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
|
|
&mOutputs[executorIndex]);
|
|
}
|
|
|
|
// The input or output is assumed to have the size of the
|
|
// corresponding operand.
|
|
int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
|
|
return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex),
|
|
memory, offset,
|
|
&mInputs.at(inputIndex));
|
|
}
|
|
int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
|
|
return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex),
|
|
memory, offset,
|
|
&mOutputs.at(outputIndex));
|
|
}
|
|
|
|
// Executes using the (driver, preparedModel) specified at construction time.
|
|
int startCompute(sp<ExecutionCallback>* synchronizationCallback);
|
|
|
|
// Executes using the CPU, regardless of the (driver,
|
|
// preparedModel) specified at construction time.
|
|
int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);
|
|
|
|
bool isCpu() const { return mDriver == nullptr; }
|
|
|
|
private:
|
|
int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
|
|
int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);
|
|
|
|
void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
|
|
ModelArgumentInfo* executorInputOrOutput);
|
|
|
|
int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
|
|
const Memory* memory, uint32_t offset,
|
|
ModelArgumentInfo* inputOrOutputInfo);
|
|
|
|
// describes the full (possibly multiple-"step") execution
|
|
const ExecutionBuilder* mExecutionBuilder;
|
|
|
|
// model to be executed on the executor, in both original and
|
|
// compiled forms; and device on which to execute it
|
|
const ModelBuilder* mModel;
|
|
sp<IDevice> mDriver; // nullptr if CPU execution
|
|
sp<IPreparedModel> mPreparedModel; // nullptr if CPU execution or if bypassing ExecutionPlan
|
|
|
|
// The information we'll send to the driver about the inputs and outputs.
|
|
// Note that we build this in two steps:
|
|
// 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
|
|
// If set from a pointer, don't set the location in the RequestArgument but store it
|
|
// instead in mInputBuffers or mOutputBuffers.
|
|
// 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
|
|
// the m*Buffers entries. Copy the input values into the shared memory.
|
|
// We do this to avoid creating a lot of shared memory objects if we have a lot of
|
|
// parameters specified via pointers. We also avoid copying in the case where
|
|
// some of the nodes will interpreted on the CPU anyway.
|
|
std::vector<ModelArgumentInfo> mInputs;
|
|
std::vector<ModelArgumentInfo> mOutputs;
|
|
MemoryTracker mMemories;
|
|
};
|
|
|
|
} // namespace nn
|
|
} // namespace android
|
|
|
|
#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
|