// AiScheduler: shared RKNN (Rockchip NPU) model registry and inference
// serializer for RK3588. Implementation file.
#include "ai_scheduler.h"
|
|
|
|
#include <cstring>
|
|
#include <fstream>
|
|
|
|
#include "utils/logger.h"
|
|
|
|
namespace rk3588 {
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
namespace {
|
|
|
|
// Size in bytes of one element of the given RKNN tensor type.
// Unknown/unlisted types conservatively map to 1 byte (matches the
// historical fallback; callers use this only to size output buffers).
uint32_t TensorTypeSizeBytes(rknn_tensor_type t) {
  if (t == RKNN_TENSOR_INT8 || t == RKNN_TENSOR_UINT8) {
    return 1;
  }
  if (t == RKNN_TENSOR_INT16 || t == RKNN_TENSOR_UINT16 || t == RKNN_TENSOR_FLOAT16) {
    return 2;
  }
  if (t == RKNN_TENSOR_INT32 || t == RKNN_TENSOR_UINT32 || t == RKNN_TENSOR_FLOAT32) {
    return 4;
  }
  if (t == RKNN_TENSOR_INT64) {
    return 8;
  }
  return 1;
}
|
|
|
|
} // namespace
|
|
#endif
|
|
|
|
// Returns the process-wide scheduler (Meyers singleton; construction is
// thread-safe under C++11 magic statics and happens on first call).
AiScheduler& AiScheduler::Instance() {
  static AiScheduler instance;
  return instance;
}
|
|
|
|
// Constructed lazily by Instance(); only logs — model state is populated
// on demand via LoadModel().
AiScheduler::AiScheduler() {
  LogInfo("[AiScheduler] initialized");
}
|
|
|
|
// Tears down all loaded models (and their RKNN contexts) via Shutdown().
AiScheduler::~AiScheduler() {
  Shutdown();
}
|
|
|
|
// Releases every loaded model and logs lifetime counters.
// Idempotent: a second call simply clears already-empty maps. Also invoked
// from the destructor. Note models_by_path_ holds weak_ptrs, so the
// handle map is what actually owns the contexts.
void AiScheduler::Shutdown() {
#if defined(RK3588_ENABLE_RKNN)
  {
    std::lock_guard<std::mutex> lock(models_mutex_);
    models_by_handle_.clear();
    models_by_path_.clear();
  }
  std::string summary = "[AiScheduler] shutdown, total inferences: ";
  summary += std::to_string(total_inferences_.load());
  summary += ", errors: ";
  summary += std::to_string(total_errors_.load());
  LogInfo(summary);
#endif
}
|
|
|
|
// Loads an RKNN model from disk and returns an opaque handle.
//
// If the same path was loaded before and that context is still alive, it is
// shared under a fresh handle instead of re-reading the file. On any failure
// `err` is filled and kInvalidModelHandle is returned; a partially
// initialized RKNN context is destroyed before returning.
ModelHandle AiScheduler::LoadModel(const std::string& model_path, std::string& err) {
#if defined(RK3588_ENABLE_RKNN)
  // Fast path: reuse an already-loaded model for the same file.
  {
    std::lock_guard<std::mutex> lock(models_mutex_);
    auto it = models_by_path_.find(model_path);
    if (it != models_by_path_.end()) {
      if (auto existing = it->second.lock()) {
        ModelHandle handle = next_handle_.fetch_add(1);
        models_by_handle_[handle] = existing;
        LogInfo("[AiScheduler] reused model: " + model_path + " (handle=" + std::to_string(handle) + ")");
        return handle;
      }
    }
  }

  // Read model file.
  std::ifstream file(model_path, std::ios::binary | std::ios::ate);
  if (!file.is_open()) {
    err = "Failed to open model file: " + model_path;
    return kInvalidModelHandle;
  }

  // tellg() reports -1 on failure; the old code converted that straight to
  // a huge size_t and tried to allocate it.
  const std::streampos end_pos = file.tellg();
  if (end_pos < 0) {
    err = "Failed to determine model file size: " + model_path;
    return kInvalidModelHandle;
  }
  const size_t model_size = static_cast<size_t>(end_pos);
  if (model_size == 0) {
    err = "Model file is empty: " + model_path;
    return kInvalidModelHandle;
  }
  file.seekg(0, std::ios::beg);

  auto ctx = std::make_shared<ModelContext>();
  ctx->model_data.resize(model_size);
  ctx->path = model_path;

  if (!file.read(reinterpret_cast<char*>(ctx->model_data.data()), model_size)) {
    err = "Failed to read model file: " + model_path;
    return kInvalidModelHandle;
  }

  // Initialize RKNN context.
  int ret = rknn_init(&ctx->ctx, ctx->model_data.data(), model_size, 0, nullptr);
  if (ret < 0) {
    err = "rknn_init failed with code: " + std::to_string(ret);
    return kInvalidModelHandle;
  }

  // From here on every failure path must tear down the RKNN context
  // explicitly (the old code only did so on the IO-num query failure,
  // leaking the context on attr-query failures).
  auto fail = [&](const std::string& message) -> ModelHandle {
    err = message;
    rknn_destroy(ctx->ctx);
    ctx->ctx = 0;
    return kInvalidModelHandle;
  };

  // Prefer using all NPU cores (RK3588). This does not change the
  // shared-context behavior; it only hints RKNN runtime scheduling.
  ret = rknn_set_core_mask(ctx->ctx, RKNN_NPU_CORE_0_1_2);
  if (ret < 0) {
    LogWarn("[AiScheduler] rknn_set_core_mask failed: " + std::to_string(ret));
  }

  // Query input/output counts.
  rknn_input_output_num io_num;
  ret = rknn_query(ctx->ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
  if (ret < 0) {
    return fail("rknn_query IO num failed");
  }

  ctx->n_input = io_num.n_input;
  ctx->n_output = io_num.n_output;
  if (ctx->n_input == 0) {
    // The dimension extraction below dereferences input_attrs[0].
    return fail("Model reports zero inputs: " + model_path);
  }

  // Query input attributes (return codes were previously ignored, leaving
  // zeroed/garbage attrs behind on failure).
  ctx->input_attrs.resize(ctx->n_input);
  for (uint32_t i = 0; i < ctx->n_input; ++i) {
    ctx->input_attrs[i].index = i;
    ret = rknn_query(ctx->ctx, RKNN_QUERY_INPUT_ATTR, &ctx->input_attrs[i], sizeof(rknn_tensor_attr));
    if (ret < 0) {
      return fail("rknn_query input attr failed for index " + std::to_string(i));
    }
  }

  // Query output attributes.
  ctx->output_attrs.resize(ctx->n_output);
  for (uint32_t i = 0; i < ctx->n_output; ++i) {
    ctx->output_attrs[i].index = i;
    ret = rknn_query(ctx->ctx, RKNN_QUERY_OUTPUT_ATTR, &ctx->output_attrs[i], sizeof(rknn_tensor_attr));
    if (ret < 0) {
      return fail("rknn_query output attr failed for index " + std::to_string(i));
    }
  }

  // Preallocate output buffers for borrowed inference. Prefer the reported
  // size, then size_with_stride, then n_elems * element size.
  ctx->output_buffers.resize(ctx->n_output);
  for (uint32_t i = 0; i < ctx->n_output; ++i) {
    uint32_t out_sz = ctx->output_attrs[i].size;
    if (out_sz == 0 && ctx->output_attrs[i].size_with_stride > 0) {
      out_sz = ctx->output_attrs[i].size_with_stride;
    }
    if (out_sz == 0 && ctx->output_attrs[i].n_elems > 0) {
      out_sz = ctx->output_attrs[i].n_elems * TensorTypeSizeBytes(ctx->output_attrs[i].type);
    }
    if (out_sz > 0) {
      ctx->output_buffers[i].resize(out_sz);
    } else {
      ctx->output_buffers[i].clear();
    }
  }

  // Extract input dimensions from the first input tensor. Guard against
  // non-rank-4 inputs (the old code read dims[1..3] unconditionally).
  const rknn_tensor_attr& in0 = ctx->input_attrs[0];
  if (in0.n_dims >= 4) {
    if (in0.fmt == RKNN_TENSOR_NCHW) {
      ctx->input_c = in0.dims[1];
      ctx->input_h = in0.dims[2];
      ctx->input_w = in0.dims[3];
    } else {
      // NHWC
      ctx->input_h = in0.dims[1];
      ctx->input_w = in0.dims[2];
      ctx->input_c = in0.dims[3];
    }
  } else {
    LogWarn("[AiScheduler] first input has n_dims=" + std::to_string(in0.n_dims) +
            "; input W/H/C not populated");
  }

  ModelHandle handle = next_handle_.fetch_add(1);

  {
    std::lock_guard<std::mutex> lock(models_mutex_);
    models_by_handle_[handle] = ctx;
    models_by_path_[model_path] = ctx;
  }

  LogInfo("[AiScheduler] loaded model: " + model_path +
          " (handle=" + std::to_string(handle) +
          ", input=" + std::to_string(ctx->input_w) + "x" + std::to_string(ctx->input_h) +
          "x" + std::to_string(ctx->input_c) +
          ", outputs=" + std::to_string(ctx->n_output) + ")");

  return handle;
#else
  (void)model_path;
  err = "RKNN not enabled";
  return kInvalidModelHandle;
#endif
}
|
|
|
|
// Drops the handle -> model mapping. The RKNN context itself is destroyed
// only when the last handle sharing it is gone; the path cache keeps a
// weak_ptr and expires on its own.
void AiScheduler::UnloadModel(ModelHandle handle) {
#if defined(RK3588_ENABLE_RKNN)
  bool removed = false;
  {
    std::lock_guard<std::mutex> lock(models_mutex_);
    removed = models_by_handle_.erase(handle) > 0;
  }
  if (removed) {
    LogInfo("[AiScheduler] unloaded model handle=" + std::to_string(handle));
  }
#else
  (void)handle;
#endif
}
|
|
|
|
// Copies basic metadata for a loaded model into `info`.
// Returns false when the handle is unknown (or RKNN support is compiled out).
bool AiScheduler::GetModelInfo(ModelHandle handle, ModelInfo& info) const {
#if defined(RK3588_ENABLE_RKNN)
  std::shared_ptr<ModelContext> model;
  {
    std::lock_guard<std::mutex> lock(models_mutex_);
    const auto it = models_by_handle_.find(handle);
    if (it == models_by_handle_.end() || !it->second) {
      return false;
    }
    model = it->second;
  }

  // Copy out without the registry lock: the shared_ptr keeps the context
  // alive, and these fields are not written after LoadModel in this file.
  info.input_width = model->input_w;
  info.input_height = model->input_h;
  info.input_channels = model->input_c;
  info.n_input = model->n_input;
  info.n_output = model->n_output;
  info.name = model->path;
  return true;
#else
  (void)handle;
  (void)info;
  return false;
#endif
}
|
|
|
|
// Runs a blocking inference on the model identified by `handle`.
//
// Thread-safe: the per-model mutex serializes concurrent inferences on the
// same rknn_context. Output payloads are deep-copied into the result, so
// they remain valid after the model is unloaded. Only single-input models
// are supported (see guard below).
InferResult AiScheduler::Infer(ModelHandle handle, const InferInput& input) {
  InferResult result;

#if defined(RK3588_ENABLE_RKNN)
  // Resolve the handle under the registry lock only; inference itself is
  // guarded by the per-model mutex.
  std::shared_ptr<ModelContext> ctx;
  {
    std::lock_guard<std::mutex> lock(models_mutex_);
    auto it = models_by_handle_.find(handle);
    if (it == models_by_handle_.end() || !it->second) {
      result.error = "Invalid model handle";
      total_errors_.fetch_add(1);
      return result;
    }
    ctx = it->second;
  }

  if (!input.data || input.size == 0) {
    result.error = "Invalid input data";
    total_errors_.fetch_add(1);
    return result;
  }

  // We populate exactly one rknn_input below. The old code passed
  // ctx->n_input as the count with a one-element array, which reads past
  // the array for multi-input models.
  if (ctx->n_input != 1) {
    result.error = "Unsupported model: expects " + std::to_string(ctx->n_input) +
                   " inputs, Infer supports exactly 1";
    total_errors_.fetch_add(1);
    return result;
  }

  // Lock this specific model for inference.
  std::lock_guard<std::mutex> infer_lock(ctx->infer_mutex);

  // Setup input.
  rknn_input inputs[1];
  memset(inputs, 0, sizeof(inputs));
  inputs[0].index = 0;
  inputs[0].type = input.type;
  inputs[0].size = input.size;
  inputs[0].fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
  // RKNN API takes a non-const buffer pointer; presumably it does not
  // modify the input — TODO confirm against the SDK docs.
  inputs[0].buf = const_cast<void*>(input.data);
  inputs[0].pass_through = 0;

  int ret = rknn_inputs_set(ctx->ctx, 1, inputs);
  if (ret < 0) {
    result.error = "rknn_inputs_set failed: " + std::to_string(ret);
    total_errors_.fetch_add(1);
    return result;
  }

  // Run inference.
  ret = rknn_run(ctx->ctx, nullptr);
  if (ret < 0) {
    result.error = "rknn_run failed: " + std::to_string(ret);
    total_errors_.fetch_add(1);
    return result;
  }

  // Fetch outputs, quantized; dequantization is the caller's job using the
  // zp/scale copied into each output below.
  std::vector<rknn_output> outputs(ctx->n_output);
  memset(outputs.data(), 0, sizeof(rknn_output) * ctx->n_output);
  for (uint32_t i = 0; i < ctx->n_output; ++i) {
    outputs[i].index = i;  // was left 0 for all outputs; set explicitly like InferBorrowed does
    outputs[i].want_float = 0;  // keep quantized output
  }

  ret = rknn_outputs_get(ctx->ctx, ctx->n_output, outputs.data(), nullptr);
  if (ret < 0) {
    result.error = "rknn_outputs_get failed: " + std::to_string(ret);
    total_errors_.fetch_add(1);
    return result;
  }

  // Deep-copy outputs into the result so they outlive the RKNN buffers.
  result.outputs.resize(ctx->n_output);
  for (uint32_t i = 0; i < ctx->n_output; ++i) {
    auto& out = result.outputs[i];
    out.index = i;
    out.size = outputs[i].size;
    out.type = ctx->output_attrs[i].type;
    out.zp = ctx->output_attrs[i].zp;
    out.scale = ctx->output_attrs[i].scale;

    // Copy dimensions.
    out.dims.resize(ctx->output_attrs[i].n_dims);
    for (uint32_t d = 0; d < ctx->output_attrs[i].n_dims; ++d) {
      out.dims[d] = ctx->output_attrs[i].dims[d];
    }

    // Copy data (guard against a null/empty buffer from the runtime).
    out.data.resize(outputs[i].size);
    if (outputs[i].buf != nullptr && outputs[i].size > 0) {
      memcpy(out.data.data(), outputs[i].buf, outputs[i].size);
    }
  }

  rknn_outputs_release(ctx->ctx, ctx->n_output, outputs.data());

  result.success = true;
  total_inferences_.fetch_add(1);

#else
  result.error = "RKNN not enabled";
  (void)handle;
  (void)input;
#endif

  return result;
}
|
|
|
|
// Runs a blocking inference whose outputs BORROW scheduler-owned buffers
// instead of being copied.
//
// On success the result holds the model's inference lock plus a shared_ptr
// keepalive, so the borrowed pointers stay valid exactly as long as the
// result object is alive; callers must consume the outputs and then drop
// the result to unblock the model. On failure the lock and keepalive are
// released before returning. Only single-input models are supported.
AiScheduler::BorrowedInferResult AiScheduler::InferBorrowed(ModelHandle handle, const InferInput& input) {
  BorrowedInferResult result;

#if defined(RK3588_ENABLE_RKNN)
  std::shared_ptr<ModelContext> ctx;
  {
    std::lock_guard<std::mutex> lock(models_mutex_);
    auto it = models_by_handle_.find(handle);
    if (it == models_by_handle_.end() || !it->second) {
      result.error = "Invalid model handle";
      total_errors_.fetch_add(1);
      return result;
    }
    ctx = it->second;
  }

  if (!input.data || input.size == 0) {
    result.error = "Invalid input data";
    total_errors_.fetch_add(1);
    return result;
  }

  // One rknn_input is populated below; passing ctx->n_input with a
  // one-element array (as the old code did) reads out of bounds for
  // multi-input models.
  if (ctx->n_input != 1) {
    result.error = "Unsupported model: expects " + std::to_string(ctx->n_input) +
                   " inputs, InferBorrowed supports exactly 1";
    total_errors_.fetch_add(1);
    return result;
  }

  // Hold per-model inference lock for the lifetime of this result.
  result.infer_lock = std::unique_lock<std::mutex>(ctx->infer_mutex);
  result.keepalive = ctx;

  // On failure, release the lock and keepalive so a discarded error result
  // does not keep the model blocked (the old code held both).
  auto fail = [&](const std::string& message) {
    result.error = message;
    total_errors_.fetch_add(1);
    result.infer_lock.unlock();
    result.keepalive.reset();
  };

  // Setup input.
  rknn_input inputs[1];
  memset(inputs, 0, sizeof(inputs));
  inputs[0].index = 0;
  inputs[0].type = input.type;
  inputs[0].size = input.size;
  inputs[0].fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
  inputs[0].buf = const_cast<void*>(input.data);
  inputs[0].pass_through = 0;

  int ret = rknn_inputs_set(ctx->ctx, 1, inputs);
  if (ret < 0) {
    fail("rknn_inputs_set failed: " + std::to_string(ret));
    return result;
  }

  ret = rknn_run(ctx->ctx, nullptr);
  if (ret < 0) {
    fail("rknn_run failed: " + std::to_string(ret));
    return result;
  }

  // Hand RKNN our preallocated buffers where available; otherwise let the
  // runtime allocate (handled specially after rknn_outputs_get).
  std::vector<rknn_output> outputs(ctx->n_output);
  memset(outputs.data(), 0, sizeof(rknn_output) * ctx->n_output);
  for (uint32_t i = 0; i < ctx->n_output; ++i) {
    outputs[i].index = i;
    outputs[i].want_float = 0;
    if (i < ctx->output_buffers.size() && !ctx->output_buffers[i].empty()) {
      outputs[i].is_prealloc = 1;
      outputs[i].buf = ctx->output_buffers[i].data();
      outputs[i].size = static_cast<uint32_t>(ctx->output_buffers[i].size());
    } else {
      outputs[i].is_prealloc = 0;
      outputs[i].buf = nullptr;
      outputs[i].size = 0;
    }
  }

  ret = rknn_outputs_get(ctx->ctx, ctx->n_output, outputs.data(), nullptr);
  if (ret < 0) {
    fail("rknn_outputs_get failed: " + std::to_string(ret));
    return result;
  }

  result.outputs.resize(ctx->n_output);
  for (uint32_t i = 0; i < ctx->n_output; ++i) {
    auto& out = result.outputs[i];
    out.index = static_cast<int>(i);
    out.size = outputs[i].size;
    out.type = ctx->output_attrs[i].type;
    out.zp = ctx->output_attrs[i].zp;
    out.scale = ctx->output_attrs[i].scale;
    out.dims.resize(ctx->output_attrs[i].n_dims);
    for (uint32_t d = 0; d < ctx->output_attrs[i].n_dims; ++d) {
      out.dims[d] = ctx->output_attrs[i].dims[d];
    }

    if (outputs[i].is_prealloc) {
      // Buffer is owned by ctx and protected by result.infer_lock/keepalive.
      out.data = reinterpret_cast<const uint8_t*>(outputs[i].buf);
    } else {
      // BUG FIX: a non-prealloc buffer is owned by the RKNN runtime and is
      // freed by rknn_outputs_release below; the previous code returned a
      // pointer into it, which dangled. Copy it into our own buffer first.
      if (ctx->output_buffers.size() <= i) {
        ctx->output_buffers.resize(ctx->n_output);
      }
      auto& owned = ctx->output_buffers[i];
      const uint8_t* src = reinterpret_cast<const uint8_t*>(outputs[i].buf);
      if (src != nullptr && outputs[i].size > 0) {
        owned.assign(src, src + outputs[i].size);
      } else {
        owned.clear();
      }
      out.data = owned.empty() ? nullptr : owned.data();
    }
  }

  rknn_outputs_release(ctx->ctx, ctx->n_output, outputs.data());

  result.success = true;
  total_inferences_.fetch_add(1);
  return result;

#else
  (void)handle;
  (void)input;
  result.error = "RKNN not enabled";
  return result;
#endif
}
|
|
|
|
// Convenience wrapper around Infer(): runs synchronously on the caller's
// thread and hands the result to `callback` if one was provided. A thread
// pool could make this truly asynchronous in the future.
void AiScheduler::InferAsync(ModelHandle handle, const InferInput& input, InferCallback callback) {
  InferResult outcome = Infer(handle, input);
  if (!callback) {
    return;
  }
  callback(outcome);
}
|
|
|
|
} // namespace rk3588
|