OrangePi3588Media/src/ai_scheduler.cpp
#include "ai_scheduler.h"
#include <cstring>
#include <fstream>
#include "utils/logger.h"

namespace rk3588 {

#if defined(RK3588_ENABLE_RKNN)
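// Byte width of a single element for each RKNN tensor type. Used as a
// fallback when the runtime reports neither `size` nor `size_with_stride`
// for an output tensor.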
namespace {
uint32_t TensorTypeSizeBytes(rknn_tensor_type t) {
    switch (t) {
        case RKNN_TENSOR_INT8:
        case RKNN_TENSOR_UINT8:
            return 1;
        case RKNN_TENSOR_INT16:
        case RKNN_TENSOR_UINT16:
        case RKNN_TENSOR_FLOAT16:
            return 2;
        case RKNN_TENSOR_INT32:
        case RKNN_TENSOR_UINT32:
        case RKNN_TENSOR_FLOAT32:
            return 4;
        case RKNN_TENSOR_INT64:
            return 8;
        default:
            return 1;
    }
}
} // namespace
#endif
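
// Meyers singleton: initialization of the function-local static is
// thread-safe since C++11.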
AiScheduler& AiScheduler::Instance() {
    static AiScheduler instance;
    return instance;
}

AiScheduler::AiScheduler() {
    LogInfo("[AiScheduler] initialized");
}

AiScheduler::~AiScheduler() {
    Shutdown();
}
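
// Tear down all model state. Clearing both maps drops the scheduler's shared
// references; each ModelContext is destroyed once no handle still uses it.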
void AiScheduler::Shutdown() {
#if defined(RK3588_ENABLE_RKNN)
    {
        std::lock_guard<std::mutex> lock(models_mutex_);
        models_by_handle_.clear();
        models_by_path_.clear();
    }
    LogInfo("[AiScheduler] shutdown, total inferences: " + std::to_string(total_inferences_.load()) +
            ", errors: " + std::to_string(total_errors_.load()));
#endif
}
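
// Load an .rknn model from disk. Loads are deduplicated by path: if the model
// is already resident, the existing context is shared under a fresh handle.
// The path cache holds weak_ptrs, so a fully unloaded model is re-read from disk.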
ModelHandle AiScheduler::LoadModel(const std::string& model_path, std::string& err) {
#if defined(RK3588_ENABLE_RKNN)
    {
        std::lock_guard<std::mutex> lock(models_mutex_);
        auto it = models_by_path_.find(model_path);
        if (it != models_by_path_.end()) {
            if (auto existing = it->second.lock()) {
                ModelHandle handle = next_handle_.fetch_add(1);
                models_by_handle_[handle] = existing;
                LogInfo("[AiScheduler] reused model: " + model_path + " (handle=" + std::to_string(handle) + ")");
                return handle;
            }
        }
    }
    // Read the model file into memory.
    std::ifstream file(model_path, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        err = "Failed to open model file: " + model_path;
        return kInvalidModelHandle;
    }
    const auto model_size = static_cast<size_t>(file.tellg());
    file.seekg(0, std::ios::beg);
    auto ctx = std::make_shared<ModelContext>();
    ctx->model_data.resize(model_size);
    ctx->path = model_path;
    if (!file.read(reinterpret_cast<char*>(ctx->model_data.data()), model_size)) {
        err = "Failed to read model file: " + model_path;
        return kInvalidModelHandle;
    }
    // Initialize the RKNN context.
    int ret = rknn_init(&ctx->ctx, ctx->model_data.data(), static_cast<uint32_t>(model_size), 0, nullptr);
    if (ret < 0) {
        err = "rknn_init failed with code: " + std::to_string(ret);
        return kInvalidModelHandle;
    }
    // Prefer using all NPU cores (RK3588). This does not change the
    // shared-context behavior; it only hints RKNN runtime scheduling.
    ret = rknn_set_core_mask(ctx->ctx, RKNN_NPU_CORE_0_1_2);
    if (ret < 0) {
        LogWarn("[AiScheduler] rknn_set_core_mask failed: " + std::to_string(ret));
    }
    // Query input/output counts.
    rknn_input_output_num io_num;
    ret = rknn_query(ctx->ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
    if (ret < 0) {
        err = "rknn_query IO num failed";
        rknn_destroy(ctx->ctx);
        ctx->ctx = 0;
        return kInvalidModelHandle;
    }
    ctx->n_input = io_num.n_input;
    ctx->n_output = io_num.n_output;
    if (ctx->n_input == 0) {
        err = "Model reports zero inputs: " + model_path;
        rknn_destroy(ctx->ctx);
        ctx->ctx = 0;
        return kInvalidModelHandle;
    }
    // Query input attributes.
    ctx->input_attrs.resize(ctx->n_input);
    for (uint32_t i = 0; i < ctx->n_input; ++i) {
        ctx->input_attrs[i].index = i;
        ret = rknn_query(ctx->ctx, RKNN_QUERY_INPUT_ATTR, &ctx->input_attrs[i], sizeof(rknn_tensor_attr));
        if (ret < 0) {
            err = "rknn_query input attr failed: " + std::to_string(ret);
            rknn_destroy(ctx->ctx);
            ctx->ctx = 0;
            return kInvalidModelHandle;
        }
    }
    // Query output attributes.
    ctx->output_attrs.resize(ctx->n_output);
    for (uint32_t i = 0; i < ctx->n_output; ++i) {
        ctx->output_attrs[i].index = i;
        ret = rknn_query(ctx->ctx, RKNN_QUERY_OUTPUT_ATTR, &ctx->output_attrs[i], sizeof(rknn_tensor_attr));
        if (ret < 0) {
            err = "rknn_query output attr failed: " + std::to_string(ret);
            rknn_destroy(ctx->ctx);
            ctx->ctx = 0;
            return kInvalidModelHandle;
        }
    }
    // Preallocate output buffers for borrowed inference, preferring the
    // runtime-reported byte size and falling back to element count.
    ctx->output_buffers.resize(ctx->n_output);
    for (uint32_t i = 0; i < ctx->n_output; ++i) {
        uint32_t out_sz = ctx->output_attrs[i].size;
        if (out_sz == 0 && ctx->output_attrs[i].size_with_stride > 0) {
            out_sz = ctx->output_attrs[i].size_with_stride;
        }
        if (out_sz == 0 && ctx->output_attrs[i].n_elems > 0) {
            out_sz = ctx->output_attrs[i].n_elems * TensorTypeSizeBytes(ctx->output_attrs[i].type);
        }
        if (out_sz > 0) {
            ctx->output_buffers[i].resize(out_sz);
        } else {
            ctx->output_buffers[i].clear();
        }
    }
    // Extract input dimensions (dims are [N,C,H,W] for NCHW, [N,H,W,C] for NHWC).
    if (ctx->input_attrs[0].fmt == RKNN_TENSOR_NCHW) {
        ctx->input_c = ctx->input_attrs[0].dims[1];
        ctx->input_h = ctx->input_attrs[0].dims[2];
        ctx->input_w = ctx->input_attrs[0].dims[3];
    } else {
        // NHWC
        ctx->input_h = ctx->input_attrs[0].dims[1];
        ctx->input_w = ctx->input_attrs[0].dims[2];
        ctx->input_c = ctx->input_attrs[0].dims[3];
    }
    ModelHandle handle = next_handle_.fetch_add(1);
    {
        std::lock_guard<std::mutex> lock(models_mutex_);
        models_by_handle_[handle] = ctx;
        models_by_path_[model_path] = ctx;
    }
    LogInfo("[AiScheduler] loaded model: " + model_path +
            " (handle=" + std::to_string(handle) +
            ", input=" + std::to_string(ctx->input_w) + "x" + std::to_string(ctx->input_h) +
            "x" + std::to_string(ctx->input_c) +
            ", outputs=" + std::to_string(ctx->n_output) + ")");
    return handle;
#else
    (void)model_path;
    err = "RKNN not enabled";
    return kInvalidModelHandle;
#endif
}
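
// Drop one handle's reference to its model. The shared ModelContext is
// destroyed together with the last handle using it.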
void AiScheduler::UnloadModel(ModelHandle handle) {
#if defined(RK3588_ENABLE_RKNN)
    bool erased = false;
    {
        std::lock_guard<std::mutex> lock(models_mutex_);
        auto it = models_by_handle_.find(handle);
        if (it != models_by_handle_.end()) {
            models_by_handle_.erase(it);
            erased = true;
        }
    }
    if (erased) {
        LogInfo("[AiScheduler] unloaded model handle=" + std::to_string(handle));
    }
#else
    (void)handle;
#endif
}

bool AiScheduler::GetModelInfo(ModelHandle handle, ModelInfo& info) const {
#if defined(RK3588_ENABLE_RKNN)
    std::shared_ptr<ModelContext> ctx;
    {
        std::lock_guard<std::mutex> lock(models_mutex_);
        auto it = models_by_handle_.find(handle);
        if (it == models_by_handle_.end() || !it->second) {
            return false;
        }
        ctx = it->second;
    }
    info.input_width = ctx->input_w;
    info.input_height = ctx->input_h;
    info.input_channels = ctx->input_c;
    info.n_input = ctx->n_input;
    info.n_output = ctx->n_output;
    info.name = ctx->path;
    return true;
#else
    (void)handle;
    (void)info;
    return false;
#endif
}
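
// Synchronous inference. Every output tensor is deep-copied into the result,
// so the returned data stays valid after the per-model lock is released.
// See InferBorrowed() for a zero-copy alternative.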
InferResult AiScheduler::Infer(ModelHandle handle, const InferInput& input) {
    InferResult result;
#if defined(RK3588_ENABLE_RKNN)
    std::shared_ptr<ModelContext> ctx;
    {
        std::lock_guard<std::mutex> lock(models_mutex_);
        auto it = models_by_handle_.find(handle);
        if (it == models_by_handle_.end() || !it->second) {
            result.error = "Invalid model handle";
            total_errors_.fetch_add(1);
            return result;
        }
        ctx = it->second;
    }
    // Serialize inference on this specific model.
    std::lock_guard<std::mutex> infer_lock(ctx->infer_mutex);
    if (!input.data || input.size == 0) {
        result.error = "Invalid input data";
        total_errors_.fetch_add(1);
        return result;
    }
    // Only single-input models are supported here; passing a one-element
    // array for a multi-input model would read past the end of `inputs`.
    if (ctx->n_input != 1) {
        result.error = "Infer supports single-input models only, model has " +
                       std::to_string(ctx->n_input) + " inputs";
        total_errors_.fetch_add(1);
        return result;
    }
    // Set up the input tensor.
    rknn_input inputs[1];
    memset(inputs, 0, sizeof(inputs));
    inputs[0].index = 0;
    inputs[0].type = input.type;
    inputs[0].size = input.size;
    inputs[0].fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
    inputs[0].buf = const_cast<void*>(input.data);
    inputs[0].pass_through = 0;
    int ret = rknn_inputs_set(ctx->ctx, 1, inputs);
    if (ret < 0) {
        result.error = "rknn_inputs_set failed: " + std::to_string(ret);
        total_errors_.fetch_add(1);
        return result;
    }
    // Run inference.
    ret = rknn_run(ctx->ctx, nullptr);
    if (ret < 0) {
        result.error = "rknn_run failed: " + std::to_string(ret);
        total_errors_.fetch_add(1);
        return result;
    }
    // Fetch outputs.
    std::vector<rknn_output> outputs(ctx->n_output);
    memset(outputs.data(), 0, sizeof(rknn_output) * ctx->n_output);
    for (uint32_t i = 0; i < ctx->n_output; ++i) {
        outputs[i].index = i;
        outputs[i].want_float = 0; // Keep quantized output
    }
    ret = rknn_outputs_get(ctx->ctx, ctx->n_output, outputs.data(), nullptr);
    if (ret < 0) {
        result.error = "rknn_outputs_get failed: " + std::to_string(ret);
        total_errors_.fetch_add(1);
        return result;
    }
    // Deep-copy outputs into the result.
    result.outputs.resize(ctx->n_output);
    for (uint32_t i = 0; i < ctx->n_output; ++i) {
        auto& out = result.outputs[i];
        out.index = i;
        out.size = outputs[i].size;
        out.type = ctx->output_attrs[i].type;
        out.zp = ctx->output_attrs[i].zp;
        out.scale = ctx->output_attrs[i].scale;
        out.dims.resize(ctx->output_attrs[i].n_dims);
        for (uint32_t d = 0; d < ctx->output_attrs[i].n_dims; ++d) {
            out.dims[d] = ctx->output_attrs[i].dims[d];
        }
        out.data.resize(outputs[i].size);
        memcpy(out.data.data(), outputs[i].buf, outputs[i].size);
    }
    rknn_outputs_release(ctx->ctx, ctx->n_output, outputs.data());
    result.success = true;
    total_inferences_.fetch_add(1);
#else
    result.error = "RKNN not enabled";
    (void)handle;
    (void)input;
#endif
    return result;
}
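
// Zero-copy inference: each result.outputs[i].data points into the model's
// preallocated output buffers, and result.infer_lock keeps the per-model
// mutex held, so the pointers are only valid for the lifetime of the result.
// Sketch of intended use (Process() is a placeholder for caller code):
//   auto r = AiScheduler::Instance().InferBorrowed(handle, in);
//   if (r.success) Process(r.outputs);   // borrowed pointers valid here
//   // lock released and pointers invalidated when `r` is destroyed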
AiScheduler::BorrowedInferResult AiScheduler::InferBorrowed(ModelHandle handle, const InferInput& input) {
    BorrowedInferResult result;
#if defined(RK3588_ENABLE_RKNN)
    std::shared_ptr<ModelContext> ctx;
    {
        std::lock_guard<std::mutex> lock(models_mutex_);
        auto it = models_by_handle_.find(handle);
        if (it == models_by_handle_.end() || !it->second) {
            result.error = "Invalid model handle";
            total_errors_.fetch_add(1);
            return result;
        }
        ctx = it->second;
    }
    if (!input.data || input.size == 0) {
        result.error = "Invalid input data";
        total_errors_.fetch_add(1);
        return result;
    }
    // As in Infer(), only single-input models are supported.
    if (ctx->n_input != 1) {
        result.error = "InferBorrowed supports single-input models only, model has " +
                       std::to_string(ctx->n_input) + " inputs";
        total_errors_.fetch_add(1);
        return result;
    }
    // Hold the per-model inference lock for the lifetime of this result.
    result.infer_lock = std::unique_lock<std::mutex>(ctx->infer_mutex);
    result.keepalive = ctx;
    // Set up the input tensor.
    rknn_input inputs[1];
    memset(inputs, 0, sizeof(inputs));
    inputs[0].index = 0;
    inputs[0].type = input.type;
    inputs[0].size = input.size;
    inputs[0].fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
    inputs[0].buf = const_cast<void*>(input.data);
    inputs[0].pass_through = 0;
    int ret = rknn_inputs_set(ctx->ctx, 1, inputs);
    if (ret < 0) {
        result.error = "rknn_inputs_set failed: " + std::to_string(ret);
        total_errors_.fetch_add(1);
        return result;
    }
    ret = rknn_run(ctx->ctx, nullptr);
    if (ret < 0) {
        result.error = "rknn_run failed: " + std::to_string(ret);
        total_errors_.fetch_add(1);
        return result;
    }
    std::vector<rknn_output> outputs(ctx->n_output);
    memset(outputs.data(), 0, sizeof(rknn_output) * ctx->n_output);
    for (uint32_t i = 0; i < ctx->n_output; ++i) {
        outputs[i].want_float = 0;
        outputs[i].index = i;
        if (i < ctx->output_buffers.size() && !ctx->output_buffers[i].empty()) {
            outputs[i].is_prealloc = 1;
            outputs[i].buf = ctx->output_buffers[i].data();
            outputs[i].size = static_cast<uint32_t>(ctx->output_buffers[i].size());
        } else {
            outputs[i].is_prealloc = 0;
            outputs[i].buf = nullptr;
            outputs[i].size = 0;
        }
    }
    ret = rknn_outputs_get(ctx->ctx, ctx->n_output, outputs.data(), nullptr);
    if (ret < 0) {
        result.error = "rknn_outputs_get failed: " + std::to_string(ret);
        total_errors_.fetch_add(1);
        return result;
    }
    // For any output the runtime allocated itself (no preallocated buffer),
    // copy the data into our per-model buffer before rknn_outputs_release()
    // frees it; otherwise the borrowed pointer would dangle.
    for (uint32_t i = 0; i < ctx->n_output; ++i) {
        if (!outputs[i].is_prealloc && outputs[i].buf && outputs[i].size > 0) {
            const auto* src = static_cast<const uint8_t*>(outputs[i].buf);
            ctx->output_buffers[i].assign(src, src + outputs[i].size);
        }
    }
    result.outputs.resize(ctx->n_output);
    for (uint32_t i = 0; i < ctx->n_output; ++i) {
        auto& out = result.outputs[i];
        out.index = static_cast<int>(i);
        out.size = outputs[i].size;
        // Valid while result.infer_lock is held (both the preallocated and
        // the copied-back case point into ctx->output_buffers).
        out.data = ctx->output_buffers[i].data();
        out.type = ctx->output_attrs[i].type;
        out.zp = ctx->output_attrs[i].zp;
        out.scale = ctx->output_attrs[i].scale;
        out.dims.resize(ctx->output_attrs[i].n_dims);
        for (uint32_t d = 0; d < ctx->output_attrs[i].n_dims; ++d) {
            out.dims[d] = ctx->output_attrs[i].dims[d];
        }
    }
    rknn_outputs_release(ctx->ctx, ctx->n_output, outputs.data());
    result.success = true;
    total_inferences_.fetch_add(1);
    return result;
#else
    (void)handle;
    (void)input;
    result.error = "RKNN not enabled";
    return result;
#endif
}

void AiScheduler::InferAsync(ModelHandle handle, const InferInput& input, InferCallback callback) {
    // Simple implementation: just call sync Infer.
    // Future: use a thread pool for true async.
    InferResult result = Infer(handle, input);
    if (callback) {
        callback(result);
    }
}

} // namespace rk3588