// AiScheduler — RKNN model loading and inference scheduling for RK3588.
#pragma once

#include <atomic>
#include <cstdint>
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

#if defined(RK3588_ENABLE_RKNN)
#include "rknn_api.h"
#endif

namespace rk3588 {
// Opaque identifier for a loaded model, handed out by AiScheduler::LoadModel.
using ModelHandle = uint64_t;
// Sentinel value returned when loading fails (see LoadModel).
constexpr ModelHandle kInvalidModelHandle = 0;
// Basic metadata describing a loaded model's input geometry and tensor counts.
struct ModelInfo {
  // Expected input tensor geometry; 0 until populated by the loader.
  int input_width = 0;
  int input_height = 0;
  int input_channels = 0;
  // Number of input / output tensors the model exposes.
  uint32_t n_input = 0;
  uint32_t n_output = 0;
  // Human-readable model name.
  std::string name;
};
// A single inference request's input tensor.
struct InferInput {
  // Non-owning pointer to the input data; must stay valid for the duration
  // of the Infer/InferBorrowed call.
  const void* data = nullptr;
  size_t size = 0;  // byte size of `data`
  int width = 0;
  int height = 0;
  bool is_nhwc = true;  // true: NHWC, false: NCHW

  // Optional DMA-BUF input for RKNN zero-copy (best-effort).
  // When dma_fd >= 0, AiScheduler will try rknn_create_mem_from_fd + rknn_set_io_mem.
  int dma_fd = -1;
  int dma_offset = 0;

#if defined(RK3588_ENABLE_RKNN)
  // The actual data type of `data` passed to RKNN. Default preserves existing behavior.
  rknn_tensor_type type = RKNN_TENSOR_UINT8;
#endif
};
// One output tensor produced by inference; `data` owns a copy of the bytes.
struct InferOutput {
  std::vector<uint8_t> data;  // raw output bytes (owned)
  size_t size = 0;            // byte size of valid data in `data`
  int index = 0;              // output tensor index within the model
#if defined(RK3588_ENABLE_RKNN)
  rknn_tensor_type type = RKNN_TENSOR_UINT8;  // element type reported by RKNN
  int32_t zp = 0;      // quantization zero point (per RKNN tensor attrs)
  float scale = 1.0f;  // quantization scale (per RKNN tensor attrs)
  std::vector<uint32_t> dims;  // tensor dimensions
#endif
};
// Result of a synchronous inference call.
struct InferResult {
  bool success = false;              // true when inference completed without error
  std::string error;                 // human-readable description when !success
  std::vector<InferOutput> outputs;  // one entry per model output tensor
};
// Callback for async inference (future use).
// Invoked with the completed result; see AiScheduler::InferAsync.
using InferCallback = std::function<void(const InferResult& result)>;
class AiScheduler {
|
|
public:
|
|
static AiScheduler& Instance();
|
|
|
|
// Prevent copy/move
|
|
AiScheduler(const AiScheduler&) = delete;
|
|
AiScheduler& operator=(const AiScheduler&) = delete;
|
|
|
|
// Load a model from file, returns handle (0 = invalid)
|
|
ModelHandle LoadModel(const std::string& model_path, std::string& err);
|
|
|
|
// Unload a model by handle
|
|
void UnloadModel(ModelHandle handle);
|
|
|
|
// Get model information
|
|
bool GetModelInfo(ModelHandle handle, ModelInfo& info) const;
|
|
|
|
// Synchronous inference
|
|
InferResult Infer(ModelHandle handle, const InferInput& input);
|
|
|
|
struct BorrowedOutput {
|
|
const uint8_t* data = nullptr;
|
|
size_t size = 0;
|
|
int index = 0;
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
rknn_tensor_type type = RKNN_TENSOR_UINT8;
|
|
int32_t zp = 0;
|
|
float scale = 1.0f;
|
|
std::vector<uint32_t> dims;
|
|
#endif
|
|
};
|
|
|
|
// Borrowed inference avoids per-call output allocations/copies by using per-context preallocated buffers.
|
|
// The returned result holds the selected context's inference lock until it is destroyed.
|
|
struct BorrowedInferResult {
|
|
bool success = false;
|
|
std::string error;
|
|
std::vector<BorrowedOutput> outputs;
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
std::shared_ptr<void> keepalive; // keeps the selected ModelContext alive
|
|
std::unique_lock<std::mutex> infer_lock; // holds ModelContext::infer_mutex for that context
|
|
#endif
|
|
|
|
BorrowedInferResult() = default;
|
|
BorrowedInferResult(BorrowedInferResult&&) noexcept = default;
|
|
BorrowedInferResult& operator=(BorrowedInferResult&&) noexcept = default;
|
|
BorrowedInferResult(const BorrowedInferResult&) = delete;
|
|
BorrowedInferResult& operator=(const BorrowedInferResult&) = delete;
|
|
};
|
|
|
|
BorrowedInferResult InferBorrowed(ModelHandle handle, const InferInput& input);
|
|
|
|
// Async inference (submits to internal queue, calls callback when done)
|
|
// For now, this is a simple wrapper around sync Infer
|
|
void InferAsync(ModelHandle handle, const InferInput& input, InferCallback callback);
|
|
|
|
// Get statistics
|
|
uint64_t GetTotalInferences() const { return total_inferences_.load(); }
|
|
uint64_t GetTotalErrors() const { return total_errors_.load(); }
|
|
|
|
// Shutdown scheduler (unload all models)
|
|
void Shutdown();
|
|
|
|
private:
|
|
AiScheduler();
|
|
~AiScheduler();
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
struct ModelContext {
|
|
rknn_context ctx = 0;
|
|
// Shared model blob kept alive for RKNN runtime. Multiple contexts can share the same data.
|
|
std::shared_ptr<std::vector<uint8_t>> model_data;
|
|
std::vector<rknn_tensor_attr> input_attrs;
|
|
std::vector<rknn_tensor_attr> output_attrs;
|
|
std::vector<std::vector<uint8_t>> output_buffers; // preallocated output buffers
|
|
uint32_t n_input = 0;
|
|
uint32_t n_output = 0;
|
|
int input_w = 0;
|
|
int input_h = 0;
|
|
int input_c = 0;
|
|
std::string path;
|
|
std::mutex infer_mutex; // Per-context lock for inference
|
|
|
|
~ModelContext() {
|
|
if (ctx) {
|
|
rknn_destroy(ctx);
|
|
ctx = 0;
|
|
}
|
|
}
|
|
};
|
|
|
|
struct ModelGroup {
|
|
std::string path;
|
|
std::vector<std::shared_ptr<ModelContext>> contexts;
|
|
std::atomic<uint32_t> rr{0}; // round-robin context selection
|
|
};
|
|
|
|
std::unordered_map<ModelHandle, std::shared_ptr<ModelGroup>> models_by_handle_;
|
|
std::unordered_map<std::string, std::weak_ptr<ModelGroup>> models_by_path_;
|
|
#endif
|
|
|
|
mutable std::mutex models_mutex_; // Protects models_ map
|
|
std::atomic<ModelHandle> next_handle_{1};
|
|
std::atomic<uint64_t> total_inferences_{0};
|
|
std::atomic<uint64_t> total_errors_{0};
|
|
};
|
|
|
|
}  // namespace rk3588