// ai_yolo node: YOLOv5/YOLOv8 object detection on RK3588 via the RKNN runtime.
#include <algorithm>
|
|
#include <atomic>
|
|
#include <chrono>
|
|
#include <cmath>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <memory>
|
|
#include <set>
|
|
#include <thread>
|
|
#include <vector>
|
|
|
|
#include "node.h"
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
#include "rknn_api.h"
|
|
#endif
|
|
|
|
namespace rk3588 {
|
|
|
|
namespace {
|
|
|
|
// Number of object classes in the COCO dataset (and in the bundled label table).
constexpr int kObjClassNum = 80;
constexpr int kPropBoxSizeV5 = 5 + kObjClassNum;  // YOLOv5: x,y,w,h,conf + 80 classes
constexpr int kPropBoxSizeV8 = 4 + kObjClassNum;  // YOLOv8: x,y,w,h + 80 classes (no conf)
// Hard cap on detections attached to a single frame after NMS.
constexpr int kMaxDetections = 64;

// YOLOv5 anchors: (w,h) pairs, three anchors per detection head,
// for the stride-8, stride-16 and stride-32 feature maps respectively.
const int kAnchor0[6] = {10, 13, 16, 30, 33, 23};
const int kAnchor1[6] = {30, 61, 62, 45, 59, 119};
const int kAnchor2[6] = {116, 90, 156, 198, 373, 326};

// Supported model families: V5 is anchor-based (3 output maps),
// V8 is anchor-free (single [1, 4+classes, boxes] output).
enum class YoloVersion { V5, V8 };

// COCO class-id -> human-readable label, index-aligned with model outputs.
const char* kCocoLabels[kObjClassNum] = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
};
|
|
|
|
// Clamps a float to [min_val, max_val] and truncates it to int.
// The clamp is done in the float domain on purpose: casting an out-of-range
// float to int first would be undefined behavior. NaN maps to min_val,
// matching the original comparison chain (NaN > min_val is false).
inline int Clamp(float val, int min_val, int max_val) {
  if (std::isnan(val)) return min_val;
  const float lo = static_cast<float>(min_val);
  const float hi = static_cast<float>(max_val);
  return static_cast<int>(std::clamp(val, lo, hi));
}
|
|
|
|
// Saturates a float into [min_val, max_val] and truncates to int32_t.
// Used to clip values into a quantized range (e.g. [-128, 127]) before
// narrowing to int8. std::clamp replaces the hand-rolled ternary chain.
inline int32_t ClipFloat(float val, float min_val, float max_val) {
  return static_cast<int32_t>(std::clamp(val, min_val, max_val));
}
|
|
|
|
// Affine-quantizes a float: q = f32 / scale + zp, saturated into the int8
// range [-128, 127] (truncating toward zero inside the range).
inline int8_t QuantizeF32ToAffine(float f32, int32_t zp, float scale) {
  const float q = (f32 / scale) + zp;
  if (q <= -128.0f) return static_cast<int8_t>(-128);
  if (q >= 127.0f) return static_cast<int8_t>(127);
  return static_cast<int8_t>(static_cast<int32_t>(q));
}
|
|
|
|
// Inverse of the affine quantization: f32 = (q - zp) * scale.
inline float DequantizeAffineToF32(int8_t qnt, int32_t zp, float scale) {
  const float centered = static_cast<float>(qnt) - static_cast<float>(zp);
  return centered * scale;
}
|
|
|
|
// Intersection-over-union of two axis-aligned boxes given as corner pairs.
// Uses the inclusive-pixel convention (+1 on each extent), so a box whose
// min == max still covers one pixel. Returns 0 when the union is empty.
float CalculateIoU(float x1_min, float y1_min, float x1_max, float y1_max,
                   float x2_min, float y2_min, float x2_max, float y2_max) {
  const float ix_lo = std::fmax(x1_min, x2_min);
  const float iy_lo = std::fmax(y1_min, y2_min);
  const float ix_hi = std::fmin(x1_max, x2_max);
  const float iy_hi = std::fmin(y1_max, y2_max);

  const float inter_w = std::fmax(0.f, ix_hi - ix_lo + 1.0f);
  const float inter_h = std::fmax(0.f, iy_hi - iy_lo + 1.0f);
  const float intersection = inter_w * inter_h;

  const float area_a = (x1_max - x1_min + 1.0f) * (y1_max - y1_min + 1.0f);
  const float area_b = (x2_max - x2_min + 1.0f) * (y2_max - y2_min + 1.0f);
  const float union_area = area_a + area_b - intersection;

  if (union_area <= 0.f) return 0.f;
  return intersection / union_area;
}
|
|
|
|
// Sorts values[left..right] in descending order and applies the identical
// permutation to indices[left..right], keeping the two arrays paired.
//
// Reimplemented on std::stable_sort: the previous hand-rolled quicksort
// recursed once per partition, giving O(n) stack depth and O(n^2) time on
// adversarial (e.g. already-sorted) score distributions — a stack-overflow
// risk when a frame yields many candidates. Ties are now kept in their
// original relative order (stable), which the contract permits.
void QuickSortDescending(std::vector<float>& values, int left, int right, std::vector<int>& indices) {
  if (left >= right) return;
  const int count = right - left + 1;

  // Sort a permutation of positions by value, descending.
  std::vector<int> perm(count);
  for (int k = 0; k < count; ++k) perm[k] = left + k;
  std::stable_sort(perm.begin(), perm.end(),
                   [&values](int a, int b) { return values[a] > values[b]; });

  // Materialize the permuted ranges, then copy them back in place.
  std::vector<float> sorted_values(count);
  std::vector<int> sorted_indices(count);
  for (int k = 0; k < count; ++k) {
    sorted_values[k] = values[perm[k]];
    sorted_indices[k] = indices[perm[k]];
  }
  for (int k = 0; k < count; ++k) {
    values[left + k] = sorted_values[k];
    indices[left + k] = sorted_indices[k];
  }
}
|
|
|
|
void NMS(int valid_count, std::vector<float>& boxes, std::vector<int>& class_ids,
|
|
std::vector<int>& order, int filter_id, float threshold) {
|
|
for (int i = 0; i < valid_count; ++i) {
|
|
if (order[i] == -1 || class_ids[i] != filter_id) continue;
|
|
int n = order[i];
|
|
for (int j = i + 1; j < valid_count; ++j) {
|
|
int m = order[j];
|
|
if (m == -1 || class_ids[j] != filter_id) continue;
|
|
float x1_min = boxes[n * 4 + 0];
|
|
float y1_min = boxes[n * 4 + 1];
|
|
float x1_max = x1_min + boxes[n * 4 + 2];
|
|
float y1_max = y1_min + boxes[n * 4 + 3];
|
|
float x2_min = boxes[m * 4 + 0];
|
|
float y2_min = boxes[m * 4 + 1];
|
|
float x2_max = x2_min + boxes[m * 4 + 2];
|
|
float y2_max = y2_min + boxes[m * 4 + 3];
|
|
if (CalculateIoU(x1_min, y1_min, x1_max, y1_max, x2_min, y2_min, x2_max, y2_max) > threshold) {
|
|
order[j] = -1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
// YOLOv5 feature map processing (anchor-based)
|
|
// Decodes one quantized YOLOv5 feature map (anchor-based head) into candidate
// boxes. `input` is an int8 tensor laid out channel-major per anchor:
// [3 anchors][kPropBoxSizeV5 channels][grid_h][grid_w], with channels
// 0..3 = box regressors, 4 = objectness, 5.. = class scores, all quantized
// with (zp, scale). Appends [x, y, w, h] (top-left, model-input pixels) to
// `boxes`, conf*class score to `obj_probs`, and class ids to `class_ids`.
// Returns the number of candidates appended.
//
// NOTE(review): model_h/model_w are unused here; decode relies on `stride`
// and `anchor` only. The (v*2-0.5)/(v*2)^2 decode assumes the exported model
// already applies sigmoid to its outputs — confirm against the export recipe.
int ProcessFeatureMapV5(int8_t* input, const int* anchor, int grid_h, int grid_w,
                        int model_h, int model_w, int stride,
                        std::vector<float>& boxes, std::vector<float>& obj_probs,
                        std::vector<int>& class_ids, float conf_thresh, int32_t zp, float scale) {
  int valid_count = 0;
  int grid_len = grid_h * grid_w;
  // Quantize the threshold once so the objectness test can stay in int8
  // (avoids dequantizing every cell).
  int8_t thresh_i8 = QuantizeF32ToAffine(conf_thresh, zp, scale);

  for (int a = 0; a < 3; ++a) {            // anchor index within this head
    for (int i = 0; i < grid_h; ++i) {     // grid row
      for (int j = 0; j < grid_w; ++j) {   // grid column
        // Channel 4 of this anchor's slab is the objectness score.
        int8_t box_conf = input[(kPropBoxSizeV5 * a + 4) * grid_len + i * grid_w + j];
        if (box_conf >= thresh_i8) {
          // ptr addresses channel 0 of this anchor at cell (i, j); sibling
          // channels are grid_len elements apart (channel-major layout).
          int offset = (kPropBoxSizeV5 * a) * grid_len + i * grid_w + j;
          int8_t* ptr = input + offset;

          // YOLOv5 box decode: center offsets in (-0.5, 1.5), size factors
          // squared against the anchor dimensions.
          float bx = DequantizeAffineToF32(*ptr, zp, scale) * 2.0f - 0.5f;
          float by = DequantizeAffineToF32(ptr[grid_len], zp, scale) * 2.0f - 0.5f;
          float bw = DequantizeAffineToF32(ptr[2 * grid_len], zp, scale) * 2.0f;
          float bh = DequantizeAffineToF32(ptr[3 * grid_len], zp, scale) * 2.0f;

          bx = (bx + j) * stride;              // cell-relative -> model pixels
          by = (by + i) * stride;
          bw = bw * bw * anchor[a * 2];        // anchor-relative -> model pixels
          bh = bh * bh * anchor[a * 2 + 1];
          bx -= bw / 2.0f;                     // center -> top-left corner
          by -= bh / 2.0f;

          // Argmax over the class channels, still in the quantized domain.
          int8_t max_cls_prob = ptr[5 * grid_len];
          int max_cls_id = 0;
          for (int k = 1; k < kObjClassNum; ++k) {
            int8_t prob = ptr[(5 + k) * grid_len];
            if (prob > max_cls_prob) {
              max_cls_id = k;
              max_cls_prob = prob;
            }
          }

          // Keep only cells whose best class also clears the threshold;
          // final score is class probability * objectness (dequantized).
          if (max_cls_prob > thresh_i8) {
            float score = DequantizeAffineToF32(max_cls_prob, zp, scale) *
                          DequantizeAffineToF32(box_conf, zp, scale);
            obj_probs.push_back(score);
            class_ids.push_back(max_cls_id);
            boxes.push_back(bx);
            boxes.push_back(by);
            boxes.push_back(bw);
            boxes.push_back(bh);
            ++valid_count;
          }
        }
      }
    }
  }
  return valid_count;
}
|
|
|
|
// YOLOv8 output processing (anchor-free, single output tensor)
|
|
// Output format: [1, 84, 8400] where 84 = 4 (bbox) + 80 (classes)
|
|
// bbox format: cx, cy, w, h (center-based)
|
|
// Decodes a float YOLOv8 output tensor (anchor-free, single head).
// Layout is channel-major: output[channel * num_boxes + box], where channels
// 0..3 are cx, cy, w, h (center-based, model-input pixels) and channels
// 4..4+num_classes-1 are per-class scores. Every candidate whose best class
// score reaches conf_thresh is appended as [x, y, w, h] (top-left corner)
// with its score and class id. Returns the number of candidates appended.
// NOTE(review): model_h/model_w are unused; kept for signature parity with
// the v5 path.
int ProcessOutputV8(float* output, int num_boxes, int num_classes,
                    int model_h, int model_w,
                    std::vector<float>& boxes, std::vector<float>& obj_probs,
                    std::vector<int>& class_ids, float conf_thresh) {
  int kept = 0;
  for (int box = 0; box < num_boxes; ++box) {
    // Pick the highest-scoring class for this candidate.
    int best_cls = 0;
    float best_score = 0.0f;
    for (int cls = 0; cls < num_classes; ++cls) {
      const float s = output[(4 + cls) * num_boxes + box];
      if (s > best_score) {
        best_score = s;
        best_cls = cls;
      }
    }
    if (best_score < conf_thresh) continue;

    const float cx = output[box];
    const float cy = output[num_boxes + box];
    const float bw = output[2 * num_boxes + box];
    const float bh = output[3 * num_boxes + box];

    // Convert the center-based box to a top-left corner.
    boxes.push_back(cx - bw / 2.0f);
    boxes.push_back(cy - bh / 2.0f);
    boxes.push_back(bw);
    boxes.push_back(bh);
    obj_probs.push_back(best_score);
    class_ids.push_back(best_cls);
    ++kept;
  }
  return kept;
}
|
|
|
|
// YOLOv8 INT8 output processing
|
|
int ProcessOutputV8Int8(int8_t* output, int num_boxes, int num_classes,
|
|
int model_h, int model_w,
|
|
std::vector<float>& boxes, std::vector<float>& obj_probs,
|
|
std::vector<int>& class_ids, float conf_thresh,
|
|
int32_t zp, float scale) {
|
|
int valid_count = 0;
|
|
int8_t thresh_i8 = QuantizeF32ToAffine(conf_thresh, zp, scale);
|
|
|
|
for (int i = 0; i < num_boxes; ++i) {
|
|
// Find max class score
|
|
int8_t max_score_i8 = -128;
|
|
int max_cls_id = 0;
|
|
for (int c = 0; c < num_classes; ++c) {
|
|
int8_t score = output[(4 + c) * num_boxes + i];
|
|
if (score > max_score_i8) {
|
|
max_score_i8 = score;
|
|
max_cls_id = c;
|
|
}
|
|
}
|
|
|
|
if (max_score_i8 >= thresh_i8) {
|
|
float cx = DequantizeAffineToF32(output[0 * num_boxes + i], zp, scale);
|
|
float cy = DequantizeAffineToF32(output[1 * num_boxes + i], zp, scale);
|
|
float w = DequantizeAffineToF32(output[2 * num_boxes + i], zp, scale);
|
|
float h = DequantizeAffineToF32(output[3 * num_boxes + i], zp, scale);
|
|
float max_score = DequantizeAffineToF32(max_score_i8, zp, scale);
|
|
|
|
// Convert from center to top-left
|
|
float x1 = cx - w / 2.0f;
|
|
float y1 = cy - h / 2.0f;
|
|
|
|
boxes.push_back(x1);
|
|
boxes.push_back(y1);
|
|
boxes.push_back(w);
|
|
boxes.push_back(h);
|
|
obj_probs.push_back(max_score);
|
|
class_ids.push_back(max_cls_id);
|
|
++valid_count;
|
|
}
|
|
}
|
|
return valid_count;
|
|
}
|
|
#endif
|
|
|
|
} // namespace
|
|
|
|
// Pipeline node that runs YOLO (v5 or v8) object detection on incoming frames
// through the Rockchip RKNN runtime, attaches a DetectionResult to each
// frame, and forwards the frame to every downstream queue. When built
// without RK3588_ENABLE_RKNN the node degrades to a pure passthrough.
//
// Threading: a single worker thread (started in Start) pops from
// input_queue_ and pushes to output_queues_; Stop() flips running_, stops
// the queues to unblock the worker, and joins it.
class AiYoloNode : public INode {
 public:
  std::string Id() const override { return id_; }
  std::string Type() const override { return "ai_yolo"; }

  // Reads configuration, validates the input/output queues, and (with RKNN
  // enabled) loads the model. Returns false on any missing prerequisite.
  bool Init(const SimpleJson& config, const NodeContext& ctx) override {
    id_ = config.ValueOr<std::string>("id", "ai_yolo");
    model_path_ = config.ValueOr<std::string>("model_path", "");
    conf_thresh_ = config.ValueOr<float>("conf", 0.25f);
    nms_thresh_ = config.ValueOr<float>("nms", 0.45f);
    model_input_w_ = config.ValueOr<int>("model_w", 640);
    model_input_h_ = config.ValueOr<int>("model_h", 640);
    num_classes_ = config.ValueOr<int>("num_classes", 80);

    // Model version: "v5", "v8", or "auto" (default).
    std::string ver = config.ValueOr<std::string>("model_version", "auto");
    if (ver == "v5") {
      yolo_version_ = YoloVersion::V5;
    } else if (ver == "v8") {
      yolo_version_ = YoloVersion::V8;
    } else {
      yolo_version_ = YoloVersion::V8;  // Default to v8, will auto-detect in LoadModel
      auto_detect_version_ = true;
    }

    // Optional whitelist of class ids; empty set means keep every class.
    // NOTE(review): a non-integer entry inserts AsInt's fallback (-1).
    if (const SimpleJson* filter = config.Find("class_filter")) {
      for (const auto& item : filter->AsArray()) {
        class_filter_.insert(item.AsInt(-1));
      }
    }

    input_queue_ = ctx.input_queue;
    if (!input_queue_) {
      std::cerr << "[ai_yolo] no input queue for node " << id_ << "\n";
      return false;
    }
    if (ctx.output_queues.empty()) {
      std::cerr << "[ai_yolo] no output queue for node " << id_ << "\n";
      return false;
    }
    output_queues_ = ctx.output_queues;

#if defined(RK3588_ENABLE_RKNN)
    if (model_path_.empty()) {
      std::cerr << "[ai_yolo] model_path is required\n";
      return false;
    }
    if (!LoadModel()) {
      std::cerr << "[ai_yolo] failed to load model: " << model_path_ << "\n";
      return false;
    }
    std::cout << "[ai_yolo] model loaded: " << model_path_ << "\n";
#else
    std::cout << "[ai_yolo] RKNN disabled, will passthrough frames\n";
#endif
    return true;
  }

  // Spawns the worker thread. Safe to call only after a successful Init.
  bool Start() override {
    if (!input_queue_) return false;
    running_.store(true);
    worker_ = std::thread(&AiYoloNode::WorkerLoop, this);
    std::cout << "[ai_yolo] started, conf=" << conf_thresh_ << " nms=" << nms_thresh_ << "\n";
    return true;
  }

  // Signals the worker, stops the queues so blocking Pops return, joins the
  // worker, then releases the RKNN context.
  void Stop() override {
    running_.store(false);
    if (input_queue_) input_queue_->Stop();
    for (auto& q : output_queues_) q->Stop();
    if (worker_.joinable()) worker_.join();

#if defined(RK3588_ENABLE_RKNN)
    if (rknn_ctx_) {
      rknn_destroy(rknn_ctx_);
      rknn_ctx_ = 0;
    }
#endif
    std::cout << "[ai_yolo] stopped\n";
  }

 private:
#if defined(RK3588_ENABLE_RKNN)
  // Reads the .rknn blob from disk, initializes the RKNN context, queries
  // tensor attributes, derives the model input size, and (in "auto" mode)
  // infers the YOLO version from the output tensor structure.
  bool LoadModel() {
    std::ifstream file(model_path_, std::ios::binary | std::ios::ate);
    if (!file.is_open()) return false;

    size_t model_size = file.tellg();
    file.seekg(0, std::ios::beg);
    model_data_.resize(model_size);
    if (!file.read(reinterpret_cast<char*>(model_data_.data()), model_size)) {
      return false;
    }

    // model_data_ is kept alive for the lifetime of the context.
    int ret = rknn_init(&rknn_ctx_, model_data_.data(), model_size, 0, nullptr);
    if (ret < 0) {
      std::cerr << "[ai_yolo] rknn_init failed: " << ret << "\n";
      return false;
    }

    rknn_input_output_num io_num;
    ret = rknn_query(rknn_ctx_, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
    if (ret < 0) {
      std::cerr << "[ai_yolo] rknn_query IO num failed\n";
      return false;
    }
    n_input_ = io_num.n_input;
    n_output_ = io_num.n_output;

    // NOTE(review): per-tensor rknn_query return codes are not checked here.
    input_attrs_.resize(n_input_);
    for (uint32_t i = 0; i < n_input_; ++i) {
      input_attrs_[i].index = i;
      rknn_query(rknn_ctx_, RKNN_QUERY_INPUT_ATTR, &input_attrs_[i], sizeof(rknn_tensor_attr));
    }

    output_attrs_.resize(n_output_);
    for (uint32_t i = 0; i < n_output_; ++i) {
      output_attrs_[i].index = i;
      rknn_query(rknn_ctx_, RKNN_QUERY_OUTPUT_ATTR, &output_attrs_[i], sizeof(rknn_tensor_attr));
    }

    // The model's real input size overrides the configured model_w/model_h.
    if (input_attrs_[0].fmt == RKNN_TENSOR_NCHW) {
      model_input_h_ = input_attrs_[0].dims[2];
      model_input_w_ = input_attrs_[0].dims[3];
    } else {
      model_input_h_ = input_attrs_[0].dims[1];
      model_input_w_ = input_attrs_[0].dims[2];
    }

    // Auto-detect YOLO version based on output structure.
    if (auto_detect_version_) {
      if (n_output_ == 1) {
        // Single output tensor: YOLOv8 style.
        yolo_version_ = YoloVersion::V8;
      } else if (n_output_ >= 3) {
        // Multiple outputs: likely YOLOv5 style.
        // Check output dimensions to confirm:
        // YOLOv5: 3 outputs with shape [1, 255, H, W] (255 = 3*(5+80));
        // YOLOv8 can also have multiple outputs but a different structure.
        uint32_t out0_elems = output_attrs_[0].n_elems;
        int grid_h = model_input_h_ / 8;  // first head runs at stride 8
        int grid_w = model_input_w_ / 8;
        int expected_v5 = 3 * kPropBoxSizeV5 * grid_h * grid_w;
        if (out0_elems == static_cast<uint32_t>(expected_v5)) {
          yolo_version_ = YoloVersion::V5;
        } else {
          yolo_version_ = YoloVersion::V8;
        }
      }
    }

    const char* ver_str = (yolo_version_ == YoloVersion::V5) ? "v5" : "v8";
    std::cout << "[ai_yolo] model input: " << model_input_w_ << "x" << model_input_h_
              << ", outputs: " << n_output_ << ", version: " << ver_str << "\n";
    return true;
  }
#endif

  // Fan-out: hands the (shared) frame pointer to every downstream queue.
  void PushToDownstream(FramePtr frame) {
    for (auto& q : output_queues_) {
      q->Push(frame);
    }
  }

  // Worker thread body: pop -> (infer) -> forward, until Stop() clears
  // running_. The 200 ms Pop timeout bounds shutdown latency.
  void WorkerLoop() {
    using namespace std::chrono;
    FramePtr frame;

    while (running_.load()) {
      if (!input_queue_->Pop(frame, milliseconds(200))) continue;
      if (!frame) continue;

#if defined(RK3588_ENABLE_RKNN)
      RunInference(frame);
#endif
      PushToDownstream(frame);
      ++processed_;

      if (processed_ % 100 == 0) {
        std::cout << "[ai_yolo] processed " << processed_ << " frames\n";
      }
    }
  }

#if defined(RK3588_ENABLE_RKNN)
  // Feeds one frame through the NPU and post-processes the outputs. On any
  // RKNN error the frame is forwarded without detections (caller continues).
  void RunInference(FramePtr frame) {
    if (!frame->data || frame->data_size == 0) return;

    bool is_rgb = (frame->format == PixelFormat::RGB || frame->format == PixelFormat::BGR);
    if (!is_rgb) {
      std::cerr << "[ai_yolo] input must be RGB/BGR, got other format\n";
      return;
    }

    // NOTE(review): input size is taken from the frame, not the model —
    // assumes an upstream node already resized the frame to
    // model_input_w_ x model_input_h_; confirm the pipeline guarantees this.
    rknn_input inputs[1];
    memset(inputs, 0, sizeof(inputs));
    inputs[0].index = 0;
    inputs[0].type = RKNN_TENSOR_UINT8;
    inputs[0].size = frame->width * frame->height * 3;
    inputs[0].fmt = RKNN_TENSOR_NHWC;
    inputs[0].buf = frame->data;
    inputs[0].pass_through = 0;

    int ret = rknn_inputs_set(rknn_ctx_, n_input_, inputs);
    if (ret < 0) {
      std::cerr << "[ai_yolo] rknn_inputs_set failed: " << ret << "\n";
      return;
    }

    ret = rknn_run(rknn_ctx_, nullptr);
    if (ret < 0) {
      std::cerr << "[ai_yolo] rknn_run failed: " << ret << "\n";
      return;
    }

    // want_float = 0: keep outputs quantized; PostProcess dequantizes
    // selectively using each tensor's (zp, scale).
    std::vector<rknn_output> outputs(n_output_);
    memset(outputs.data(), 0, sizeof(rknn_output) * n_output_);
    for (uint32_t i = 0; i < n_output_; ++i) {
      outputs[i].want_float = 0;
    }

    ret = rknn_outputs_get(rknn_ctx_, n_output_, outputs.data(), nullptr);
    if (ret < 0) {
      std::cerr << "[ai_yolo] rknn_outputs_get failed: " << ret << "\n";
      return;
    }

    PostProcess(outputs, frame);
    rknn_outputs_release(rknn_ctx_, n_output_, outputs.data());
  }

  // Decodes raw network outputs into a DetectionResult attached to the
  // frame: decode candidates (v5 or v8 path), sort by score, per-class NMS,
  // map boxes back to frame coordinates, and apply the class filter.
  void PostProcess(std::vector<rknn_output>& outputs, FramePtr frame) {
    std::vector<float> boxes;      // [x, y, w, h] per candidate
    std::vector<float> obj_probs;  // score per candidate
    std::vector<int> class_ids;    // class id per candidate
    int valid_count = 0;

    // Per-output quantization parameters for dequantizing int8 tensors.
    std::vector<int32_t> zps;
    std::vector<float> scales;
    for (uint32_t i = 0; i < n_output_; ++i) {
      zps.push_back(output_attrs_[i].zp);
      scales.push_back(output_attrs_[i].scale);
    }

    if (yolo_version_ == YoloVersion::V5) {
      // YOLOv5: 3 feature maps with anchors at strides 8/16/32.
      if (n_output_ < 3) return;

      int stride0 = 8, stride1 = 16, stride2 = 32;
      int grid_h0 = model_input_h_ / stride0, grid_w0 = model_input_w_ / stride0;
      int grid_h1 = model_input_h_ / stride1, grid_w1 = model_input_w_ / stride1;
      int grid_h2 = model_input_h_ / stride2, grid_w2 = model_input_w_ / stride2;

      int cnt0 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(outputs[0].buf), kAnchor0,
                                     grid_h0, grid_w0, model_input_h_, model_input_w_, stride0,
                                     boxes, obj_probs, class_ids, conf_thresh_, zps[0], scales[0]);
      int cnt1 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(outputs[1].buf), kAnchor1,
                                     grid_h1, grid_w1, model_input_h_, model_input_w_, stride1,
                                     boxes, obj_probs, class_ids, conf_thresh_, zps[1], scales[1]);
      int cnt2 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(outputs[2].buf), kAnchor2,
                                     grid_h2, grid_w2, model_input_h_, model_input_w_, stride2,
                                     boxes, obj_probs, class_ids, conf_thresh_, zps[2], scales[2]);
      valid_count = cnt0 + cnt1 + cnt2;
    } else {
      // YOLOv8: single output or different structure.
      // Output shape: [1, 84, 8400] or [1, num_classes+4, num_boxes].
      if (n_output_ < 1) return;

      // Determine number of boxes from output dimensions.
      // Typical YOLOv8 output: [1, 84, 8400] where 84 = 4 + 80 classes.
      int num_boxes = 0;
      int num_channels = 0;

      // Check output format: channels may be on dim 1 or dim 2.
      if (output_attrs_[0].n_dims >= 3) {
        // Shape: [batch, channels, boxes] or [batch, boxes, channels].
        if (output_attrs_[0].dims[1] == static_cast<uint32_t>(4 + num_classes_)) {
          num_channels = output_attrs_[0].dims[1];
          num_boxes = output_attrs_[0].dims[2];
        } else if (output_attrs_[0].dims[2] == static_cast<uint32_t>(4 + num_classes_)) {
          num_boxes = output_attrs_[0].dims[1];
          num_channels = output_attrs_[0].dims[2];
        } else {
          // Fallback: assume standard 8400 boxes.
          num_boxes = 8400;
          num_channels = 4 + num_classes_;
        }
      } else {
        // Flat output, calculate from total elements.
        num_channels = 4 + num_classes_;
        num_boxes = output_attrs_[0].n_elems / num_channels;
      }

      // Pick float or int8 decode based on the output tensor's dtype.
      if (output_attrs_[0].type == RKNN_TENSOR_FLOAT32 ||
          output_attrs_[0].type == RKNN_TENSOR_FLOAT16) {
        valid_count = ProcessOutputV8(reinterpret_cast<float*>(outputs[0].buf),
                                      num_boxes, num_classes_,
                                      model_input_h_, model_input_w_,
                                      boxes, obj_probs, class_ids, conf_thresh_);
      } else {
        valid_count = ProcessOutputV8Int8(reinterpret_cast<int8_t*>(outputs[0].buf),
                                          num_boxes, num_classes_,
                                          model_input_h_, model_input_w_,
                                          boxes, obj_probs, class_ids, conf_thresh_,
                                          zps[0], scales[0]);
      }
    }

    if (valid_count <= 0) return;

    // Sort scores descending; indices is permuted in lockstep, so after the
    // sort indices[i] is the ORIGINAL candidate index of the i-th best score.
    std::vector<int> indices(valid_count);
    for (int i = 0; i < valid_count; ++i) indices[i] = i;

    QuickSortDescending(obj_probs, 0, valid_count - 1, indices);

    // Class-wise NMS: suppressed entries become -1 in indices.
    std::set<int> class_set(class_ids.begin(), class_ids.end());
    for (int c : class_set) {
      NMS(valid_count, boxes, class_ids, indices, c, nms_thresh_);
    }

    // Model-input -> frame coordinate scale factors.
    float scale_w = static_cast<float>(model_input_w_) / frame->width;
    float scale_h = static_cast<float>(model_input_h_) / frame->height;

    auto det_result = std::make_shared<DetectionResult>();
    det_result->img_w = frame->width;
    det_result->img_h = frame->height;
    det_result->model_name = (yolo_version_ == YoloVersion::V5) ? "yolov5" : "yolov8";

    for (int i = 0; i < valid_count && det_result->items.size() < kMaxDetections; ++i) {
      if (indices[i] == -1) continue;  // suppressed by NMS
      int n = indices[i];
      int cls_id = class_ids[n];

      if (!class_filter_.empty() && class_filter_.find(cls_id) == class_filter_.end()) {
        continue;
      }

      float x1 = boxes[n * 4 + 0];
      float y1 = boxes[n * 4 + 1];
      float w = boxes[n * 4 + 2];
      float h = boxes[n * 4 + 3];

      Detection det;
      det.cls_id = cls_id;
      // obj_probs was sorted together with indices, so position i is the
      // score matching candidate indices[i].
      det.score = obj_probs[i];
      // Scale back to frame pixels and clamp the box inside the frame.
      det.bbox.x = static_cast<float>(Clamp(static_cast<int>(x1 / scale_w), 0, frame->width));
      det.bbox.y = static_cast<float>(Clamp(static_cast<int>(y1 / scale_h), 0, frame->height));
      det.bbox.w = static_cast<float>(Clamp(static_cast<int>(w / scale_w), 0, frame->width - static_cast<int>(det.bbox.x)));
      det.bbox.h = static_cast<float>(Clamp(static_cast<int>(h / scale_h), 0, frame->height - static_cast<int>(det.bbox.y)));
      det.track_id = -1;  // tracking is assigned by a downstream node, if any

      det_result->items.push_back(det);
    }

    frame->det = det_result;
  }
#endif

  // --- configuration ---
  std::string id_;
  std::string model_path_;
  float conf_thresh_ = 0.25f;   // minimum detection confidence
  float nms_thresh_ = 0.45f;    // IoU threshold for NMS
  int model_input_w_ = 640;     // overwritten from the model in LoadModel
  int model_input_h_ = 640;
  int num_classes_ = 80;
  YoloVersion yolo_version_ = YoloVersion::V8;
  bool auto_detect_version_ = false;
  std::set<int> class_filter_;  // empty = keep all classes

  // --- runtime state ---
  std::atomic<bool> running_{false};
  std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
  std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
  std::thread worker_;
  uint64_t processed_ = 0;  // frames handled; touched only by the worker thread

#if defined(RK3588_ENABLE_RKNN)
  rknn_context rknn_ctx_ = 0;
  std::vector<uint8_t> model_data_;  // backing storage for rknn_init
  uint32_t n_input_ = 0;
  uint32_t n_output_ = 0;
  std::vector<rknn_tensor_attr> input_attrs_;
  std::vector<rknn_tensor_attr> output_attrs_;
#endif
};
|
|
|
|
// Registers AiYoloNode with the node factory under the type key "ai_yolo".
REGISTER_NODE(AiYoloNode, "ai_yolo");
|
|
|
|
} // namespace rk3588
|