// (extraction metadata, preserved as a comment: 713 lines, 29 KiB, C++)
#include <algorithm>
|
|
#include <atomic>
|
|
#include <chrono>
|
|
#include <cmath>
|
|
#include <cstring>
|
|
#include <iostream>
|
|
#include <memory>
|
|
#include <set>
|
|
#include <thread>
|
|
#include <vector>
|
|
|
|
#include "ai_scheduler.h"
|
|
#include "node.h"
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
#include "rknn_api.h"
|
|
#endif
|
|
|
|
namespace rk3588 {
|
|
|
|
namespace {
|
|
|
|
// Number of object classes the post-processing assumes (COCO-80).
constexpr int kObjClassNum = 80;
// Per-anchor channel count for each head layout.
constexpr int kPropBoxSizeV5 = 5 + kObjClassNum; // YOLOv5: x,y,w,h,conf + 80 classes
constexpr int kPropBoxSizeV8 = 4 + kObjClassNum; // YOLOv8: x,y,w,h + 80 classes (no conf)
// Hard cap on detections emitted per frame after NMS.
constexpr int kMaxDetections = 64;

// YOLOv5 anchors, one (w,h) pair per anchor, three anchors per head.
// kAnchor0 pairs with stride 8, kAnchor1 with stride 16, kAnchor2 with stride 32.
const int kAnchor0[6] = {10, 13, 16, 30, 33, 23};
const int kAnchor1[6] = {30, 61, 62, 45, 59, 119};
const int kAnchor2[6] = {116, 90, 156, 198, 373, 326};

// Which decode path to use for the model outputs.
enum class YoloVersion { V5, V8 };

// COCO class-id -> human-readable label.
// NOTE(review): not referenced by the code visible in this file — presumably
// used by downstream overlay/reporting nodes; verify before removing.
const char* kCocoLabels[kObjClassNum] = {
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
};
|
|
|
|
// Clamp a float to the integer range [min_val, max_val] and truncate to int.
// In-range values truncate toward zero; out-of-range values saturate.
// NaN input falls through the first comparison and yields min_val, matching
// the original ternary form.
inline int Clamp(float val, int min_val, int max_val) {
  if (!(val > min_val)) return min_val;  // val <= min_val, or NaN
  if (!(val < max_val)) return max_val;  // val >= max_val
  return static_cast<int>(val);
}
|
|
|
|
// Saturate a float into [min_val, max_val] and return it truncated to int32_t.
// Bounds are taken as floats so callers can pass exact quantization limits.
inline int32_t ClipFloat(float val, float min_val, float max_val) {
  if (val <= min_val) return static_cast<int32_t>(min_val);
  if (val >= max_val) return static_cast<int32_t>(max_val);
  return static_cast<int32_t>(val);
}
|
|
|
|
// Affine-quantize a float to int8: q = f32/scale + zp, saturated to [-128, 127].
// Inverse of DequantizeAffineToF32 for in-range values.
inline int8_t QuantizeF32ToAffine(float f32, int32_t zp, float scale) {
  const float shifted = f32 / scale + static_cast<float>(zp);
  const float clipped = std::clamp(shifted, -128.0f, 127.0f);
  return static_cast<int8_t>(static_cast<int32_t>(clipped));
}
|
|
|
|
// Affine-dequantize an int8 value back to float: f = (q - zp) * scale.
inline float DequantizeAffineToF32(int8_t qnt, int32_t zp, float scale) {
  const float centered = static_cast<float>(qnt) - static_cast<float>(zp);
  return centered * scale;
}
|
|
|
|
// Intersection-over-union of two axis-aligned boxes given as corner pairs.
// Uses the inclusive-pixel convention (+1 on widths/heights). Returns 0 when
// the boxes do not overlap or the union is degenerate.
float CalculateIoU(float x1_min, float y1_min, float x1_max, float y1_max,
                   float x2_min, float y2_min, float x2_max, float y2_max) {
  const float overlap_w = std::fmin(x1_max, x2_max) - std::fmax(x1_min, x2_min) + 1.0f;
  const float overlap_h = std::fmin(y1_max, y2_max) - std::fmax(y1_min, y2_min) + 1.0f;
  const float inter = std::fmax(0.f, overlap_w) * std::fmax(0.f, overlap_h);
  const float area_a = (x1_max - x1_min + 1.0f) * (y1_max - y1_min + 1.0f);
  const float area_b = (x2_max - x2_min + 1.0f) * (y2_max - y2_min + 1.0f);
  const float uni = area_a + area_b - inter;
  if (uni <= 0.f) return 0.f;
  return inter / uni;
}
|
|
|
|
// In-place quicksort of `values[left..right]` in descending order, permuting
// `indices` in lockstep so indices[i] still names the original slot of
// values[i]. Hoare-style partition with values[left] as the pivot.
// Recursion depth is O(n) worst case; fine for the small candidate lists here.
void QuickSortDescending(std::vector<float>& values, int left, int right, std::vector<int>& indices) {
  if (left >= right) return;

  const float pivot_val = values[left];
  const int pivot_slot = indices[left];
  int lo = left;
  int hi = right;

  while (lo < hi) {
    // Scan from the right for an element that belongs before the pivot.
    while (lo < hi && values[hi] <= pivot_val) --hi;
    values[lo] = values[hi];
    indices[lo] = indices[hi];
    // Scan from the left for an element that belongs after the pivot.
    while (lo < hi && values[lo] >= pivot_val) ++lo;
    values[hi] = values[lo];
    indices[hi] = indices[lo];
  }

  values[lo] = pivot_val;
  indices[lo] = pivot_slot;

  QuickSortDescending(values, left, lo - 1, indices);
  QuickSortDescending(values, lo + 1, right, indices);
}
|
|
|
|
void NMS(int valid_count, std::vector<float>& boxes, std::vector<int>& class_ids,
|
|
std::vector<int>& order, int filter_id, float threshold) {
|
|
for (int i = 0; i < valid_count; ++i) {
|
|
int n = order[i];
|
|
if (n < 0 || n >= valid_count) continue;
|
|
if (class_ids[n] != filter_id) continue;
|
|
for (int j = i + 1; j < valid_count; ++j) {
|
|
int m = order[j];
|
|
if (m < 0 || m >= valid_count) continue;
|
|
if (class_ids[m] != filter_id) continue;
|
|
float x1_min = boxes[n * 4 + 0];
|
|
float y1_min = boxes[n * 4 + 1];
|
|
float x1_max = x1_min + boxes[n * 4 + 2];
|
|
float y1_max = y1_min + boxes[n * 4 + 3];
|
|
float x2_min = boxes[m * 4 + 0];
|
|
float y2_min = boxes[m * 4 + 1];
|
|
float x2_max = x2_min + boxes[m * 4 + 2];
|
|
float y2_max = y2_min + boxes[m * 4 + 3];
|
|
if (CalculateIoU(x1_min, y1_min, x1_max, y1_max, x2_min, y2_min, x2_max, y2_max) > threshold) {
|
|
order[j] = -1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
// YOLOv5 feature map processing (anchor-based)
|
|
// YOLOv5 feature map processing (anchor-based).
//
// Decodes one INT8 head tensor laid out channel-major as
// [3 anchors][kPropBoxSizeV5 channels][grid_h][grid_w] and appends every
// candidate that passes the confidence threshold to `boxes` (x,y,w,h
// quadruples in model-input pixels), `obj_probs` (class_prob * box_conf) and
// `class_ids`. Returns the number of candidates appended.
//
// `model_h`/`model_w` are currently unused; `zp`/`scale` are the tensor's
// affine quantization parameters.
// NOTE(review): the *2-0.5 / (*2)^2 decode assumes the model graph already
// applies sigmoid to the raw outputs (standard RKNN YOLOv5 export) — confirm
// against the model being deployed.
int ProcessFeatureMapV5(int8_t* input, const int* anchor, int grid_h, int grid_w,
                        int model_h, int model_w, int stride,
                        std::vector<float>& boxes, std::vector<float>& obj_probs,
                        std::vector<int>& class_ids, float conf_thresh, int32_t zp, float scale) {
  int valid_count = 0;
  int grid_len = grid_h * grid_w;
  // Quantize the threshold once so the hot loop compares raw int8 values
  // instead of dequantizing every cell.
  int8_t thresh_i8 = QuantizeF32ToAffine(conf_thresh, zp, scale);

  for (int a = 0; a < 3; ++a) {
    for (int i = 0; i < grid_h; ++i) {
      for (int j = 0; j < grid_w; ++j) {
        // Channel 4 within each anchor block is the objectness score.
        int8_t box_conf = input[(kPropBoxSizeV5 * a + 4) * grid_len + i * grid_w + j];
        if (box_conf >= thresh_i8) {
          // `ptr` points at channel 0 (x) of this anchor/cell; the other
          // channels are `grid_len` elements apart.
          int offset = (kPropBoxSizeV5 * a) * grid_len + i * grid_w + j;
          int8_t* ptr = input + offset;

          float bx = DequantizeAffineToF32(*ptr, zp, scale) * 2.0f - 0.5f;
          float by = DequantizeAffineToF32(ptr[grid_len], zp, scale) * 2.0f - 0.5f;
          float bw = DequantizeAffineToF32(ptr[2 * grid_len], zp, scale) * 2.0f;
          float bh = DequantizeAffineToF32(ptr[3 * grid_len], zp, scale) * 2.0f;

          // Map from grid-cell offsets to model-input pixels, size the box by
          // its anchor, then convert center coordinates to top-left.
          bx = (bx + j) * stride;
          by = (by + i) * stride;
          bw = bw * bw * anchor[a * 2];
          bh = bh * bh * anchor[a * 2 + 1];
          bx -= bw / 2.0f;
          by -= bh / 2.0f;

          // Arg-max over the 80 class channels, still in the int8 domain
          // (valid because all channels share this tensor's zp/scale).
          int8_t max_cls_prob = ptr[5 * grid_len];
          int max_cls_id = 0;
          for (int k = 1; k < kObjClassNum; ++k) {
            int8_t prob = ptr[(5 + k) * grid_len];
            if (prob > max_cls_prob) {
              max_cls_id = k;
              max_cls_prob = prob;
            }
          }

          // Second gate on the class probability; final score is the usual
          // class_prob * objectness product, computed in float.
          if (max_cls_prob > thresh_i8) {
            float score = DequantizeAffineToF32(max_cls_prob, zp, scale) *
                          DequantizeAffineToF32(box_conf, zp, scale);
            obj_probs.push_back(score);
            class_ids.push_back(max_cls_id);
            boxes.push_back(bx);
            boxes.push_back(by);
            boxes.push_back(bw);
            boxes.push_back(bh);
            ++valid_count;
          }
        }
      }
    }
  }
  return valid_count;
}
|
|
|
|
// YOLOv8 output processing (anchor-free, single output tensor)
|
|
// YOLOv8 output processing (anchor-free, single output tensor).
//
// `output` is channel-major: channels 0..3 are cx,cy,w,h and channels
// 4..4+num_classes-1 are per-class scores, each channel `num_boxes` long.
// Appends boxes as top-left x,y,w,h (model-input pixels) for every candidate
// whose best class score reaches `conf_thresh`. Returns the number appended.
// `model_h`/`model_w` are currently unused.
int ProcessOutputV8(float* output, int num_boxes, int num_classes,
                    int model_h, int model_w,
                    std::vector<float>& boxes, std::vector<float>& obj_probs,
                    std::vector<int>& class_ids, float conf_thresh) {
  int kept = 0;

  for (int i = 0; i < num_boxes; ++i) {
    // Arg-max over the class channels for this candidate.
    float best_score = 0.0f;
    int best_cls = 0;
    for (int c = 0; c < num_classes; ++c) {
      const float s = output[(4 + c) * num_boxes + i];
      if (s > best_score) {
        best_score = s;
        best_cls = c;
      }
    }

    if (best_score < conf_thresh) continue;

    const float cx = output[i];
    const float cy = output[num_boxes + i];
    const float bw = output[2 * num_boxes + i];
    const float bh = output[3 * num_boxes + i];

    // Center -> top-left corner.
    boxes.push_back(cx - bw / 2.0f);
    boxes.push_back(cy - bh / 2.0f);
    boxes.push_back(bw);
    boxes.push_back(bh);
    obj_probs.push_back(best_score);
    class_ids.push_back(best_cls);
    ++kept;
  }
  return kept;
}
|
|
|
|
// YOLOv8 INT8 output processing
|
|
int ProcessOutputV8Int8(int8_t* output, int num_boxes, int num_classes,
|
|
int model_h, int model_w,
|
|
std::vector<float>& boxes, std::vector<float>& obj_probs,
|
|
std::vector<int>& class_ids, float conf_thresh,
|
|
int32_t zp, float scale) {
|
|
int valid_count = 0;
|
|
int8_t thresh_i8 = QuantizeF32ToAffine(conf_thresh, zp, scale);
|
|
|
|
for (int i = 0; i < num_boxes; ++i) {
|
|
int8_t max_score_i8 = -128;
|
|
int max_cls_id = 0;
|
|
for (int c = 0; c < num_classes; ++c) {
|
|
int8_t score = output[(4 + c) * num_boxes + i];
|
|
if (score > max_score_i8) {
|
|
max_score_i8 = score;
|
|
max_cls_id = c;
|
|
}
|
|
}
|
|
|
|
if (max_score_i8 >= thresh_i8) {
|
|
float cx = DequantizeAffineToF32(output[0 * num_boxes + i], zp, scale);
|
|
float cy = DequantizeAffineToF32(output[1 * num_boxes + i], zp, scale);
|
|
float w = DequantizeAffineToF32(output[2 * num_boxes + i], zp, scale);
|
|
float h = DequantizeAffineToF32(output[3 * num_boxes + i], zp, scale);
|
|
float max_score = DequantizeAffineToF32(max_score_i8, zp, scale);
|
|
|
|
float x1 = cx - w / 2.0f;
|
|
float y1 = cy - h / 2.0f;
|
|
|
|
boxes.push_back(x1);
|
|
boxes.push_back(y1);
|
|
boxes.push_back(w);
|
|
boxes.push_back(h);
|
|
obj_probs.push_back(max_score);
|
|
class_ids.push_back(max_cls_id);
|
|
++valid_count;
|
|
}
|
|
}
|
|
return valid_count;
|
|
}
|
|
#endif
|
|
|
|
} // namespace
|
|
|
|
// Pipeline node that runs YOLOv5/YOLOv8 object detection on incoming RGB/BGR
// frames via the shared AiScheduler, attaches a DetectionResult to each frame,
// and forwards frames downstream. When RK3588_ENABLE_RKNN is not defined the
// node degrades to a pure passthrough.
class AiYoloNode : public INode {
 public:
  std::string Id() const override { return id_; }
  std::string Type() const override { return "ai_yolo"; }

  // Reads configuration, wires input/output queues and (when RKNN is enabled)
  // loads the model through AiScheduler. Returns false on any missing
  // prerequisite; logs go to stderr/stdout.
  bool Init(const SimpleJson& config, const NodeContext& ctx) override {
    id_ = config.ValueOr<std::string>("id", "ai_yolo");
    model_path_ = config.ValueOr<std::string>("model_path", "");
    conf_thresh_ = config.ValueOr<float>("conf", 0.25f);
    nms_thresh_ = config.ValueOr<float>("nms", 0.45f);
    model_input_w_ = config.ValueOr<int>("model_w", 640);
    model_input_h_ = config.ValueOr<int>("model_h", 640);
    num_classes_ = config.ValueOr<int>("num_classes", 80);

    // Optional inference throttle. 0 = run every frame.
    // "infer_interval_ms" wins; otherwise "infer_fps" is converted to an
    // interval (floored at 1 ms).
    infer_interval_ms_ = std::max<int64_t>(0, static_cast<int64_t>(config.ValueOr<int>("infer_interval_ms", 0)));
    if (infer_interval_ms_ <= 0) {
      const double infer_fps = config.ValueOr<double>("infer_fps", 0.0);
      if (infer_fps > 0.0) {
        infer_interval_ms_ = static_cast<int64_t>(1000.0 / infer_fps);
        if (infer_interval_ms_ < 1) infer_interval_ms_ = 1;
      }
    }

    // "v5"/"v8" force a decode path; anything else means auto-detect from the
    // model's output count once the model is loaded (default v8 until then).
    std::string ver = config.ValueOr<std::string>("model_version", "auto");
    if (ver == "v5") {
      yolo_version_ = YoloVersion::V5;
    } else if (ver == "v8") {
      yolo_version_ = YoloVersion::V8;
    } else {
      yolo_version_ = YoloVersion::V8;
      auto_detect_version_ = true;
    }

    // Optional allow-list of class ids; empty set means "keep all classes".
    if (const SimpleJson* filter = config.Find("class_filter")) {
      for (const auto& item : filter->AsArray()) {
        class_filter_.insert(item.AsInt(-1));
      }
    }

    input_queue_ = ctx.input_queue;
    if (!input_queue_) {
      std::cerr << "[ai_yolo] no input queue for node " << id_ << "\n";
      return false;
    }
    if (ctx.output_queues.empty()) {
      std::cerr << "[ai_yolo] no output queue for node " << id_ << "\n";
      return false;
    }
    output_queues_ = ctx.output_queues;

#if defined(RK3588_ENABLE_RKNN)
    if (model_path_.empty()) {
      std::cerr << "[ai_yolo] model_path is required\n";
      return false;
    }

    std::string err;
    model_handle_ = AiScheduler::Instance().LoadModel(model_path_, err);
    if (model_handle_ == kInvalidModelHandle) {
      std::cerr << "[ai_yolo] failed to load model: " << err << "\n";
      return false;
    }

    // Prefer the model's own input geometry over the configured one, and
    // resolve the auto-detected YOLO version: 1 output tensor -> v8,
    // 3+ heads -> v5. (2 outputs leaves the default v8 in place.)
    ModelInfo info;
    if (AiScheduler::Instance().GetModelInfo(model_handle_, info)) {
      model_input_w_ = info.input_width;
      model_input_h_ = info.input_height;
      n_output_ = info.n_output;

      if (auto_detect_version_) {
        if (n_output_ == 1) {
          yolo_version_ = YoloVersion::V8;
        } else if (n_output_ >= 3) {
          yolo_version_ = YoloVersion::V5;
        }
      }
    }

    std::cout << "[ai_yolo] model loaded via AiScheduler: " << model_path_
              << " (handle=" << model_handle_ << ", version="
              << (yolo_version_ == YoloVersion::V5 ? "v5" : "v8") << ")\n";
#else
    std::cout << "[ai_yolo] RKNN disabled, will passthrough frames\n";
#endif
    return true;
  }

  bool Start() override {
    std::cout << "[ai_yolo] started, conf=" << conf_thresh_ << " nms=" << nms_thresh_ << "\n";
    return true;
  }

  // Releases the model handle (scheduler owns the actual rknn context).
  void Stop() override {
#if defined(RK3588_ENABLE_RKNN)
    if (model_handle_ != kInvalidModelHandle) {
      AiScheduler::Instance().UnloadModel(model_handle_);
      model_handle_ = kInvalidModelHandle;
    }
#endif
    std::cout << "[ai_yolo] stopped\n";
  }

  // Per-frame entry point. Applies the optional pts-based throttle (throttled
  // frames are forwarded untouched, without detections), runs inference when
  // RKNN is enabled, then forwards the frame to all downstream queues.
  // NOTE(review): the throttle assumes frame->pts is monotonically
  // non-decreasing (microseconds, given the /1000 to ms) — confirm upstream.
  NodeStatus Process(FramePtr frame) override {
    if (!frame) return NodeStatus::DROP;

    if (infer_interval_ms_ > 0 && frame->pts > 0) {
      const int64_t pts_ms = static_cast<int64_t>(frame->pts / 1000ULL);
      if (last_infer_pts_ms_ > 0 && (pts_ms - last_infer_pts_ms_) < infer_interval_ms_) {
        PushToDownstream(frame);
        ++processed_;
        return NodeStatus::OK;
      }
      last_infer_pts_ms_ = pts_ms;
    }

#if defined(RK3588_ENABLE_RKNN)
    RunInference(frame);
#endif
    PushToDownstream(frame);
    ++processed_;

    if (processed_ % 100 == 0) {
      std::cout << "[ai_yolo] processed " << processed_ << " frames\n";
    }
    return NodeStatus::OK;
  }

 private:
  // Fan the (shared) frame out to every downstream queue.
  void PushToDownstream(FramePtr frame) {
    for (auto& q : output_queues_) {
      q->Push(frame);
    }
  }

#if defined(RK3588_ENABLE_RKNN)
  // Validates the frame buffer, repacks strided rows into a tight RGB buffer
  // when necessary, submits the image to AiScheduler and hands the borrowed
  // outputs to PostProcessBorrowed. Errors are logged and the frame is left
  // without detections.
  // NOTE(review): the frame is fed at its native size; any resize/letterbox to
  // model_input_w_/h_ must happen inside AiScheduler — verify, since the
  // post-process scaling assumes a plain stretch (no letterbox padding).
  void RunInference(FramePtr frame) {
    if (!frame->data || frame->data_size == 0) return;

    bool is_rgb = (frame->format == PixelFormat::RGB || frame->format == PixelFormat::BGR);
    if (!is_rgb) {
      std::cerr << "[ai_yolo] input must be RGB/BGR, got other format\n";
      return;
    }

    const int w = frame->width;
    const int h = frame->height;
    const size_t packed_row = static_cast<size_t>(w) * 3;   // tightly packed row bytes
    const size_t packed_size = packed_row * static_cast<size_t>(h);

    // Prefer the plane descriptor; fall back to the flat buffer/stride.
    const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
    int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
                                                 : (frame->stride > 0 ? frame->stride : static_cast<int>(packed_row));
    if (!src || src_stride <= 0) return;

    InferInput input;
    if (static_cast<size_t>(src_stride) == packed_row && frame->data_size >= packed_size) {
      // Already tightly packed: feed the frame buffer directly (zero copy).
      input.data = src;
      input.size = packed_size;
    } else {
      if (frame->data_size < static_cast<size_t>(src_stride) * static_cast<size_t>(h)) {
        std::cerr << "[ai_yolo] invalid RGB buffer size/stride (data_size=" << frame->data_size
                  << ", stride=" << src_stride << ", h=" << h << ")\n";
        return;
      }
      // Strided source: copy row by row into the reusable packed buffer.
      rgb_tmp_.resize(packed_size);
      for (int y = 0; y < h; ++y) {
        memcpy(rgb_tmp_.data() + static_cast<size_t>(y) * packed_row,
               src + static_cast<size_t>(y) * static_cast<size_t>(src_stride),
               packed_row);
      }
      input.data = rgb_tmp_.data();
      input.size = packed_size;
    }
    input.width = w;
    input.height = h;
    input.is_nhwc = true;

    auto result = AiScheduler::Instance().InferBorrowed(model_handle_, input);
    if (!result.success) {
      std::cerr << "[ai_yolo] inference failed: " << result.error << "\n";
      return;
    }

    PostProcessBorrowed(result.outputs, frame);
  }

  // Decodes borrowed (zero-copy) scheduler outputs into detections, runs
  // score sort + per-class NMS, rescales boxes from model-input coordinates
  // to frame coordinates, applies the class filter and the kMaxDetections
  // cap, and attaches the result to the frame.
  // TODO(review): this is a near-verbatim duplicate of PostProcess below —
  // fold the shared tail (sort/NMS/scale/emit) into one helper.
  void PostProcessBorrowed(const std::vector<AiScheduler::BorrowedOutput>& outputs, FramePtr frame) {
    std::vector<float> boxes;
    std::vector<float> obj_probs;
    std::vector<int> class_ids;
    int valid_count = 0;

    if (yolo_version_ == YoloVersion::V5) {
      // v5: three int8 heads at strides 8/16/32.
      if (outputs.size() < 3) return;
      if (!outputs[0].data || !outputs[1].data || !outputs[2].data) return;

      int stride0 = 8, stride1 = 16, stride2 = 32;
      int grid_h0 = model_input_h_ / stride0, grid_w0 = model_input_w_ / stride0;
      int grid_h1 = model_input_h_ / stride1, grid_w1 = model_input_w_ / stride1;
      int grid_h2 = model_input_h_ / stride2, grid_w2 = model_input_w_ / stride2;

      int cnt0 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(const_cast<uint8_t*>(outputs[0].data)), kAnchor0,
                                     grid_h0, grid_w0, model_input_h_, model_input_w_, stride0,
                                     boxes, obj_probs, class_ids, conf_thresh_,
                                     outputs[0].zp, outputs[0].scale);
      int cnt1 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(const_cast<uint8_t*>(outputs[1].data)), kAnchor1,
                                     grid_h1, grid_w1, model_input_h_, model_input_w_, stride1,
                                     boxes, obj_probs, class_ids, conf_thresh_,
                                     outputs[1].zp, outputs[1].scale);
      int cnt2 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(const_cast<uint8_t*>(outputs[2].data)), kAnchor2,
                                     grid_h2, grid_w2, model_input_h_, model_input_w_, stride2,
                                     boxes, obj_probs, class_ids, conf_thresh_,
                                     outputs[2].zp, outputs[2].scale);
      valid_count = cnt0 + cnt1 + cnt2;
    } else {
      // v8: one tensor, either [1, C, N] or [1, N, C]; infer N from whichever
      // dim matches 4+num_classes, defaulting to 8400 (640x640 standard).
      if (outputs.empty()) return;
      if (!outputs[0].data || outputs[0].size == 0) return;

      int num_boxes = 0;
      int num_channels = 4 + num_classes_;

      if (outputs[0].dims.size() >= 3) {
        if (outputs[0].dims[1] == static_cast<uint32_t>(num_channels)) {
          num_boxes = static_cast<int>(outputs[0].dims[2]);
        } else if (outputs[0].dims[2] == static_cast<uint32_t>(num_channels)) {
          num_boxes = static_cast<int>(outputs[0].dims[1]);
        } else {
          num_boxes = 8400;
        }
      } else {
        num_boxes = static_cast<int>(outputs[0].size) / num_channels;
      }

      // NOTE(review): FLOAT16 data is reinterpreted as float32 here — this is
      // only correct if AiScheduler converts FP16 outputs to FP32 before
      // lending them out; verify.
      if (outputs[0].type == RKNN_TENSOR_FLOAT32 ||
          outputs[0].type == RKNN_TENSOR_FLOAT16) {
        valid_count = ProcessOutputV8(reinterpret_cast<float*>(const_cast<uint8_t*>(outputs[0].data)),
                                      num_boxes, num_classes_,
                                      model_input_h_, model_input_w_,
                                      boxes, obj_probs, class_ids, conf_thresh_);
      } else {
        valid_count = ProcessOutputV8Int8(reinterpret_cast<int8_t*>(const_cast<uint8_t*>(outputs[0].data)),
                                          num_boxes, num_classes_,
                                          model_input_h_, model_input_w_,
                                          boxes, obj_probs, class_ids, conf_thresh_,
                                          outputs[0].zp, outputs[0].scale);
      }
    }

    if (valid_count <= 0) return;

    // Sort candidates by descending score; `indices` tracks original slots.
    std::vector<int> indices(valid_count);
    for (int i = 0; i < valid_count; ++i) indices[i] = i;

    QuickSortDescending(obj_probs, 0, valid_count - 1, indices);

    // Per-class NMS: suppressed entries become -1 in `indices`.
    std::set<int> class_set(class_ids.begin(), class_ids.end());
    for (int c : class_set) {
      NMS(valid_count, boxes, class_ids, indices, c, nms_thresh_);
    }

    // Model-input -> frame coordinate scale (plain stretch, no letterbox).
    float scale_w = static_cast<float>(model_input_w_) / frame->width;
    float scale_h = static_cast<float>(model_input_h_) / frame->height;

    auto det_result = std::make_shared<DetectionResult>();
    det_result->img_w = frame->width;
    det_result->img_h = frame->height;
    det_result->model_name = (yolo_version_ == YoloVersion::V5) ? "yolov5" : "yolov8";

    for (int i = 0; i < valid_count && det_result->items.size() < kMaxDetections; ++i) {
      if (indices[i] == -1) continue;  // suppressed by NMS
      int n = indices[i];
      int cls_id = class_ids[n];

      if (!class_filter_.empty() && class_filter_.find(cls_id) == class_filter_.end()) {
        continue;
      }

      float x1 = boxes[n * 4 + 0];
      float y1 = boxes[n * 4 + 1];
      float w = boxes[n * 4 + 2];
      float h = boxes[n * 4 + 3];

      // Scale to frame pixels and clamp so the box stays inside the image.
      Detection det;
      det.cls_id = cls_id;
      det.score = obj_probs[i];  // obj_probs is sorted in lockstep with indices
      det.bbox.x = static_cast<float>(Clamp(static_cast<int>(x1 / scale_w), 0, frame->width));
      det.bbox.y = static_cast<float>(Clamp(static_cast<int>(y1 / scale_h), 0, frame->height));
      det.bbox.w = static_cast<float>(Clamp(static_cast<int>(w / scale_w), 0, frame->width - static_cast<int>(det.bbox.x)));
      det.bbox.h = static_cast<float>(Clamp(static_cast<int>(h / scale_h), 0, frame->height - static_cast<int>(det.bbox.y)));
      det.track_id = -1;  // tracking is assigned by a downstream node, if any

      // Debug output for the first few detections of the first few frames.
      if (det_result->items.size() < 3 && processed_ < 10) {
        std::cout << "[ai_yolo] det: raw(" << x1 << "," << y1 << "," << w << "," << h
                  << ") -> bbox(" << det.bbox.x << "," << det.bbox.y << ","
                  << det.bbox.w << "," << det.bbox.h << ") cls=" << cls_id
                  << " score=" << det.score << "\n";
      }

      det_result->items.push_back(det);
    }

    frame->det = det_result;
  }

  // Same pipeline as PostProcessBorrowed but for owned/copied InferOutput
  // buffers (non-borrowed inference path).
  // NOTE(review): appears to be unreferenced in this file — presumably kept
  // for a copy-based AiScheduler::Infer path; verify before deleting.
  void PostProcess(std::vector<InferOutput>& outputs, FramePtr frame) {
    std::vector<float> boxes;
    std::vector<float> obj_probs;
    std::vector<int> class_ids;
    int valid_count = 0;

    if (yolo_version_ == YoloVersion::V5) {
      if (outputs.size() < 3) return;

      int stride0 = 8, stride1 = 16, stride2 = 32;
      int grid_h0 = model_input_h_ / stride0, grid_w0 = model_input_w_ / stride0;
      int grid_h1 = model_input_h_ / stride1, grid_w1 = model_input_w_ / stride1;
      int grid_h2 = model_input_h_ / stride2, grid_w2 = model_input_w_ / stride2;

      int cnt0 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(outputs[0].data.data()), kAnchor0,
                                     grid_h0, grid_w0, model_input_h_, model_input_w_, stride0,
                                     boxes, obj_probs, class_ids, conf_thresh_,
                                     outputs[0].zp, outputs[0].scale);
      int cnt1 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(outputs[1].data.data()), kAnchor1,
                                     grid_h1, grid_w1, model_input_h_, model_input_w_, stride1,
                                     boxes, obj_probs, class_ids, conf_thresh_,
                                     outputs[1].zp, outputs[1].scale);
      int cnt2 = ProcessFeatureMapV5(reinterpret_cast<int8_t*>(outputs[2].data.data()), kAnchor2,
                                     grid_h2, grid_w2, model_input_h_, model_input_w_, stride2,
                                     boxes, obj_probs, class_ids, conf_thresh_,
                                     outputs[2].zp, outputs[2].scale);
      valid_count = cnt0 + cnt1 + cnt2;
    } else {
      if (outputs.empty()) return;

      int num_boxes = 0;
      int num_channels = 4 + num_classes_;

      if (outputs[0].dims.size() >= 3) {
        if (outputs[0].dims[1] == static_cast<uint32_t>(num_channels)) {
          num_boxes = outputs[0].dims[2];
        } else if (outputs[0].dims[2] == static_cast<uint32_t>(num_channels)) {
          num_boxes = outputs[0].dims[1];
        } else {
          num_boxes = 8400;
        }
      } else {
        num_boxes = outputs[0].data.size() / num_channels;
      }

      // NOTE(review): same FP16-as-FP32 reinterpret caveat as in
      // PostProcessBorrowed — verify the scheduler converts to FP32.
      if (outputs[0].type == RKNN_TENSOR_FLOAT32 ||
          outputs[0].type == RKNN_TENSOR_FLOAT16) {
        valid_count = ProcessOutputV8(reinterpret_cast<float*>(outputs[0].data.data()),
                                      num_boxes, num_classes_,
                                      model_input_h_, model_input_w_,
                                      boxes, obj_probs, class_ids, conf_thresh_);
      } else {
        valid_count = ProcessOutputV8Int8(reinterpret_cast<int8_t*>(outputs[0].data.data()),
                                          num_boxes, num_classes_,
                                          model_input_h_, model_input_w_,
                                          boxes, obj_probs, class_ids, conf_thresh_,
                                          outputs[0].zp, outputs[0].scale);
      }
    }

    if (valid_count <= 0) return;

    std::vector<int> indices(valid_count);
    for (int i = 0; i < valid_count; ++i) indices[i] = i;

    QuickSortDescending(obj_probs, 0, valid_count - 1, indices);

    std::set<int> class_set(class_ids.begin(), class_ids.end());
    for (int c : class_set) {
      NMS(valid_count, boxes, class_ids, indices, c, nms_thresh_);
    }

    float scale_w = static_cast<float>(model_input_w_) / frame->width;
    float scale_h = static_cast<float>(model_input_h_) / frame->height;

    auto det_result = std::make_shared<DetectionResult>();
    det_result->img_w = frame->width;
    det_result->img_h = frame->height;
    det_result->model_name = (yolo_version_ == YoloVersion::V5) ? "yolov5" : "yolov8";

    for (int i = 0; i < valid_count && det_result->items.size() < kMaxDetections; ++i) {
      if (indices[i] == -1) continue;
      int n = indices[i];
      int cls_id = class_ids[n];

      if (!class_filter_.empty() && class_filter_.find(cls_id) == class_filter_.end()) {
        continue;
      }

      float x1 = boxes[n * 4 + 0];
      float y1 = boxes[n * 4 + 1];
      float w = boxes[n * 4 + 2];
      float h = boxes[n * 4 + 3];

      Detection det;
      det.cls_id = cls_id;
      det.score = obj_probs[i];
      det.bbox.x = static_cast<float>(Clamp(static_cast<int>(x1 / scale_w), 0, frame->width));
      det.bbox.y = static_cast<float>(Clamp(static_cast<int>(y1 / scale_h), 0, frame->height));
      det.bbox.w = static_cast<float>(Clamp(static_cast<int>(w / scale_w), 0, frame->width - static_cast<int>(det.bbox.x)));
      det.bbox.h = static_cast<float>(Clamp(static_cast<int>(h / scale_h), 0, frame->height - static_cast<int>(det.bbox.y)));
      det.track_id = -1;

      // Debug output for first few detections
      if (det_result->items.size() < 3 && processed_ < 10) {
        std::cout << "[ai_yolo] det: raw(" << x1 << "," << y1 << "," << w << "," << h
                  << ") -> bbox(" << det.bbox.x << "," << det.bbox.y << ","
                  << det.bbox.w << "," << det.bbox.h << ") cls=" << cls_id
                  << " score=" << det.score << "\n";
      }

      det_result->items.push_back(det);
    }

    frame->det = det_result;
  }
#endif

  std::string id_;                     // node id from config ("id")
  std::string model_path_;             // .rknn model path ("model_path")
  float conf_thresh_ = 0.25f;          // confidence threshold ("conf")
  float nms_thresh_ = 0.45f;           // NMS IoU threshold ("nms")
  int model_input_w_ = 640;            // model input width (config or model info)
  int model_input_h_ = 640;            // model input height (config or model info)
  int num_classes_ = 80;               // class count ("num_classes")
  YoloVersion yolo_version_ = YoloVersion::V8;  // decode path in use
  bool auto_detect_version_ = false;   // true when version comes from output count
  std::set<int> class_filter_;         // allow-listed class ids; empty = all

  std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
  std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
  uint64_t processed_ = 0;             // total frames seen (incl. throttled)

  int64_t infer_interval_ms_ = 0;      // min ms between inferences; 0 = every frame
  int64_t last_infer_pts_ms_ = 0;      // pts (ms) of the last inferred frame

#if defined(RK3588_ENABLE_RKNN)
  ModelHandle model_handle_ = kInvalidModelHandle;  // AiScheduler model handle
  uint32_t n_output_ = 0;              // model output tensor count
  std::vector<uint8_t> rgb_tmp_;       // reusable repack buffer for strided RGB
#endif
};
|
|
|
|
REGISTER_NODE(AiYoloNode, "ai_yolo");
|
|
|
|
} // namespace rk3588
|