增加了支持滑动窗口的鞋子检测节点

This commit is contained in:
haotian 2026-03-13 21:15:43 +08:00
parent 80e0f229c3
commit e17f49c53c
13 changed files with 873 additions and 15 deletions

View File

@ -105,7 +105,7 @@
"model_w": 768,
"model_h": 768,
"num_classes": 11,
"conf": 0.1,
"conf": 0.35,
"nms": 0.45,
"debug": {
"stats": true,

View File

@ -24,8 +24,8 @@
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 768,
"dst_h": 768,
"dst_w": 640,
"dst_h": 640,
"dst_format": "rgb",
"resize_mode": "stretch",
"use_rga": true
@ -36,14 +36,14 @@
"role": "filter",
"enable": true,
"infer_fps": 10,
"model_path": "./models/best-768.rknn",
"model_path": "./models/shoe_detector_openimages_ppe_v1.rknn",
"model_version": "v8",
"model_w": 768,
"model_h": 768,
"num_classes": 11,
"model_w": 640,
"model_h": 640,
"num_classes": 1,
"conf": 0.35,
"nms": 0.45,
"class_filter": [3, 6]
"class_filter": [0]
},
{
"id": "trk",
@ -52,7 +52,7 @@
"enable": true,
"mode": "bytetrack_lite",
"per_class": true,
"track_classes": [3, 6],
"track_classes": [0],
"max_age_ms": 1500,
"min_hits": 2
},
@ -61,9 +61,9 @@
"type": "logic_gate",
"role": "filter",
"enable": true,
"mode": "ppe_boots_check",
"anchor_class": 6,
"boots_class": 3,
"mode": "simple",
"anchor_class": 0,
"boots_class": 0,
"color_check": {
"enable": true,
"method": "hsv",
@ -79,7 +79,7 @@
"enable": true,
"draw_bbox": true,
"draw_text": true,
"labels": ["helmet", "gloves", "vest", "boots", "goggles", "none", "Person", "no_helmet", "no_goggle", "no_gloves", "no_boots"]
"labels": ["shoe"]
},
{
"id": "post",

View File

@ -0,0 +1,106 @@
{
"queue": {
"size": 8,
"strategy": "drop_oldest"
},
"graphs": [
{
"name": "shoe_detector",
"nodes": [
{
"id": "in",
"type": "input_rtsp",
"role": "source",
"enable": true,
"url": "rtsp://10.0.0.49:8554/cam",
"fps": 30,
"width": 1920,
"height": 1080,
"use_mpp": true,
"force_tcp": true
},
{
"id": "pre",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "rgb",
"resize_mode": "stretch",
"rga_gate": "shoe_detector",
"use_rga": true
},
{
"id": "shoe_det",
"type": "ai_shoe_det",
"role": "filter",
"enable": true,
"model_path": "./models/shoe_detector_openimages_ppe_v1.rknn",
"model_w": 640,
"model_h": 640,
"conf": 0.35,
"nms": 0.45,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
},
{
"id": "trk",
"type": "tracker",
"role": "filter",
"enable": true,
"mode": "bytetrack_lite",
"per_class": true,
"track_classes": [0],
"max_age_ms": 1500,
"min_hits": 2
},
{
"id": "osd",
"type": "osd",
"role": "filter",
"enable": true,
"draw_bbox": true,
"draw_text": true,
"use_rga_bbox": false,
"labels": ["shoe"]
},
{
"id": "post",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "nv12",
"resize_mode": "stretch",
"rga_gate": "shoe_detector",
"use_rga": true
},
{
"id": "pub",
"type": "publish",
"role": "filter",
"enable": true,
"codec": "h264",
"fps": 30,
"bitrate_kbps": 2000,
"use_mpp": true,
"outputs": [
{"proto": "rtsp_server", "port": 8555, "path": "/live/cam1"}
]
}
],
"edges": [
["in", "pre"],
["pre", "shoe_det"],
["shoe_det", "trk"],
["trk", "osd"],
["osd", "post"],
["post", "pub"]
]
}
]
}

View File

@ -79,8 +79,11 @@ https://gitcode.com/GitHub_Trending/ul/ultralytics
pip install -e . rknn-toolkit2
pip install "onnx==1.16.1"
激活python环境
source ./venv_rknn/bin/activate
进入模型目录,执行:
yolo export model=best.pt format=rknn name=rk3588
yolo export model=yolov8s_ppe.pt format=rknn name=rk3588
- 插件可以通过以下方式构建(以 `ai_face_det_zoned` 为例)

View File

@ -0,0 +1,262 @@
/**
 * sliding_window_detector.h - sliding-window detection helper
 *
 * Workflow:
 *  1. Split the source frame into one or more detection windows
 *  2. Crop each window and resize it to the model input size
 *  3. Run inference on each window
 *  4. Map detections back to original-image coordinates (merge with NMS downstream)
 *
 * Usage:
 *   SlidingWindowDetector swd;
 *   swd.InitFromConfig(config);  // initialise windows from config
 *   auto windows = swd.GetWindows(src_w, src_h);
 *   for (auto& win : windows) {
 *     swd.PrepareInput(frame, win, model_w, model_h, buf);
 *     // ... inference ...
 *     auto dets = swd.MapDetectionsToOriginal(raw_dets, win, model_w, model_h);
 *   }
 */
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>
#include "frame/frame.h"
#include "utils/simple_json.h"
namespace rk3588 {
// 窗口定义
// Rectangular region of the source image that detection runs on.
struct DetectionWindow {
    int x = 0;    // top-left x (pixels)
    int y = 0;    // top-left y (pixels)
    int w = 640;  // window width
    int h = 640;  // window height

    // A window is usable only when it has positive area.
    bool IsValid() const { return (w > 0) && (h > 0); }
};
// 检测框(通用格式)
// One detection result in generic form: top-left corner plus size.
struct DetectionBox {
    float x, y, w, h;  // left, top, width, height
    float confidence;  // detection score
    int class_id;      // class index
};
/**
 * Sliding-window detector helper.
 *
 * Splits a source frame into one or more detection windows, prepares the
 * per-window model input (crop + bilinear resize) and maps raw detections
 * back into original-image coordinates. Windows come from configuration
 * when present, otherwise an automatic tiling is computed.
 */
class SlidingWindowDetector {
public:
    SlidingWindowDetector() = default;

    /**
     * Initialise from configuration.
     *
     * @param config SimpleJson object; optional "windows" array of
     *        {x, y, w, h} entries and optional "target_height" used as the
     *        square window size for the automatic layout (default 640).
     * @return always true — malformed or zero-area window entries are skipped.
     */
    bool InitFromConfig(const SimpleJson& config) {
        windows_.clear();
        // Parse pre-configured windows.
        if (const SimpleJson* win_arr = config.Find("windows"); win_arr && win_arr->IsArray()) {
            for (const auto& w : win_arr->AsArray()) {
                if (!w.IsObject()) continue;
                DetectionWindow win;
                win.x = w.ValueOr<int>("x", 0);
                win.y = w.ValueOr<int>("y", 0);
                win.w = w.ValueOr<int>("w", 640);
                win.h = w.ValueOr<int>("h", 640);
                if (win.IsValid()) {
                    windows_.push_back(win);
                }
            }
        }
        // Window edge length used by the automatic layout (CalculateWindowsAuto).
        target_height_ = config.ValueOr<int>("target_height", 640);
        return true;
    }

    /**
     * Windows to run detection on for a frame of the given size.
     *
     * @return pre-configured windows when any were parsed, otherwise an
     *         automatically computed tiling of the source image.
     */
    std::vector<DetectionWindow> GetWindows(int src_w, int src_h) const {
        if (!windows_.empty()) {
            return windows_;
        }
        return CalculateWindowsAuto(src_w, src_h);
    }

    /**
     * Crop one window from the frame and resize it to the model input size.
     * The window is clamped to the frame bounds before cropping.
     *
     * Assumes the frame data is packed RGB, 3 bytes per pixel — TODO confirm
     * against the upstream preprocess node's "dst_format".
     *
     * @param frame   source frame (plane 0 or legacy data pointer is used)
     * @param win     window to crop
     * @param model_w model input width
     * @param model_h model input height
     * @param output  destination buffer, must hold model_w * model_h * 3 bytes
     * @return false when the frame/output is missing or the clamped window is empty
     */
    bool PrepareInput(const FramePtr& frame,
                      const DetectionWindow& win,
                      int model_w, int model_h,
                      uint8_t* output) const {
        if (!frame || !frame->data || !output) return false;
        const int src_w = frame->width;
        const int src_h = frame->height;
        // Prefer the plane pointer/stride; fall back to the legacy fields.
        const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
        const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
                               : (frame->stride > 0 ? frame->stride : frame->width * 3);
        // Clamp the window to the source image.
        int win_x = std::max(0, std::min(win.x, src_w - 1));
        int win_y = std::max(0, std::min(win.y, src_h - 1));
        int win_w = std::min(win.w, src_w - win_x);
        int win_h = std::min(win.h, src_h - win_y);
        if (win_w <= 0 || win_h <= 0) return false;
        // Crop the window into a tight temporary buffer.
        std::vector<uint8_t> crop_buf(static_cast<size_t>(win_w) * win_h * 3);
        for (int row = 0; row < win_h; ++row) {
            const uint8_t* src_row = src + (win_y + row) * src_stride + win_x * 3;
            uint8_t* dst_row = crop_buf.data() + row * win_w * 3;
            memcpy(dst_row, src_row, static_cast<size_t>(win_w) * 3);
        }
        // Resize to the model input resolution.
        ResizeRgbBilinear(crop_buf.data(), win_w, win_h, win_w * 3,
                          output, model_w, model_h, false);
        return true;
    }

    /**
     * Map detections from model-input coordinates back to the original image.
     *
     * @param detections boxes in model_w x model_h coordinates
     * @param win        window the detections came from
     * @return boxes translated/scaled into original-image coordinates
     */
    std::vector<DetectionBox> MapDetectionsToOriginal(
            const std::vector<DetectionBox>& detections,
            const DetectionWindow& win,
            int model_w, int model_h) const {
        std::vector<DetectionBox> mapped = detections;
        const float scale_x = static_cast<float>(win.w) / static_cast<float>(model_w);
        const float scale_y = static_cast<float>(win.h) / static_cast<float>(model_h);
        for (auto& det : mapped) {
            det.x = win.x + det.x * scale_x;
            det.y = win.y + det.y * scale_y;
            det.w *= scale_x;
            det.h *= scale_y;
        }
        return mapped;
    }

    /// Number of windows parsed from configuration (0 means auto layout).
    size_t GetConfiguredWindowCount() const {
        return windows_.size();
    }

private:
    /**
     * Automatic tiling used when no windows were configured.
     *
     * FIX: the window size now honours the configured "target_height" value
     * (it was parsed in InitFromConfig but a hard-coded 640 was used here).
     * Note the step clamp below means tiles do not actually overlap; edge
     * tiles may extend past the image and rely on PrepareInput's clamping.
     */
    std::vector<DetectionWindow> CalculateWindowsAuto(int src_w, int src_h) const {
        std::vector<DetectionWindow> windows;
        const int win_size = target_height_ > 0 ? target_height_ : 640;
        // Step between window origins; divisor is >= 1 whenever src > win_size.
        int step_x = (src_w <= win_size) ? src_w : (src_w - win_size) / ((src_w + win_size - 1) / win_size - 1);
        int step_y = (src_h <= win_size) ? src_h : (src_h - win_size) / ((src_h + win_size - 1) / win_size - 1);
        if (step_x < win_size) step_x = win_size;
        if (step_y < win_size) step_y = win_size;
        for (int y = 0; y < src_h; y += step_y) {
            for (int x = 0; x < src_w; x += step_x) {
                DetectionWindow win;
                win.x = x;
                win.y = y;
                win.w = win_size;
                win.h = win_size;
                windows.push_back(win);
                if (x + win_size >= src_w) break;
            }
            if (y + win_size >= src_h) break;
        }
        return windows;
    }

    /**
     * Bilinear resize of packed RGB data.
     * @param swap_rb when true, swaps the R and B channels while resizing.
     */
    static void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride,
                                  uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
        const float scale_x = static_cast<float>(src_w) / dst_w;
        const float scale_y = static_cast<float>(src_h) / dst_h;
        for (int y = 0; y < dst_h; ++y) {
            const float fy = y * scale_y;
            const int y0 = static_cast<int>(fy);
            const int y1 = std::min(y0 + 1, src_h - 1);
            const float dy = fy - y0;
            for (int x = 0; x < dst_w; ++x) {
                const float fx = x * scale_x;
                const int x0 = static_cast<int>(fx);
                const int x1 = std::min(x0 + 1, src_w - 1);
                const float dx = fx - x0;
                // Interpolate each channel from the four neighbouring pixels.
                for (int c = 0; c < 3; ++c) {
                    const int src_c = swap_rb ? (2 - c) : c;
                    const float v00 = src[(y0 * src_stride) + (x0 * 3) + src_c];
                    const float v01 = src[(y0 * src_stride) + (x1 * 3) + src_c];
                    const float v10 = src[(y1 * src_stride) + (x0 * 3) + src_c];
                    const float v11 = src[(y1 * src_stride) + (x1 * 3) + src_c];
                    const float v0 = v00 * (1 - dx) + v01 * dx;
                    const float v1 = v10 * (1 - dx) + v11 * dx;
                    const float v = v0 * (1 - dy) + v1 * dy;
                    dst[(y * dst_w + x) * 3 + c] = static_cast<uint8_t>(v);
                }
            }
        }
    }

    std::vector<DetectionWindow> windows_;  // pre-configured windows (may be empty)
    int target_height_ = 640;               // auto-layout window size
};
} // namespace rk3588

Binary file not shown.

View File

@ -512,7 +512,24 @@ if(RK3588_ENABLE_ZLMEDIAKIT AND RK_ZLMK_API_LIB)
)
endif()
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler
# ai_shoe_det plugin (shoe detection with sliding window support)
add_library(ai_shoe_det SHARED
ai_shoe_det/ai_shoe_det_node.cpp
)
target_include_directories(ai_shoe_det PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
target_link_libraries(ai_shoe_det PRIVATE project_options Threads::Threads)
if(RK3588_ENABLE_RKNN AND RK_RKNN_LIB)
target_compile_definitions(ai_shoe_det PRIVATE RK3588_ENABLE_RKNN)
target_include_directories(ai_shoe_det PRIVATE ${RKNN_RUNTIME_INCLUDE_DIR})
target_link_libraries(ai_shoe_det PRIVATE ${RK_RKNN_LIB})
endif()
set_target_properties(ai_shoe_det PROPERTIES
OUTPUT_NAME "ai_shoe_det"
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler ai_shoe_det
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
)

View File

@ -0,0 +1,26 @@
# ai_shoe_det - shoe detection plugin build rules
set(PLUGIN_NAME ai_shoe_det)
add_library(${PLUGIN_NAME} SHARED
ai_shoe_det_node.cpp
)
target_include_directories(${PLUGIN_NAME} PRIVATE
${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/third_party/rknpu2/runtime/${CMAKE_SYSTEM_PROCESSOR}/include
)
target_link_libraries(${PLUGIN_NAME} PRIVATE
core
${RKNN_LIB}
)
set_target_properties(${PLUGIN_NAME} PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins
)
#
install(TARGETS ${PLUGIN_NAME}
LIBRARY DESTINATION lib/plugins
)

View File

@ -0,0 +1,95 @@
# ai_shoe_det - 鞋子检测节点
专门针对鞋子检测优化的节点,支持滑动窗口提高小目标检测率。
## 特性
- **滑动窗口支持**:可配置多窗口覆盖全图,提高小鞋子检测精度
- **单类优化**:专门针对 shoe 单类检测优化
- **自动 NMS**:多窗口结果自动合并去重
- **轻量快速**:基于 RK3588 NPU 加速
## 配置参数
```json
{
"id": "shoe_det",
"type": "ai_shoe_det",
"model_path": "./models/shoe_detector.rknn",
"model_w": 640,
"model_h": 640,
"conf": 0.25,
"nms": 0.45,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
}
```
### 参数说明
| 参数 | 类型 | 默认值 | 说明 |
|------|------|--------|------|
| `model_path` | string | - | RKNN 模型路径 |
| `model_w` | int | 640 | 模型输入宽度 |
| `model_h` | int | 640 | 模型输入高度 |
| `conf` | float | 0.25 | 置信度阈值 |
| `nms` | float | 0.45 | NMS IoU 阈值 |
| `windows` | array | - | 滑动窗口配置,不配置则使用全图单窗口 |
### 窗口配置
- **单窗口(全图)**:不配置 `windows` 或配置 `[{"x":0,"y":0,"w":0,"h":0}]`
- **双窗口(推荐)**:左右各 960x1080
```json
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
```
## Pipeline 示例
```json
{
"nodes": [
{"id": "in", "type": "input_rtsp", "url": "rtsp://..."},
{"id": "pre", "type": "preprocess", "dst_w": 1920, "dst_h": 1080, "dst_format": "rgb"},
{
"id": "shoe_det",
"type": "ai_shoe_det",
"model_path": "./models/shoe_detector_openimages_ppe_v1.rknn",
"model_w": 640,
"model_h": 640,
"conf": 0.25,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
},
{"id": "osd", "type": "osd"},
{"id": "pub", "type": "publish"}
],
"edges": [
["in", "pre"],
["pre", "shoe_det"],
["shoe_det", "osd"],
["osd", "pub"]
]
}
```
## 编译
```bash
cd build
cmake ..
make ai_shoe_det -j4
```
## 注意事项
1. 模型必须是单类shoeYOLOv8 格式
2. 多窗口会增加 NPU 负载2窗口 = 2倍推理时间
3. 窗口之间有重叠时NMS 会自动去重

View File

@ -0,0 +1,349 @@
/**
 * ai_shoe_det - shoe detection node with sliding-window support.
 *
 * Structure mirrors the ai_yolo plugin; detection is restricted to a single
 * "shoe" class and can optionally run over multiple configured windows.
 */
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "ai_scheduler.h"
#include "frame/frame.h"
#include "hw/i_infer_backend.h"
#include "node.h"
#include "utils/logger.h"
#if defined(RK3588_ENABLE_RKNN)
#include "rknn_api.h"
#endif
namespace rk3588 {
// Crop region for one detection pass, in source-image pixel coordinates.
struct DetWindow {
    int x;  // top-left x
    int y;  // top-left y
    int w;  // width (0 together with h == 0 means "whole frame")
    int h;  // height
};
// One detection: top-left corner, size, score and class index.
struct DetBox {
    float x, y, w, h;  // left, top, width, height
    float conf;        // confidence score
    int class_id;      // class index (always 0 for single-class shoe model)
};
/**
 * Shoe detection node with optional sliding-window inference.
 *
 * For every incoming frame the node runs the RKNN shoe model over one or
 * more configured windows, merges the per-window detections with NMS and
 * attaches the result to the frame before forwarding it downstream.
 * When built without RKNN the node is a pass-through.
 */
class AiShoeDetNode : public INode {
public:
    std::string Id() const override { return id_; }
    std::string Type() const override { return "ai_shoe_det"; }

    /**
     * Read configuration, resolve queues and (when RKNN is enabled) the
     * inference backend and model. Returns false so the pipeline fails fast
     * when a required dependency is missing.
     */
    bool Init(const SimpleJson& config, const NodeContext& ctx) override {
        id_ = config.ValueOr<std::string>("id", "shoe_det");
        model_path_ = config.ValueOr<std::string>("model_path",
                                                  "./models/shoe_detector.rknn");
        model_w_ = config.ValueOr<int>("model_w", 640);
        model_h_ = config.ValueOr<int>("model_h", 640);
        conf_thresh_ = config.ValueOr<float>("conf", 0.25f);
        nms_thresh_ = config.ValueOr<float>("nms", 0.45f);

        // Parse the optional sliding-window list.
        windows_.clear();
        if (const SimpleJson* win_arr = config.Find("windows"); win_arr && win_arr->IsArray()) {
            for (const auto& w : win_arr->AsArray()) {
                if (!w.IsObject()) continue;
                DetWindow win;
                win.x = w.ValueOr<int>("x", 0);
                win.y = w.ValueOr<int>("y", 0);
                win.w = w.ValueOr<int>("w", 640);
                win.h = w.ValueOr<int>("h", 640);
                windows_.push_back(win);
            }
        }
        // Default to a single whole-frame window (w/h == 0 means full image).
        if (windows_.empty()) {
            windows_.push_back({0, 0, 0, 0});
        }

        input_queue_ = ctx.input_queue;
        output_queues_ = ctx.output_queues;
        if (!input_queue_) {
            LogError("[ai_shoe_det] no input queue");
            return false;
        }

#if defined(RK3588_ENABLE_RKNN)
        // FIX: infer_backend_ is only declared when RKNN is enabled, so the
        // backend lookup must live inside the same guard — the original code
        // referenced it unconditionally and broke non-RKNN builds.
        infer_backend_ = ctx.infer_backend;
        if (!infer_backend_) {
            LogError("[ai_shoe_det] no infer backend");
            return false;
        }
        std::string err;
        model_handle_ = infer_backend_->LoadModel(model_path_, err);
        if (model_handle_ == kInvalidModelHandle) {
            LogError("[ai_shoe_det] failed to load model: " + err);
            return false;
        }
        input_buf_.resize(static_cast<size_t>(model_w_) * model_h_ * 3);
        LogInfo("[ai_shoe_det] model loaded: " + model_path_);
#else
        LogWarn("[ai_shoe_det] RKNN disabled");
#endif
        return true;
    }

    bool Start() override {
        LogInfo("[ai_shoe_det] start, windows=" + std::to_string(windows_.size()));
        return true;
    }

    void Stop() override {
#if defined(RK3588_ENABLE_RKNN)
        if (model_handle_ != kInvalidModelHandle) {
            infer_backend_->UnloadModel(model_handle_);
            model_handle_ = kInvalidModelHandle;
        }
#endif
        LogInfo("[ai_shoe_det] stop");
    }

    /// Run detection on the frame (RKNN builds only), then forward it.
    NodeStatus Process(FramePtr frame) override {
        if (!frame) return NodeStatus::DROP;
#if defined(RK3588_ENABLE_RKNN)
        RunDetection(frame);
#endif
        Push(frame);
        return NodeStatus::OK;
    }

private:
    // Forward the frame to every downstream queue.
    void Push(FramePtr frame) {
        for (auto& q : output_queues_) q->Push(frame);
    }

#if defined(RK3588_ENABLE_RKNN)
    /// Detect over all windows, merge with NMS and attach results to the frame.
    void RunDetection(FramePtr frame) {
        if (!frame->data || frame->data_size == 0) return;
        const int src_w = frame->width;
        const int src_h = frame->height;
        std::vector<DetBox> all_dets;
        for (const auto& win : windows_) {
            auto dets = DetectWindow(frame, src_w, src_h, win);
            all_dets.insert(all_dets.end(), dets.begin(), dets.end());
        }
        // Cross-window NMS removes duplicates in overlapping regions.
        all_dets = ApplyNMS(all_dets, nms_thresh_);
        if (!frame->det) {
            frame->det = std::make_shared<DetectionResult>();
        }
        frame->det->items.clear();
        frame->det->img_w = src_w;
        frame->det->img_h = src_h;
        for (const auto& d : all_dets) {
            Detection item;
            item.bbox = {d.x, d.y, d.w, d.h};
            item.score = d.conf;
            item.cls_id = d.class_id;
            frame->det->items.push_back(item);
        }
    }

    /// Crop + resize one window, run inference, return boxes in image coords.
    std::vector<DetBox> DetectWindow(FramePtr frame, int src_w, int src_h, const DetWindow& win) {
        std::vector<DetBox> dets;
        // Resolve the crop region; w/h == 0 selects the whole frame.
        int win_x, win_y, win_w, win_h;
        if (win.w == 0 || win.h == 0) {
            win_x = 0; win_y = 0; win_w = src_w; win_h = src_h;
        } else {
            win_x = std::max(0, std::min(win.x, src_w - 1));
            win_y = std::max(0, std::min(win.y, src_h - 1));
            win_w = std::min(win.w, src_w - win_x);
            win_h = std::min(win.h, src_h - win_y);
        }
        if (win_w <= 0 || win_h <= 0) return dets;
        // Source pointer/stride: prefer plane 0, fall back to legacy fields.
        const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
        int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
                         : (frame->stride > 0 ? frame->stride : src_w * 3);
        if (!src || src_stride <= 0) return dets;
        // Crop into a tight temporary buffer (assumes packed RGB24 input).
        std::vector<uint8_t> crop_buf(static_cast<size_t>(win_w) * win_h * 3);
        for (int row = 0; row < win_h; ++row) {
            const uint8_t* src_row = src + (win_y + row) * src_stride + win_x * 3;
            uint8_t* dst_row = crop_buf.data() + row * win_w * 3;
            memcpy(dst_row, src_row, static_cast<size_t>(win_w) * 3);
        }
        // Resize to the model input resolution.
        ResizeRgbBilinear(crop_buf.data(), win_w, win_h, win_w * 3,
                          input_buf_.data(), model_w_, model_h_, model_w_ * 3);
        // Inference.
        InferInput input;
        input.width = model_w_;
        input.height = model_h_;
        input.is_nhwc = true;
        input.data = input_buf_.data();
        input.size = input_buf_.size();
        input.type = RKNN_TENSOR_UINT8;
        auto r = infer_backend_->InferBorrowed(model_handle_, input);
        if (!r.success || r.outputs.empty() || !r.outputs[0].data) {
            LogWarn("[ai_shoe_det] inference failed");
            return dets;
        }
        return ParseOutput(r.outputs, win_x, win_y, win_w, win_h);
    }

    /**
     * Decode raw model output into boxes mapped to image coordinates.
     *
     * NOTE(review): assumes a [8400, 5] float layout (x, y, w, h, conf) and a
     * fixed box count — verify against the exported model; if the output tensor
     * is smaller this reads out of bounds.
     */
    std::vector<DetBox> ParseOutput(const std::vector<AiScheduler::BorrowedOutput>& outputs,
                                    int win_x, int win_y, int win_w, int win_h) {
        std::vector<DetBox> dets;
        if (outputs.empty() || !outputs[0].data) return dets;
        const float* data = reinterpret_cast<const float*>(outputs[0].data);
        const int num_boxes = 8400;  // YOLOv8 default for 640x640 input
        const float scale_x = static_cast<float>(win_w) / model_w_;
        const float scale_y = static_cast<float>(win_h) / model_h_;
        for (int i = 0; i < num_boxes; ++i) {
            const float conf = data[i * 5 + 4];
            if (conf < conf_thresh_) continue;
            DetBox box;
            box.x = win_x + data[i * 5 + 0] * scale_x;
            box.y = win_y + data[i * 5 + 1] * scale_y;
            box.w = data[i * 5 + 2] * scale_x;
            box.h = data[i * 5 + 3] * scale_y;
            box.conf = conf;
            box.class_id = 0;  // single-class model
            dets.push_back(box);
        }
        return dets;
    }

    /// Greedy NMS; sorts the input by confidence (in place) and keeps survivors.
    std::vector<DetBox> ApplyNMS(std::vector<DetBox>& dets, float thresh) {
        if (dets.empty()) return dets;
        std::sort(dets.begin(), dets.end(),
                  [](const DetBox& a, const DetBox& b) { return a.conf > b.conf; });
        std::vector<DetBox> keep;
        std::vector<bool> suppressed(dets.size(), false);
        for (size_t i = 0; i < dets.size(); ++i) {
            if (suppressed[i]) continue;
            keep.push_back(dets[i]);
            for (size_t j = i + 1; j < dets.size(); ++j) {
                if (suppressed[j]) continue;
                if (ComputeIoU(dets[i], dets[j]) > thresh) {
                    suppressed[j] = true;
                }
            }
        }
        return keep;
    }

    /// Intersection-over-union of two top-left + size boxes.
    static float ComputeIoU(const DetBox& a, const DetBox& b) {
        const float x1 = std::max(a.x, b.x);
        const float y1 = std::max(a.y, b.y);
        const float x2 = std::min(a.x + a.w, b.x + b.w);
        const float y2 = std::min(a.y + a.h, b.y + b.h);
        const float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
        const float uni = a.w * a.h + b.w * b.h - inter;
        return uni > 0 ? inter / uni : 0;
    }

    /// Bilinear resize of packed RGB data.
    static void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride,
                                  uint8_t* dst, int dst_w, int dst_h, int dst_stride) {
        const float scale_x = static_cast<float>(src_w) / dst_w;
        const float scale_y = static_cast<float>(src_h) / dst_h;
        for (int y = 0; y < dst_h; ++y) {
            const float fy = y * scale_y;
            const int y0 = static_cast<int>(fy);
            const int y1 = std::min(y0 + 1, src_h - 1);
            const float dy = fy - y0;
            for (int x = 0; x < dst_w; ++x) {
                const float fx = x * scale_x;
                const int x0 = static_cast<int>(fx);
                const int x1 = std::min(x0 + 1, src_w - 1);
                const float dx = fx - x0;
                for (int c = 0; c < 3; ++c) {
                    const float v00 = src[y0 * src_stride + x0 * 3 + c];
                    const float v01 = src[y0 * src_stride + x1 * 3 + c];
                    const float v10 = src[y1 * src_stride + x0 * 3 + c];
                    const float v11 = src[y1 * src_stride + x1 * 3 + c];
                    const float v = v00 * (1 - dx) * (1 - dy) +
                                    v01 * dx * (1 - dy) +
                                    v10 * (1 - dx) * dy +
                                    v11 * dx * dy;
                    dst[y * dst_stride + x * 3 + c] = static_cast<uint8_t>(v);
                }
            }
        }
    }
#endif

    std::string id_;
    std::string model_path_;
    int model_w_ = 640;
    int model_h_ = 640;
    float conf_thresh_ = 0.25f;
    float nms_thresh_ = 0.45f;
    std::vector<DetWindow> windows_;
    std::vector<uint8_t> input_buf_;  // reused model-input staging buffer
    std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
    std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
#if defined(RK3588_ENABLE_RKNN)
    std::shared_ptr<IInferBackend> infer_backend_;
    ModelHandle model_handle_ = kInvalidModelHandle;
#endif
};
REGISTER_NODE(AiShoeDetNode, "ai_shoe_det");
} // namespace rk3588