diff --git a/configs/full_pipeline_1080p.json b/configs/full_pipeline_1080p.json index 72c0a9b..e95b73e 100644 --- a/configs/full_pipeline_1080p.json +++ b/configs/full_pipeline_1080p.json @@ -105,7 +105,7 @@ "model_w": 768, "model_h": 768, "num_classes": 11, - "conf": 0.1, + "conf": 0.35, "nms": 0.45, "debug": { "stats": true, diff --git a/configs/sample_logic_gate_ppe.json b/configs/sample_logic_gate_ppe.json index 28f6c47..fd99754 100644 --- a/configs/sample_logic_gate_ppe.json +++ b/configs/sample_logic_gate_ppe.json @@ -24,8 +24,8 @@ "type": "preprocess", "role": "filter", "enable": true, - "dst_w": 768, - "dst_h": 768, + "dst_w": 640, + "dst_h": 640, "dst_format": "rgb", "resize_mode": "stretch", "use_rga": true @@ -36,14 +36,14 @@ "role": "filter", "enable": true, "infer_fps": 10, - "model_path": "./models/best-768.rknn", + "model_path": "./models/shoe_detector_openimages_ppe_v1.rknn", "model_version": "v8", - "model_w": 768, - "model_h": 768, - "num_classes": 11, + "model_w": 640, + "model_h": 640, + "num_classes": 1, "conf": 0.35, "nms": 0.45, - "class_filter": [3, 6] + "class_filter": [0] }, { "id": "trk", @@ -52,7 +52,7 @@ "enable": true, "mode": "bytetrack_lite", "per_class": true, - "track_classes": [3, 6], + "track_classes": [0], "max_age_ms": 1500, "min_hits": 2 }, @@ -61,9 +61,9 @@ "type": "logic_gate", "role": "filter", "enable": true, - "mode": "ppe_boots_check", - "anchor_class": 6, - "boots_class": 3, + "mode": "simple", + "anchor_class": 0, + "boots_class": 0, "color_check": { "enable": true, "method": "hsv", @@ -79,7 +79,7 @@ "enable": true, "draw_bbox": true, "draw_text": true, - "labels": ["helmet", "gloves", "vest", "boots", "goggles", "none", "Person", "no_helmet", "no_goggle", "no_gloves", "no_boots"] + "labels": ["shoe"] }, { "id": "post", diff --git a/configs/sample_shoe_detect.json b/configs/sample_shoe_detect.json new file mode 100644 index 0000000..9704446 --- /dev/null +++ b/configs/sample_shoe_detect.json @@ -0,0 +1,106 @@ +{ + 
"queue": { + "size": 8, + "strategy": "drop_oldest" + }, + "graphs": [ + { + "name": "shoe_detector", + "nodes": [ + { + "id": "in", + "type": "input_rtsp", + "role": "source", + "enable": true, + "url": "rtsp://10.0.0.49:8554/cam", + "fps": 30, + "width": 1920, + "height": 1080, + "use_mpp": true, + "force_tcp": true + }, + { + "id": "pre", + "type": "preprocess", + "role": "filter", + "enable": true, + "dst_w": 1920, + "dst_h": 1080, + "dst_format": "rgb", + "resize_mode": "stretch", + "rga_gate": "shoe_detector", + "use_rga": true + }, + { + "id": "shoe_det", + "type": "ai_shoe_det", + "role": "filter", + "enable": true, + "model_path": "./models/shoe_detector_openimages_ppe_v1.rknn", + "model_w": 640, + "model_h": 640, + "conf": 0.35, + "nms": 0.45, + "windows": [ + {"x": 0, "y": 0, "w": 960, "h": 1080}, + {"x": 960, "y": 0, "w": 960, "h": 1080} + ] + }, + { + "id": "trk", + "type": "tracker", + "role": "filter", + "enable": true, + "mode": "bytetrack_lite", + "per_class": true, + "track_classes": [0], + "max_age_ms": 1500, + "min_hits": 2 + }, + { + "id": "osd", + "type": "osd", + "role": "filter", + "enable": true, + "draw_bbox": true, + "draw_text": true, + "use_rga_bbox": false, + "labels": ["shoe"] + }, + { + "id": "post", + "type": "preprocess", + "role": "filter", + "enable": true, + "dst_w": 1920, + "dst_h": 1080, + "dst_format": "nv12", + "resize_mode": "stretch", + "rga_gate": "shoe_detector", + "use_rga": true + }, + { + "id": "pub", + "type": "publish", + "role": "filter", + "enable": true, + "codec": "h264", + "fps": 30, + "bitrate_kbps": 2000, + "use_mpp": true, + "outputs": [ + {"proto": "rtsp_server", "port": 8555, "path": "/live/cam1"} + ] + } + ], + "edges": [ + ["in", "pre"], + ["pre", "shoe_det"], + ["shoe_det", "trk"], + ["trk", "osd"], + ["osd", "post"], + ["post", "pub"] + ] + } + ] +} diff --git a/docs/requirements/guide.md b/docs/requirements/guide.md index aaf29e6..8130ea6 100644 --- a/docs/requirements/guide.md +++ 
b/docs/requirements/guide.md @@ -79,8 +79,11 @@ https://gitcode.com/GitHub_Trending/ul/ultralytics pip install -e . rknn-toolkit2 pip install "onnx==1.16.1" +激活python环境 +source ./venv_rknn/bin/activate + 进入模型目录,执行: -yolo export model=best.pt format=rknn name=rk3588 +yolo export model=yolov8s_ppe.pt format=rknn name=rk3588 - 插件可以通过以下方式构建(以ai_face_det_zoned为例):
diff --git a/include/utils/sliding_window_detector.h b/include/utils/sliding_window_detector.h
new file mode 100644
index 0000000..0242b16
--- /dev/null
+++ b/include/utils/sliding_window_detector.h
@@ -0,0 +1,301 @@
+/**
+ * sliding_window_detector.h - Generic sliding-window detection helper.
+ *
+ * What it provides:
+ *   1. Windows taken from the configuration, or an automatically computed grid.
+ *   2. Cropping a window out of a packed RGB frame and resizing it to the
+ *      model input size.
+ *   3. Mapping detections from model-input coordinates back to the frame.
+ *
+ * Merging the detections of several windows (NMS) is left to the caller.
+ *
+ * Usage:
+ *   SlidingWindowDetector swd;
+ *   swd.InitFromConfig(config);
+ *   std::vector<uint8_t> input(static_cast<size_t>(model_w) * model_h * 3);
+ *   for (const auto& win : swd.GetWindows(src_w, src_h)) {
+ *       if (!swd.PrepareInput(frame, win, model_w, model_h, input.data())) continue;
+ *       // ... run inference on input, collect raw_dets ...
+ *       auto dets = swd.MapDetectionsToOriginal(raw_dets, win, model_w, model_h,
+ *                                               src_w, src_h);
+ *   }
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include "frame/frame.h"
+#include "utils/simple_json.h"
+
+namespace rk3588 {
+
+/// Rectangular region of the source frame, in pixels.
+struct DetectionWindow {
+    int x = 0;
+    int y = 0;
+    int w = 640;
+    int h = 640;
+
+    bool IsValid() const { return w > 0 && h > 0; }
+};
+
+/// Detection box in a generic layout: top-left corner plus size.
+struct DetectionBox {
+    float x, y, w, h;
+    float confidence;
+    int class_id;
+};
+
+/**
+ * Sliding-window helper: window bookkeeping, crop + resize, coordinate mapping.
+ */
+class SlidingWindowDetector {
+public:
+    SlidingWindowDetector() = default;
+
+    /**
+     * Load settings from a node configuration.
+     *
+     * Reads the optional "windows" array (objects with x/y/w/h; entries whose
+     * width or height is not positive are dropped) and "target_height".
+     *
+     * @param config node configuration
+     * @return always true; without usable "windows" the grid is computed
+     *         automatically by GetWindows()
+     */
+    bool InitFromConfig(const SimpleJson& config) {
+        windows_.clear();
+
+        if (const SimpleJson* win_arr = config.Find("windows"); win_arr && win_arr->IsArray()) {
+            for (const auto& entry : win_arr->AsArray()) {
+                if (!entry.IsObject()) continue;
+                DetectionWindow win;
+                win.x = entry.ValueOr<int>("x", 0);
+                win.y = entry.ValueOr<int>("y", 0);
+                win.w = entry.ValueOr<int>("w", 640);
+                win.h = entry.ValueOr<int>("h", 640);
+                if (win.IsValid()) windows_.push_back(win);
+            }
+        }
+
+        // Kept for configuration compatibility; nothing in this class reads it yet.
+        target_height_ = config.ValueOr<int>("target_height", 640);
+
+        return true;
+    }
+
+    /**
+     * Windows to process for a frame of the given size.
+     *
+     * @param src_w frame width in pixels
+     * @param src_h frame height in pixels
+     * @return the configured windows if any, otherwise an automatic grid
+     */
+    std::vector<DetectionWindow> GetWindows(int src_w, int src_h) const {
+        if (!windows_.empty()) return windows_;
+        return CalculateWindowsAuto(src_w, src_h);
+    }
+
+    /**
+     * Crop one window from the frame and resize it to the model input size.
+     *
+     * The window is clipped to the frame first. Pixels are read as packed
+     * 3-byte RGB. NOTE(review): the frame's pixel format is not checked
+     * here - confirm that callers only pass RGB frames.
+     *
+     * @param frame   source frame
+     * @param win     window in frame coordinates
+     * @param model_w model input width
+     * @param model_h model input height
+     * @param output  destination buffer of model_w * model_h * 3 bytes
+     * @return false on missing data, non-positive sizes, a row stride too
+     *         small for RGB, or a window that is empty after clipping
+     */
+    bool PrepareInput(const FramePtr& frame,
+                      const DetectionWindow& win,
+                      int model_w, int model_h,
+                      uint8_t* output) const {
+        if (!frame || !frame->data || !output) return false;
+        if (model_w <= 0 || model_h <= 0) return false;
+
+        const int src_w = frame->width;
+        const int src_h = frame->height;
+
+        const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
+        const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
+                             : (frame->stride > 0 ? frame->stride : frame->width * 3);
+
+        // A packed RGB row needs 3 bytes per pixel; with a shorter stride the
+        // buffer cannot be read as RGB without running past the row.
+        if (src_w <= 0 || src_h <= 0 || src_stride < src_w * 3) return false;
+
+        const DetectionWindow roi = ClipToFrame(win, src_w, src_h);
+        if (!roi.IsValid()) return false;
+
+        // Sample straight from the frame through its stride, so no
+        // intermediate crop buffer has to be allocated and filled.
+        const uint8_t* roi_origin = src + static_cast<size_t>(roi.y) * src_stride
+                                        + static_cast<size_t>(roi.x) * 3;
+        ResizeRgbBilinear(roi_origin, roi.w, roi.h, src_stride,
+                          output, model_w, model_h, false);
+        return true;
+    }
+
+    /**
+     * Map detections from model-input coordinates back to frame coordinates.
+     *
+     * Pass the frame size so the window is clipped exactly as PrepareInput()
+     * clips it; without it, boxes from a window that overhangs the frame are
+     * scaled with the unclipped window size.
+     *
+     * @param detections boxes in the model_w x model_h coordinate system
+     * @param win        window that produced them
+     * @param model_w    model input width
+     * @param model_h    model input height
+     * @param src_w      frame width, or 0 to use the window as given
+     * @param src_h      frame height, or 0 to use the window as given
+     * @return boxes in frame coordinates; empty if the model size is invalid
+     */
+    std::vector<DetectionBox> MapDetectionsToOriginal(
+        const std::vector<DetectionBox>& detections,
+        const DetectionWindow& win,
+        int model_w, int model_h,
+        int src_w = 0, int src_h = 0) const {
+        if (model_w <= 0 || model_h <= 0) return {};
+
+        const DetectionWindow roi =
+            (src_w > 0 && src_h > 0) ? ClipToFrame(win, src_w, src_h) : win;
+        const float scale_x = static_cast<float>(roi.w) / static_cast<float>(model_w);
+        const float scale_y = static_cast<float>(roi.h) / static_cast<float>(model_h);
+
+        std::vector<DetectionBox> mapped = detections;
+        for (auto& det : mapped) {
+            det.x = roi.x + det.x * scale_x;
+            det.y = roi.y + det.y * scale_y;
+            det.w *= scale_x;
+            det.h *= scale_y;
+        }
+        return mapped;
+    }
+
+    /**
+     * Number of windows that came from the configuration.
+     */
+    size_t GetConfiguredWindowCount() const {
+        return windows_.size();
+    }
+
+private:
+    /**
+     * Clip a window to the frame: the origin is clamped into the frame and
+     * the size is cut at the right/bottom edge. The result may be invalid.
+     */
+    static DetectionWindow ClipToFrame(const DetectionWindow& win, int src_w, int src_h) {
+        DetectionWindow clipped;
+        clipped.x = std::max(0, std::min(win.x, src_w - 1));
+        clipped.y = std::max(0, std::min(win.y, src_h - 1));
+        clipped.w = std::min(win.w, src_w - clipped.x);
+        clipped.h = std::min(win.h, src_h - clipped.y);
+        return clipped;
+    }
+
+    /**
+     * Window origins along one axis: spread evenly, first at 0, last flush
+     * with the far edge, neighbours overlapping when the length is not a
+     * multiple of the window size.
+     */
+    static std::vector<int> AxisOrigins(int length, int win_size) {
+        std::vector<int> origins;
+        if (length <= 0) return origins;
+        if (length <= win_size) {
+            origins.push_back(0);
+            return origins;
+        }
+        const int count = (length + win_size - 1) / win_size;  // >= 2 here
+        const long long last = length - win_size;
+        for (int i = 0; i < count; ++i) {
+            origins.push_back(static_cast<int>(last * i / (count - 1)));
+        }
+        return origins;
+    }
+
+    /**
+     * Build a grid of windows that covers the whole frame.
+     *
+     * Windows are 640 x 640 (smaller when the frame is smaller) and always
+     * lie inside the frame, so PrepareInput() never has to clip them.
+     */
+    std::vector<DetectionWindow> CalculateWindowsAuto(int src_w, int src_h) const {
+        const int win_size = 640;
+        const std::vector<int> xs = AxisOrigins(src_w, win_size);
+        const std::vector<int> ys = AxisOrigins(src_h, win_size);
+
+        std::vector<DetectionWindow> windows;
+        windows.reserve(xs.size() * ys.size());
+        for (const int y : ys) {
+            for (const int x : xs) {
+                DetectionWindow win;
+                win.x = x;
+                win.y = y;
+                win.w = std::min(win_size, src_w);
+                win.h = std::min(win_size, src_h);
+                windows.push_back(win);
+            }
+        }
+        return windows;
+    }
+
+    /**
+     * Bilinear resize of packed RGB into a tightly packed destination.
+     *
+     * @param src        first byte of the source region
+     * @param src_w      source region width in pixels
+     * @param src_h      source region height in pixels
+     * @param src_stride bytes between consecutive source rows
+     * @param dst        destination buffer, dst_w * dst_h * 3 bytes
+     * @param dst_w      destination width in pixels
+     * @param dst_h      destination height in pixels
+     * @param swap_rb    swap the R and B channels while copying
+     */
+    static void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride,
+                                  uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
+        if (!src || !dst || src_w <= 0 || src_h <= 0 || dst_w <= 0 || dst_h <= 0) return;
+
+        const float scale_x = static_cast<float>(src_w) / dst_w;
+        const float scale_y = static_cast<float>(src_h) / dst_h;
+
+        for (int y = 0; y < dst_h; ++y) {
+            const float fy = y * scale_y;
+            const int y0 = std::min(static_cast<int>(fy), src_h - 1);
+            const int y1 = std::min(y0 + 1, src_h - 1);
+            const float dy = fy - y0;
+            const uint8_t* row0 = src + static_cast<size_t>(y0) * src_stride;
+            const uint8_t* row1 = src + static_cast<size_t>(y1) * src_stride;
+            uint8_t* out = dst + static_cast<size_t>(y) * dst_w * 3;
+
+            for (int x = 0; x < dst_w; ++x) {
+                const float fx = x * scale_x;
+                const int x0 = std::min(static_cast<int>(fx), src_w - 1);
+                const int x1 = std::min(x0 + 1, src_w - 1);
+                const float dx = fx - x0;
+
+                for (int c = 0; c < 3; ++c) {
+                    const int src_c = swap_rb ? (2 - c) : c;
+                    const float top = row0[x0 * 3 + src_c] * (1 - dx) + row0[x1 * 3 + src_c] * dx;
+                    const float bottom = row1[x0 * 3 + src_c] * (1 - dx) + row1[x1 * 3 + src_c] * dx;
+                    out[x * 3 + c] = static_cast<uint8_t>(top * (1 - dy) + bottom * dy);
+                }
+            }
+        }
+    }
+
+    std::vector<DetectionWindow> windows_;
+    int target_height_ = 640;
+};
+
+}  // namespace rk3588
diff --git a/models/shoe_detector_openimages_ppe_v1.rknn b/models/shoe_detector_openimages_ppe_v1.rknn
new file mode 100644
index 0000000..0362fb0
Binary files /dev/null and b/models/shoe_detector_openimages_ppe_v1.rknn differ
diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 1871599..ac9c549 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -512,7 +512,24 @@ if(RK3588_ENABLE_ZLMEDIAKIT AND RK_ZLMK_API_LIB) ) endif() -install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler +# ai_shoe_det plugin (shoe detection with sliding window support) +add_library(ai_shoe_det SHARED + ai_shoe_det/ai_shoe_det_node.cpp +) +target_include_directories(ai_shoe_det PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party) +target_link_libraries(ai_shoe_det PRIVATE project_options Threads::Threads) +if(RK3588_ENABLE_RKNN AND RK_RKNN_LIB) + target_compile_definitions(ai_shoe_det PRIVATE RK3588_ENABLE_RKNN) + target_include_directories(ai_shoe_det PRIVATE ${RKNN_RUNTIME_INCLUDE_DIR}) + target_link_libraries(ai_shoe_det PRIVATE ${RK_RKNN_LIB}) +endif() +set_target_properties(ai_shoe_det PROPERTIES + OUTPUT_NAME "ai_shoe_det" + LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR} + RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR} +) + +install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd
ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler ai_shoe_det LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins )
diff --git a/plugins/ai_shoe_det/CMakeLists.txt b/plugins/ai_shoe_det/CMakeLists.txt
new file mode 100644
index 0000000..4b953c2
--- /dev/null
+++ b/plugins/ai_shoe_det/CMakeLists.txt
@@ -0,0 +1,42 @@
+# ai_shoe_det - shoe detection node (sliding-window capable)
+#
+# plugins/CMakeLists.txt already declares the ai_shoe_det target. Bail out if
+# this file is also pulled in via add_subdirectory(), because CMake rejects a
+# second add_library() with the same target name.
+if(TARGET ai_shoe_det)
+    return()
+endif()
+
+set(PLUGIN_NAME ai_shoe_det)
+
+add_library(${PLUGIN_NAME} SHARED
+    ai_shoe_det_node.cpp
+)
+
+target_include_directories(${PLUGIN_NAME} PRIVATE
+    ${CMAKE_SOURCE_DIR}/include
+    ${CMAKE_SOURCE_DIR}/third_party/rknpu2/runtime/${CMAKE_SYSTEM_PROCESSOR}/include
+)
+
+# NOTE(review): plugins/CMakeLists.txt links project_options, Threads::Threads
+# and ${RK_RKNN_LIB} for this target; confirm that `core` and ${RKNN_LIB}
+# exist in this build before relying on this file.
+target_link_libraries(${PLUGIN_NAME} PRIVATE
+    core
+    ${RKNN_LIB}
+)
+
+# ai_shoe_det_node.cpp only compiles its inference path when this macro is
+# defined, so mirror the definition made in plugins/CMakeLists.txt.
+if(RK3588_ENABLE_RKNN)
+    target_compile_definitions(${PLUGIN_NAME} PRIVATE RK3588_ENABLE_RKNN)
+endif()
+
+set_target_properties(${PLUGIN_NAME} PROPERTIES
+    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins
+)
+
+# Install
+install(TARGETS ${PLUGIN_NAME}
+    LIBRARY DESTINATION lib/plugins
+)
diff --git a/plugins/ai_shoe_det/README.md b/plugins/ai_shoe_det/README.md
new file mode 100644
index 0000000..03dce84
--- /dev/null
+++ b/plugins/ai_shoe_det/README.md
@@ -0,0 +1,95 @@
+# ai_shoe_det - 鞋子检测节点
+
+专门针对鞋子检测优化的节点,支持滑动窗口提高小目标检测率。
+
+## 特性
+
+- **滑动窗口支持**:可配置多窗口覆盖全图,提高小鞋子检测精度
+- **单类优化**:专门针对 shoe 单类检测优化
+- **自动 NMS**:多窗口结果自动合并去重
+- **轻量快速**:基于 RK3588 NPU 加速
+
+## 配置参数
+
+```json
+{
+  "id": "shoe_det",
+  "type": "ai_shoe_det",
+  "model_path": "./models/shoe_detector.rknn",
+  "model_w": 640,
+  "model_h": 640,
+  "conf": 0.25,
+  "nms": 0.45,
+  "windows": [
+    {"x": 0, "y": 0, "w": 960, "h": 1080},
+    {"x": 960, "y": 0, "w": 960, "h": 1080}
+  ]
+}
+```
+
+### 参数说明
+
+| 参数 | 类型 | 默认值 | 说明 |
+|------|------|--------|------|
+| `model_path` | string | - | RKNN 模型路径 |
+| `model_w` | int | 640 | 模型输入宽度 |
+| `model_h` | int | 640 | 模型输入高度 |
+| `conf` | float | 0.25 | 置信度阈值 |
+| `nms` | float | 0.45 | NMS IoU 阈值 |
+| `windows` | array | - | 滑动窗口配置,不配置则使用全图单窗口 |
+
+### 窗口配置
+
+- 
**单窗口(全图)**:不配置 `windows` 或配置 `[{"x":0,"y":0,"w":0,"h":0}]`
+- **双窗口(推荐)**:左右各 960x1080
+  ```json
+  "windows": [
+    {"x": 0, "y": 0, "w": 960, "h": 1080},
+    {"x": 960, "y": 0, "w": 960, "h": 1080}
+  ]
+  ```
+
+## Pipeline 示例
+
+```json
+{
+  "nodes": [
+    {"id": "in", "type": "input_rtsp", "url": "rtsp://..."},
+    {"id": "pre", "type": "preprocess", "dst_w": 1920, "dst_h": 1080, "dst_format": "rgb"},
+    {
+      "id": "shoe_det",
+      "type": "ai_shoe_det",
+      "model_path": "./models/shoe_detector_openimages_ppe_v1.rknn",
+      "model_w": 640,
+      "model_h": 640,
+      "conf": 0.25,
+      "windows": [
+        {"x": 0, "y": 0, "w": 960, "h": 1080},
+        {"x": 960, "y": 0, "w": 960, "h": 1080}
+      ]
+    },
+    {"id": "osd", "type": "osd"},
+    {"id": "pub", "type": "publish"}
+  ],
+  "edges": [
+    ["in", "pre"],
+    ["pre", "shoe_det"],
+    ["shoe_det", "osd"],
+    ["osd", "pub"]
+  ]
+}
+```
+
+## 编译
+
+```bash
+cd build
+cmake ..
+make ai_shoe_det -j4
+```
+
+## 注意事项
+
+1. 模型必须是单类(shoe)YOLOv8 格式;输出默认按 `[5, N]` 解析,若模型输出为 `[N, 5]`,请设置 `"output_layout": "boxes_first"`
+2. 多窗口会增加 NPU 负载(2窗口 = 2倍推理时间)
+3. 窗口之间有重叠时,NMS 会自动去重
diff --git a/plugins/ai_shoe_det/ai_shoe_det_node.cpp b/plugins/ai_shoe_det/ai_shoe_det_node.cpp
new file mode 100644
index 0000000..0bdbfd8
--- /dev/null
+++ b/plugins/ai_shoe_det/ai_shoe_det_node.cpp
@@ -0,0 +1,417 @@
+/**
+ * ai_shoe_det - shoe detection node with multi-window (sliding-window) support.
+ *
+ * Modelled on ai_yolo. Each configured window is cropped from the RGB frame,
+ * resized to the model input and run through the model; the detections of all
+ * windows are then merged with NMS in frame coordinates.
+ */
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "ai_scheduler.h"
+#include "frame/frame.h"
+#include "hw/i_infer_backend.h"
+#include "node.h"
+#include "utils/logger.h"
+
+#if defined(RK3588_ENABLE_RKNN)
+#include "rknn_api.h"
+#endif
+
+namespace rk3588 {
+
+/// Crop region in frame pixels; w == 0 or h == 0 selects the whole frame.
+struct DetWindow {
+    int x, y, w, h;
+};
+
+/// Detection in frame pixels: top-left corner, size, score and class.
+struct DetBox {
+    float x, y, w, h;
+    float conf;
+    int class_id;
+};
+
+class AiShoeDetNode : public INode {
+public:
+    std::string Id() const override { return id_; }
+    std::string Type() const override { return "ai_shoe_det"; }
+
+    /**
+     * Read the configuration, bind the queues and the inference backend and,
+     * when RKNN support is compiled in, load the model.
+     *
+     * Keys: id, model_path, model_w, model_h, conf, nms, windows, and
+     * output_layout ("channels_first" = [5, N], default; "boxes_first" = [N, 5]).
+     *
+     * @return false if the model size is invalid, the input queue or backend
+     *         is missing, or the model fails to load
+     */
+    bool Init(const SimpleJson& config, const NodeContext& ctx) override {
+        id_ = config.ValueOr<std::string>("id", "shoe_det");
+        model_path_ = config.ValueOr<std::string>("model_path",
+                                                  "./models/shoe_detector.rknn");
+
+        model_w_ = config.ValueOr<int>("model_w", 640);
+        model_h_ = config.ValueOr<int>("model_h", 640);
+        conf_thresh_ = config.ValueOr<float>("conf", 0.25f);
+        nms_thresh_ = config.ValueOr<float>("nms", 0.45f);
+        boxes_first_ =
+            config.ValueOr<std::string>("output_layout", "channels_first") == "boxes_first";
+
+        if (model_w_ <= 0 || model_h_ <= 0) {
+            LogError("[ai_shoe_det] model_w/model_h must be positive");
+            return false;
+        }
+        if (model_w_ % 32 != 0 || model_h_ % 32 != 0) {
+            // CandidateCount() assumes whole stride-8/16/32 grids.
+            LogWarn("[ai_shoe_det] model_w/model_h should be multiples of 32");
+        }
+
+        // Windows from the configuration, taken as given.
+        windows_.clear();
+        if (const SimpleJson* win_arr = config.Find("windows"); win_arr && win_arr->IsArray()) {
+            for (const auto& entry : win_arr->AsArray()) {
+                if (!entry.IsObject()) continue;
+                DetWindow win;
+                win.x = entry.ValueOr<int>("x", 0);
+                win.y = entry.ValueOr<int>("y", 0);
+                win.w = entry.ValueOr<int>("w", 640);
+                win.h = entry.ValueOr<int>("h", 640);
+                windows_.push_back(win);
+            }
+        }
+
+        // Default: a single window covering the whole frame (size 0 = full frame).
+        if (windows_.empty()) {
+            windows_.push_back({0, 0, 0, 0});
+        }
+
+        input_queue_ = ctx.input_queue;
+        output_queues_ = ctx.output_queues;
+        if (!input_queue_) {
+            LogError("[ai_shoe_det] no input queue");
+            return false;
+        }
+
+        infer_backend_ = ctx.infer_backend;
+        if (!infer_backend_) {
+            LogError("[ai_shoe_det] no infer backend");
+            return false;
+        }
+
+#if defined(RK3588_ENABLE_RKNN)
+        std::string err;
+        model_handle_ = infer_backend_->LoadModel(model_path_, err);
+        if (model_handle_ == kInvalidModelHandle) {
+            LogError("[ai_shoe_det] failed to load model: " + err);
+            return false;
+        }
+        input_buf_.resize(static_cast<size_t>(model_w_) * model_h_ * 3);
+        LogInfo("[ai_shoe_det] model loaded: " + model_path_);
+#else
+        LogWarn("[ai_shoe_det] RKNN disabled");
+#endif
+
+        return true;
+    }
+
+    bool Start() override {
+        LogInfo("[ai_shoe_det] start, windows=" + std::to_string(windows_.size()));
+        return true;
+    }
+
+    /// Unload the model (if one was loaded) and log the stop.
+    void Stop() override {
+#if defined(RK3588_ENABLE_RKNN)
+        if (infer_backend_ && model_handle_ != kInvalidModelHandle) {
+            infer_backend_->UnloadModel(model_handle_);
+            model_handle_ = kInvalidModelHandle;
+        }
+#endif
+        LogInfo("[ai_shoe_det] stop");
+    }
+
+    /// Run detection on the frame (RKNN builds only) and forward it downstream.
+    NodeStatus Process(FramePtr frame) override {
+        if (!frame) return NodeStatus::DROP;
+#if defined(RK3588_ENABLE_RKNN)
+        RunDetection(frame);
+#endif
+        Push(frame);
+        return NodeStatus::OK;
+    }
+
+private:
+    /// Hand the frame to every connected output queue.
+    void Push(FramePtr frame) {
+        for (auto& q : output_queues_) {
+            if (q) q->Push(frame);
+        }
+    }
+
+#if defined(RK3588_ENABLE_RKNN)
+
+    /**
+     * Detect in every window, merge the results with NMS and store them in
+     * frame->det, replacing whatever detections the frame carried before.
+     */
+    void RunDetection(const FramePtr& frame) {
+        if (!frame->data || frame->data_size == 0) return;
+
+        const int src_w = frame->width;
+        const int src_h = frame->height;
+
+        std::vector<DetBox> all_dets;
+        for (const auto& win : windows_) {
+            const auto dets = DetectWindow(frame, src_w, src_h, win);
+            all_dets.insert(all_dets.end(), dets.begin(), dets.end());
+        }
+        all_dets = ApplyNMS(std::move(all_dets), nms_thresh_);
+
+        if (!frame->det) {
+            // Take the result type from the Frame field so it follows frame.h.
+            using DetPtr = decltype(frame->det);
+            frame->det = std::make_shared<DetPtr::element_type>();
+        }
+        frame->det->items.clear();
+        frame->det->img_w = src_w;
+        frame->det->img_h = src_h;
+
+        for (const auto& d : all_dets) {
+            Detection item;
+            item.bbox = {d.x, d.y, d.w, d.h};
+            item.score = d.conf;
+            item.cls_id = d.class_id;
+            frame->det->items.push_back(item);
+        }
+    }
+
+    /**
+     * Run the model on one window.
+     *
+     * @param frame source frame, read as packed 3-byte RGB
+     * @param src_w frame width in pixels
+     * @param src_h frame height in pixels
+     * @param win   window; a zero width or height selects the whole frame
+     * @return detections in frame coordinates, empty on any failure
+     */
+    std::vector<DetBox> DetectWindow(const FramePtr& frame, int src_w, int src_h,
+                                     const DetWindow& win) {
+        std::vector<DetBox> dets;
+
+        // Resolve the crop rectangle and keep it inside the frame.
+        int win_x = 0;
+        int win_y = 0;
+        int win_w = src_w;
+        int win_h = src_h;
+        if (win.w != 0 && win.h != 0) {
+            win_x = std::max(0, std::min(win.x, src_w - 1));
+            win_y = std::max(0, std::min(win.y, src_h - 1));
+            win_w = std::min(win.w, src_w - win_x);
+            win_h = std::min(win.h, src_h - win_y);
+        }
+        if (win_w <= 0 || win_h <= 0) return dets;
+
+        const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
+        const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
+                             : (frame->stride > 0 ? frame->stride : src_w * 3);
+
+        // A stride below 3 bytes per pixel cannot belong to a packed RGB frame;
+        // reading it as RGB would run past the rows.
+        if (!src || src_stride < src_w * 3) return dets;
+
+        // Resize straight out of the frame: the stride steps over the pixels
+        // outside the window, so no per-window crop buffer is needed.
+        const uint8_t* roi = src + static_cast<size_t>(win_y) * src_stride
+                                 + static_cast<size_t>(win_x) * 3;
+        ResizeRgbBilinear(roi, win_w, win_h, src_stride,
+                          input_buf_.data(), model_w_, model_h_, model_w_ * 3);
+
+        InferInput input;
+        input.width = model_w_;
+        input.height = model_h_;
+        input.is_nhwc = true;
+        input.data = input_buf_.data();
+        input.size = input_buf_.size();
+        input.type = RKNN_TENSOR_UINT8;
+
+        auto r = infer_backend_->InferBorrowed(model_handle_, input);
+        if (!r.success || r.outputs.empty() || !r.outputs[0].data) {
+            LogWarn("[ai_shoe_det] inference failed");
+            return dets;
+        }
+
+        return ParseOutput(reinterpret_cast<const float*>(r.outputs[0].data),
+                           win_x, win_y, win_w, win_h);
+    }
+
+    /// Number of candidates a YOLOv8 head emits: one per cell of the
+    /// stride-8, stride-16 and stride-32 grids (8400 for a 640 x 640 input).
+    int CandidateCount() const {
+        return (model_w_ / 8) * (model_h_ / 8) + (model_w_ / 16) * (model_h_ / 16) +
+               (model_w_ / 32) * (model_h_ / 32);
+    }
+
+    /**
+     * Decode the first output tensor into frame-space boxes.
+     *
+     * Each candidate carries five floats: cx, cy, w, h (box centre and size
+     * in model-input pixels) and the shoe score. By default the tensor is
+     * read as [5, N] (the layout of an Ultralytics YOLOv8 export); with
+     * output_layout = "boxes_first" it is read as [N, 5].
+     *
+     * NOTE(review): the data is read as float32 and its byte size is not
+     * verified here - confirm both against the backend's output descriptor.
+     *
+     * @param data  first element of the output tensor
+     * @param win_x left edge of the window in the frame
+     * @param win_y top edge of the window in the frame
+     * @param win_w window width in frame pixels
+     * @param win_h window height in frame pixels
+     * @return boxes at or above the confidence threshold, in frame pixels
+     */
+    std::vector<DetBox> ParseOutput(const float* data,
+                                    int win_x, int win_y, int win_w, int win_h) const {
+        std::vector<DetBox> dets;
+        if (!data) return dets;
+
+        const size_t num_boxes = static_cast<size_t>(CandidateCount());
+        // Distance, in floats, between two candidates and between two attributes.
+        const size_t box_step = boxes_first_ ? 5 : 1;
+        const size_t attr_step = boxes_first_ ? 1 : num_boxes;
+
+        const float scale_x = static_cast<float>(win_w) / model_w_;
+        const float scale_y = static_cast<float>(win_h) / model_h_;
+
+        for (size_t i = 0; i < num_boxes; ++i) {
+            const float* p = data + i * box_step;
+            const float score = p[4 * attr_step];
+            // Written as a negated >= so NaN scores are rejected as well.
+            if (!(score >= conf_thresh_)) continue;
+
+            const float w = p[2 * attr_step];
+            const float h = p[3 * attr_step];
+
+            DetBox box;
+            // The head reports box centres; DetBox stores the top-left corner.
+            box.x = win_x + (p[0] - 0.5f * w) * scale_x;
+            box.y = win_y + (p[attr_step] - 0.5f * h) * scale_y;
+            box.w = w * scale_x;
+            box.h = h * scale_y;
+            box.conf = score;
+            box.class_id = 0;
+            dets.push_back(box);
+        }
+
+        return dets;
+    }
+
+    /// Greedy NMS: keep boxes by descending score, drop those overlapping a
+    /// kept box with IoU above thresh.
+    static std::vector<DetBox> ApplyNMS(std::vector<DetBox> dets, float thresh) {
+        std::sort(dets.begin(), dets.end(),
+                  [](const DetBox& a, const DetBox& b) { return a.conf > b.conf; });
+
+        std::vector<DetBox> keep;
+        std::vector<char> suppressed(dets.size(), 0);
+        for (size_t i = 0; i < dets.size(); ++i) {
+            if (suppressed[i]) continue;
+            keep.push_back(dets[i]);
+            for (size_t j = i + 1; j < dets.size(); ++j) {
+                if (!suppressed[j] && ComputeIoU(dets[i], dets[j]) > thresh) {
+                    suppressed[j] = 1;
+                }
+            }
+        }
+        return keep;
+    }
+
+    /// Intersection over union of two top-left/size boxes; 0 when the union is empty.
+    static float ComputeIoU(const DetBox& a, const DetBox& b) {
+        const float x1 = std::max(a.x, b.x);
+        const float y1 = std::max(a.y, b.y);
+        const float x2 = std::min(a.x + a.w, b.x + b.w);
+        const float y2 = std::min(a.y + a.h, b.y + b.h);
+
+        const float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
+        const float uni = a.w * a.h + b.w * b.h - inter;
+        return uni > 0 ? inter / uni : 0;
+    }
+
+    /**
+     * Bilinear resize of packed RGB.
+     *
+     * @param src        first byte of the source region
+     * @param src_w      source region width in pixels
+     * @param src_h      source region height in pixels
+     * @param src_stride bytes between consecutive source rows
+     * @param dst        destination buffer
+     * @param dst_w      destination width in pixels
+     * @param dst_h      destination height in pixels
+     * @param dst_stride bytes between consecutive destination rows
+     */
+    static void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride,
+                                  uint8_t* dst, int dst_w, int dst_h, int dst_stride) {
+        const float scale_x = static_cast<float>(src_w) / dst_w;
+        const float scale_y = static_cast<float>(src_h) / dst_h;
+
+        for (int y = 0; y < dst_h; ++y) {
+            const float fy = y * scale_y;
+            const int y0 = std::min(static_cast<int>(fy), src_h - 1);
+            const int y1 = std::min(y0 + 1, src_h - 1);
+            const float dy = fy - y0;
+            const uint8_t* row0 = src + static_cast<size_t>(y0) * src_stride;
+            const uint8_t* row1 = src + static_cast<size_t>(y1) * src_stride;
+            uint8_t* out = dst + static_cast<size_t>(y) * dst_stride;
+
+            for (int x = 0; x < dst_w; ++x) {
+                const float fx = x * scale_x;
+                const int x0 = std::min(static_cast<int>(fx), src_w - 1);
+                const int x1 = std::min(x0 + 1, src_w - 1);
+                const float dx = fx - x0;
+
+                for (int c = 0; c < 3; ++c) {
+                    const float v = row0[x0 * 3 + c] * (1 - dx) * (1 - dy)
+                                  + row0[x1 * 3 + c] * dx * (1 - dy)
+                                  + row1[x0 * 3 + c] * (1 - dx) * dy
+                                  + row1[x1 * 3 + c] * dx * dy;
+                    out[x * 3 + c] = static_cast<uint8_t>(v);
+                }
+            }
+        }
+    }
+
+#endif
+
+    std::string id_;
+    std::string model_path_;
+    int model_w_ = 640;
+    int model_h_ = 640;
+    float conf_thresh_ = 0.25f;
+    float nms_thresh_ = 0.45f;
+    bool boxes_first_ = false;
+    std::vector<DetWindow> windows_;
+    std::vector<uint8_t> input_buf_;
+
+    // Queue and backend handles use the member types declared by NodeContext.
+    decltype(NodeContext::input_queue) input_queue_;
+    decltype(NodeContext::output_queues) output_queues_;
+    // Declared in every build: Init() checks the backend even without RKNN.
+    decltype(NodeContext::infer_backend) infer_backend_;
+
+#if defined(RK3588_ENABLE_RKNN)
+    ModelHandle model_handle_ = kInvalidModelHandle;
+#endif
+};
+
+REGISTER_NODE(AiShoeDetNode, "ai_shoe_det");
+
+}  // namespace rk3588
diff --git a/models/RetinaFace_mobile320.rknn b/transform/RetinaFace_mobile320.rknn
similarity index 100%
rename from models/RetinaFace_mobile320.rknn
rename to transform/RetinaFace_mobile320.rknn
diff --git a/models/yolov8s_ppe.onnx b/transform/yolov8s_ppe.onnx
similarity index 100%
rename from models/yolov8s_ppe.onnx
rename to transform/yolov8s_ppe.onnx
diff --git a/models/yolov8s_ppe.pt b/transform/yolov8s_ppe.pt
similarity index 100%
rename from models/yolov8s_ppe.pt
rename to transform/yolov8s_ppe.pt