增加了支持滑动窗口的鞋子检测节点

This commit is contained in:
haotian 2026-03-13 21:15:43 +08:00
parent 80e0f229c3
commit e17f49c53c
13 changed files with 873 additions and 15 deletions

View File

@ -105,7 +105,7 @@
"model_w": 768,
"model_h": 768,
"num_classes": 11,
"conf": 0.1,
"conf": 0.35,
"nms": 0.45,
"debug": {
"stats": true,

View File

@ -24,8 +24,8 @@
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 768,
"dst_h": 768,
"dst_w": 640,
"dst_h": 640,
"dst_format": "rgb",
"resize_mode": "stretch",
"use_rga": true
@ -36,14 +36,14 @@
"role": "filter",
"enable": true,
"infer_fps": 10,
"model_path": "./models/best-768.rknn",
"model_path": "./models/shoe_detector_openimages_ppe_v1.rknn",
"model_version": "v8",
"model_w": 768,
"model_h": 768,
"num_classes": 11,
"model_w": 640,
"model_h": 640,
"num_classes": 1,
"conf": 0.35,
"nms": 0.45,
"class_filter": [3, 6]
"class_filter": [0]
},
{
"id": "trk",
@ -52,7 +52,7 @@
"enable": true,
"mode": "bytetrack_lite",
"per_class": true,
"track_classes": [3, 6],
"track_classes": [0],
"max_age_ms": 1500,
"min_hits": 2
},
@ -61,9 +61,9 @@
"type": "logic_gate",
"role": "filter",
"enable": true,
"mode": "ppe_boots_check",
"anchor_class": 6,
"boots_class": 3,
"mode": "simple",
"anchor_class": 0,
"boots_class": 0,
"color_check": {
"enable": true,
"method": "hsv",
@ -79,7 +79,7 @@
"enable": true,
"draw_bbox": true,
"draw_text": true,
"labels": ["helmet", "gloves", "vest", "boots", "goggles", "none", "Person", "no_helmet", "no_goggle", "no_gloves", "no_boots"]
"labels": ["shoe"]
},
{
"id": "post",

View File

@ -0,0 +1,106 @@
{
"queue": {
"size": 8,
"strategy": "drop_oldest"
},
"graphs": [
{
"name": "shoe_detector",
"nodes": [
{
"id": "in",
"type": "input_rtsp",
"role": "source",
"enable": true,
"url": "rtsp://10.0.0.49:8554/cam",
"fps": 30,
"width": 1920,
"height": 1080,
"use_mpp": true,
"force_tcp": true
},
{
"id": "pre",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "rgb",
"resize_mode": "stretch",
"rga_gate": "shoe_detector",
"use_rga": true
},
{
"id": "shoe_det",
"type": "ai_shoe_det",
"role": "filter",
"enable": true,
"model_path": "./models/shoe_detector_openimages_ppe_v1.rknn",
"model_w": 640,
"model_h": 640,
"conf": 0.35,
"nms": 0.45,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
},
{
"id": "trk",
"type": "tracker",
"role": "filter",
"enable": true,
"mode": "bytetrack_lite",
"per_class": true,
"track_classes": [0],
"max_age_ms": 1500,
"min_hits": 2
},
{
"id": "osd",
"type": "osd",
"role": "filter",
"enable": true,
"draw_bbox": true,
"draw_text": true,
"use_rga_bbox": false,
"labels": ["shoe"]
},
{
"id": "post",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "nv12",
"resize_mode": "stretch",
"rga_gate": "shoe_detector",
"use_rga": true
},
{
"id": "pub",
"type": "publish",
"role": "filter",
"enable": true,
"codec": "h264",
"fps": 30,
"bitrate_kbps": 2000,
"use_mpp": true,
"outputs": [
{"proto": "rtsp_server", "port": 8555, "path": "/live/cam1"}
]
}
],
"edges": [
["in", "pre"],
["pre", "shoe_det"],
["shoe_det", "trk"],
["trk", "osd"],
["osd", "post"],
["post", "pub"]
]
}
]
}

View File

@ -79,8 +79,11 @@ https://gitcode.com/GitHub_Trending/ul/ultralytics
pip install -e . rknn-toolkit2
pip install "onnx==1.16.1"
激活python环境
source ./venv_rknn/bin/activate
进入模型目录,执行:
yolo export model=best.pt format=rknn name=rk3588
yolo export model=yolov8s_ppe.pt format=rknn name=rk3588
- 插件可以通过以下方式构建(以 `ai_face_det_zoned` 为例)

View File

@ -0,0 +1,262 @@
/**
 * sliding_window_detector.h - sliding-window detection helper
 *
 * Workflow:
 *  1. Split the source frame into one or more detection windows
 *  2. Crop each window and resize it to the model input size
 *  3. Run inference on each window
 *  4. Map detections back to original-image coordinates (merge with NMS downstream)
 *
 * Usage:
 *   SlidingWindowDetector swd;
 *   swd.InitFromConfig(config);  // initialise windows from config
 *   auto windows = swd.GetWindows(src_w, src_h);
 *   for (auto& win : windows) {
 *     swd.PrepareInput(frame, win, model_w, model_h, buf);
 *     // ... inference ...
 *     auto dets = swd.MapDetectionsToOriginal(raw_dets, win, model_w, model_h);
 *   }
 */
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>
#include "frame/frame.h"
#include "utils/simple_json.h"
namespace rk3588 {
// 窗口定义
// Rectangular region of the source image that detection runs on.
struct DetectionWindow {
    int x = 0;    // top-left x (pixels)
    int y = 0;    // top-left y (pixels)
    int w = 640;  // window width
    int h = 640;  // window height

    // A window is usable only when it has positive area.
    bool IsValid() const { return (w > 0) && (h > 0); }
};
// 检测框(通用格式)
// One detection result in generic form: top-left corner plus size.
struct DetectionBox {
    float x, y, w, h;  // left, top, width, height
    float confidence;  // detection score
    int class_id;      // class index
};
/**
 * Sliding-window detector helper.
 *
 * Splits a source frame into one or more detection windows, prepares the
 * per-window model input (crop + bilinear resize) and maps raw detections
 * back into original-image coordinates. Windows come from configuration
 * when present, otherwise an automatic tiling is computed.
 */
class SlidingWindowDetector {
public:
    SlidingWindowDetector() = default;

    /**
     * Initialise from configuration.
     *
     * @param config SimpleJson object; optional "windows" array of
     *        {x, y, w, h} entries and optional "target_height" used as the
     *        square window size for the automatic layout (default 640).
     * @return always true — malformed or zero-area window entries are skipped.
     */
    bool InitFromConfig(const SimpleJson& config) {
        windows_.clear();
        // Parse pre-configured windows.
        if (const SimpleJson* win_arr = config.Find("windows"); win_arr && win_arr->IsArray()) {
            for (const auto& w : win_arr->AsArray()) {
                if (!w.IsObject()) continue;
                DetectionWindow win;
                win.x = w.ValueOr<int>("x", 0);
                win.y = w.ValueOr<int>("y", 0);
                win.w = w.ValueOr<int>("w", 640);
                win.h = w.ValueOr<int>("h", 640);
                if (win.IsValid()) {
                    windows_.push_back(win);
                }
            }
        }
        // Window edge length used by the automatic layout (CalculateWindowsAuto).
        target_height_ = config.ValueOr<int>("target_height", 640);
        return true;
    }

    /**
     * Windows to run detection on for a frame of the given size.
     *
     * @return pre-configured windows when any were parsed, otherwise an
     *         automatically computed tiling of the source image.
     */
    std::vector<DetectionWindow> GetWindows(int src_w, int src_h) const {
        if (!windows_.empty()) {
            return windows_;
        }
        return CalculateWindowsAuto(src_w, src_h);
    }

    /**
     * Crop one window from the frame and resize it to the model input size.
     * The window is clamped to the frame bounds before cropping.
     *
     * Assumes the frame data is packed RGB, 3 bytes per pixel — TODO confirm
     * against the upstream preprocess node's "dst_format".
     *
     * @param frame   source frame (plane 0 or legacy data pointer is used)
     * @param win     window to crop
     * @param model_w model input width
     * @param model_h model input height
     * @param output  destination buffer, must hold model_w * model_h * 3 bytes
     * @return false when the frame/output is missing or the clamped window is empty
     */
    bool PrepareInput(const FramePtr& frame,
                      const DetectionWindow& win,
                      int model_w, int model_h,
                      uint8_t* output) const {
        if (!frame || !frame->data || !output) return false;
        const int src_w = frame->width;
        const int src_h = frame->height;
        // Prefer the plane pointer/stride; fall back to the legacy fields.
        const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
        const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
                               : (frame->stride > 0 ? frame->stride : frame->width * 3);
        // Clamp the window to the source image.
        int win_x = std::max(0, std::min(win.x, src_w - 1));
        int win_y = std::max(0, std::min(win.y, src_h - 1));
        int win_w = std::min(win.w, src_w - win_x);
        int win_h = std::min(win.h, src_h - win_y);
        if (win_w <= 0 || win_h <= 0) return false;
        // Crop the window into a tight temporary buffer.
        std::vector<uint8_t> crop_buf(static_cast<size_t>(win_w) * win_h * 3);
        for (int row = 0; row < win_h; ++row) {
            const uint8_t* src_row = src + (win_y + row) * src_stride + win_x * 3;
            uint8_t* dst_row = crop_buf.data() + row * win_w * 3;
            memcpy(dst_row, src_row, static_cast<size_t>(win_w) * 3);
        }
        // Resize to the model input resolution.
        ResizeRgbBilinear(crop_buf.data(), win_w, win_h, win_w * 3,
                          output, model_w, model_h, false);
        return true;
    }

    /**
     * Map detections from model-input coordinates back to the original image.
     *
     * @param detections boxes in model_w x model_h coordinates
     * @param win        window the detections came from
     * @return boxes translated/scaled into original-image coordinates
     */
    std::vector<DetectionBox> MapDetectionsToOriginal(
            const std::vector<DetectionBox>& detections,
            const DetectionWindow& win,
            int model_w, int model_h) const {
        std::vector<DetectionBox> mapped = detections;
        const float scale_x = static_cast<float>(win.w) / static_cast<float>(model_w);
        const float scale_y = static_cast<float>(win.h) / static_cast<float>(model_h);
        for (auto& det : mapped) {
            det.x = win.x + det.x * scale_x;
            det.y = win.y + det.y * scale_y;
            det.w *= scale_x;
            det.h *= scale_y;
        }
        return mapped;
    }

    /// Number of windows parsed from configuration (0 means auto layout).
    size_t GetConfiguredWindowCount() const {
        return windows_.size();
    }

private:
    /**
     * Automatic tiling used when no windows were configured.
     *
     * FIX: the window size now honours the configured "target_height" value
     * (it was parsed in InitFromConfig but a hard-coded 640 was used here).
     * Note the step clamp below means tiles do not actually overlap; edge
     * tiles may extend past the image and rely on PrepareInput's clamping.
     */
    std::vector<DetectionWindow> CalculateWindowsAuto(int src_w, int src_h) const {
        std::vector<DetectionWindow> windows;
        const int win_size = target_height_ > 0 ? target_height_ : 640;
        // Step between window origins; divisor is >= 1 whenever src > win_size.
        int step_x = (src_w <= win_size) ? src_w : (src_w - win_size) / ((src_w + win_size - 1) / win_size - 1);
        int step_y = (src_h <= win_size) ? src_h : (src_h - win_size) / ((src_h + win_size - 1) / win_size - 1);
        if (step_x < win_size) step_x = win_size;
        if (step_y < win_size) step_y = win_size;
        for (int y = 0; y < src_h; y += step_y) {
            for (int x = 0; x < src_w; x += step_x) {
                DetectionWindow win;
                win.x = x;
                win.y = y;
                win.w = win_size;
                win.h = win_size;
                windows.push_back(win);
                if (x + win_size >= src_w) break;
            }
            if (y + win_size >= src_h) break;
        }
        return windows;
    }

    /**
     * Bilinear resize of packed RGB data.
     * @param swap_rb when true, swaps the R and B channels while resizing.
     */
    static void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride,
                                  uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
        const float scale_x = static_cast<float>(src_w) / dst_w;
        const float scale_y = static_cast<float>(src_h) / dst_h;
        for (int y = 0; y < dst_h; ++y) {
            const float fy = y * scale_y;
            const int y0 = static_cast<int>(fy);
            const int y1 = std::min(y0 + 1, src_h - 1);
            const float dy = fy - y0;
            for (int x = 0; x < dst_w; ++x) {
                const float fx = x * scale_x;
                const int x0 = static_cast<int>(fx);
                const int x1 = std::min(x0 + 1, src_w - 1);
                const float dx = fx - x0;
                // Interpolate each channel from the four neighbouring pixels.
                for (int c = 0; c < 3; ++c) {
                    const int src_c = swap_rb ? (2 - c) : c;
                    const float v00 = src[(y0 * src_stride) + (x0 * 3) + src_c];
                    const float v01 = src[(y0 * src_stride) + (x1 * 3) + src_c];
                    const float v10 = src[(y1 * src_stride) + (x0 * 3) + src_c];
                    const float v11 = src[(y1 * src_stride) + (x1 * 3) + src_c];
                    const float v0 = v00 * (1 - dx) + v01 * dx;
                    const float v1 = v10 * (1 - dx) + v11 * dx;
                    const float v = v0 * (1 - dy) + v1 * dy;
                    dst[(y * dst_w + x) * 3 + c] = static_cast<uint8_t>(v);
                }
            }
        }
    }

    std::vector<DetectionWindow> windows_;  // pre-configured windows (may be empty)
    int target_height_ = 640;               // auto-layout window size
};
} // namespace rk3588

Binary file not shown.

View File

@ -512,7 +512,24 @@ if(RK3588_ENABLE_ZLMEDIAKIT AND RK_ZLMK_API_LIB)
)
endif()
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler
# ai_shoe_det plugin (shoe detection with sliding window support)
add_library(ai_shoe_det SHARED
ai_shoe_det/ai_shoe_det_node.cpp
)
target_include_directories(ai_shoe_det PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
target_link_libraries(ai_shoe_det PRIVATE project_options Threads::Threads)
if(RK3588_ENABLE_RKNN AND RK_RKNN_LIB)
target_compile_definitions(ai_shoe_det PRIVATE RK3588_ENABLE_RKNN)
target_include_directories(ai_shoe_det PRIVATE ${RKNN_RUNTIME_INCLUDE_DIR})
target_link_libraries(ai_shoe_det PRIVATE ${RK_RKNN_LIB})
endif()
set_target_properties(ai_shoe_det PROPERTIES
OUTPUT_NAME "ai_shoe_det"
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler ai_shoe_det
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
)

View File

@ -0,0 +1,26 @@
# ai_shoe_det - shoe detection plugin build rules
set(PLUGIN_NAME ai_shoe_det)
add_library(${PLUGIN_NAME} SHARED
ai_shoe_det_node.cpp
)
target_include_directories(${PLUGIN_NAME} PRIVATE
${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/third_party/rknpu2/runtime/${CMAKE_SYSTEM_PROCESSOR}/include
)
target_link_libraries(${PLUGIN_NAME} PRIVATE
core
${RKNN_LIB}
)
set_target_properties(${PLUGIN_NAME} PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/plugins
)
#
install(TARGETS ${PLUGIN_NAME}
LIBRARY DESTINATION lib/plugins
)

View File

@ -0,0 +1,95 @@
# ai_shoe_det - 鞋子检测节点
专门针对鞋子检测优化的节点,支持滑动窗口提高小目标检测率。
## 特性
- **滑动窗口支持**:可配置多窗口覆盖全图,提高小鞋子检测精度
- **单类优化**:专门针对 shoe 单类检测优化
- **自动 NMS**:多窗口结果自动合并去重
- **轻量快速**:基于 RK3588 NPU 加速
## 配置参数
```json
{
"id": "shoe_det",
"type": "ai_shoe_det",
"model_path": "./models/shoe_detector.rknn",
"model_w": 640,
"model_h": 640,
"conf": 0.25,
"nms": 0.45,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
}
```
### 参数说明
| 参数 | 类型 | 默认值 | 说明 |
|------|------|--------|------|
| `model_path` | string | - | RKNN 模型路径 |
| `model_w` | int | 640 | 模型输入宽度 |
| `model_h` | int | 640 | 模型输入高度 |
| `conf` | float | 0.25 | 置信度阈值 |
| `nms` | float | 0.45 | NMS IoU 阈值 |
| `windows` | array | - | 滑动窗口配置,不配置则使用全图单窗口 |
### 窗口配置
- **单窗口(全图)**:不配置 `windows` 或配置 `[{"x":0,"y":0,"w":0,"h":0}]`
- **双窗口(推荐)**:左右各 960x1080
```json
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
```
## Pipeline 示例
```json
{
"nodes": [
{"id": "in", "type": "input_rtsp", "url": "rtsp://..."},
{"id": "pre", "type": "preprocess", "dst_w": 1920, "dst_h": 1080, "dst_format": "rgb"},
{
"id": "shoe_det",
"type": "ai_shoe_det",
"model_path": "./models/shoe_detector_openimages_ppe_v1.rknn",
"model_w": 640,
"model_h": 640,
"conf": 0.25,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
},
{"id": "osd", "type": "osd"},
{"id": "pub", "type": "publish"}
],
"edges": [
["in", "pre"],
["pre", "shoe_det"],
["shoe_det", "osd"],
["osd", "pub"]
]
}
```
## 编译
```bash
cd build
cmake ..
make ai_shoe_det -j4
```
## 注意事项
1. 模型必须是单类shoeYOLOv8 格式
2. 多窗口会增加 NPU 负载2窗口 = 2倍推理时间
3. 窗口之间有重叠时NMS 会自动去重

View File

@ -0,0 +1,349 @@
/**
 * ai_shoe_det - shoe detection node with sliding-window support.
 *
 * Structure mirrors the ai_yolo plugin; detection is restricted to a single
 * "shoe" class and can optionally run over multiple configured windows.
 */
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "ai_scheduler.h"
#include "frame/frame.h"
#include "hw/i_infer_backend.h"
#include "node.h"
#include "utils/logger.h"
#if defined(RK3588_ENABLE_RKNN)
#include "rknn_api.h"
#endif
namespace rk3588 {
// Crop region for one detection pass, in source-image pixel coordinates.
struct DetWindow {
    int x;  // top-left x
    int y;  // top-left y
    int w;  // width (0 together with h == 0 means "whole frame")
    int h;  // height
};
// One detection: top-left corner, size, score and class index.
struct DetBox {
    float x, y, w, h;  // left, top, width, height
    float conf;        // confidence score
    int class_id;      // class index (always 0 for single-class shoe model)
};
/**
 * Shoe detection node with optional sliding-window inference.
 *
 * For every incoming frame the node runs the RKNN shoe model over one or
 * more configured windows, merges the per-window detections with NMS and
 * attaches the result to the frame before forwarding it downstream.
 * When built without RKNN the node is a pass-through.
 */
class AiShoeDetNode : public INode {
public:
    std::string Id() const override { return id_; }
    std::string Type() const override { return "ai_shoe_det"; }

    /**
     * Read configuration, resolve queues and (when RKNN is enabled) the
     * inference backend and model. Returns false so the pipeline fails fast
     * when a required dependency is missing.
     */
    bool Init(const SimpleJson& config, const NodeContext& ctx) override {
        id_ = config.ValueOr<std::string>("id", "shoe_det");
        model_path_ = config.ValueOr<std::string>("model_path",
                                                  "./models/shoe_detector.rknn");
        model_w_ = config.ValueOr<int>("model_w", 640);
        model_h_ = config.ValueOr<int>("model_h", 640);
        conf_thresh_ = config.ValueOr<float>("conf", 0.25f);
        nms_thresh_ = config.ValueOr<float>("nms", 0.45f);

        // Parse the optional sliding-window list.
        windows_.clear();
        if (const SimpleJson* win_arr = config.Find("windows"); win_arr && win_arr->IsArray()) {
            for (const auto& w : win_arr->AsArray()) {
                if (!w.IsObject()) continue;
                DetWindow win;
                win.x = w.ValueOr<int>("x", 0);
                win.y = w.ValueOr<int>("y", 0);
                win.w = w.ValueOr<int>("w", 640);
                win.h = w.ValueOr<int>("h", 640);
                windows_.push_back(win);
            }
        }
        // Default to a single whole-frame window (w/h == 0 means full image).
        if (windows_.empty()) {
            windows_.push_back({0, 0, 0, 0});
        }

        input_queue_ = ctx.input_queue;
        output_queues_ = ctx.output_queues;
        if (!input_queue_) {
            LogError("[ai_shoe_det] no input queue");
            return false;
        }

#if defined(RK3588_ENABLE_RKNN)
        // FIX: infer_backend_ is only declared when RKNN is enabled, so the
        // backend lookup must live inside the same guard — the original code
        // referenced it unconditionally and broke non-RKNN builds.
        infer_backend_ = ctx.infer_backend;
        if (!infer_backend_) {
            LogError("[ai_shoe_det] no infer backend");
            return false;
        }
        std::string err;
        model_handle_ = infer_backend_->LoadModel(model_path_, err);
        if (model_handle_ == kInvalidModelHandle) {
            LogError("[ai_shoe_det] failed to load model: " + err);
            return false;
        }
        input_buf_.resize(static_cast<size_t>(model_w_) * model_h_ * 3);
        LogInfo("[ai_shoe_det] model loaded: " + model_path_);
#else
        LogWarn("[ai_shoe_det] RKNN disabled");
#endif
        return true;
    }

    bool Start() override {
        LogInfo("[ai_shoe_det] start, windows=" + std::to_string(windows_.size()));
        return true;
    }

    void Stop() override {
#if defined(RK3588_ENABLE_RKNN)
        if (model_handle_ != kInvalidModelHandle) {
            infer_backend_->UnloadModel(model_handle_);
            model_handle_ = kInvalidModelHandle;
        }
#endif
        LogInfo("[ai_shoe_det] stop");
    }

    /// Run detection on the frame (RKNN builds only), then forward it.
    NodeStatus Process(FramePtr frame) override {
        if (!frame) return NodeStatus::DROP;
#if defined(RK3588_ENABLE_RKNN)
        RunDetection(frame);
#endif
        Push(frame);
        return NodeStatus::OK;
    }

private:
    // Forward the frame to every downstream queue.
    void Push(FramePtr frame) {
        for (auto& q : output_queues_) q->Push(frame);
    }

#if defined(RK3588_ENABLE_RKNN)
    /// Detect over all windows, merge with NMS and attach results to the frame.
    void RunDetection(FramePtr frame) {
        if (!frame->data || frame->data_size == 0) return;
        const int src_w = frame->width;
        const int src_h = frame->height;
        std::vector<DetBox> all_dets;
        for (const auto& win : windows_) {
            auto dets = DetectWindow(frame, src_w, src_h, win);
            all_dets.insert(all_dets.end(), dets.begin(), dets.end());
        }
        // Cross-window NMS removes duplicates in overlapping regions.
        all_dets = ApplyNMS(all_dets, nms_thresh_);
        if (!frame->det) {
            frame->det = std::make_shared<DetectionResult>();
        }
        frame->det->items.clear();
        frame->det->img_w = src_w;
        frame->det->img_h = src_h;
        for (const auto& d : all_dets) {
            Detection item;
            item.bbox = {d.x, d.y, d.w, d.h};
            item.score = d.conf;
            item.cls_id = d.class_id;
            frame->det->items.push_back(item);
        }
    }

    /// Crop + resize one window, run inference, return boxes in image coords.
    std::vector<DetBox> DetectWindow(FramePtr frame, int src_w, int src_h, const DetWindow& win) {
        std::vector<DetBox> dets;
        // Resolve the crop region; w/h == 0 selects the whole frame.
        int win_x, win_y, win_w, win_h;
        if (win.w == 0 || win.h == 0) {
            win_x = 0; win_y = 0; win_w = src_w; win_h = src_h;
        } else {
            win_x = std::max(0, std::min(win.x, src_w - 1));
            win_y = std::max(0, std::min(win.y, src_h - 1));
            win_w = std::min(win.w, src_w - win_x);
            win_h = std::min(win.h, src_h - win_y);
        }
        if (win_w <= 0 || win_h <= 0) return dets;
        // Source pointer/stride: prefer plane 0, fall back to legacy fields.
        const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
        int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
                         : (frame->stride > 0 ? frame->stride : src_w * 3);
        if (!src || src_stride <= 0) return dets;
        // Crop into a tight temporary buffer (assumes packed RGB24 input).
        std::vector<uint8_t> crop_buf(static_cast<size_t>(win_w) * win_h * 3);
        for (int row = 0; row < win_h; ++row) {
            const uint8_t* src_row = src + (win_y + row) * src_stride + win_x * 3;
            uint8_t* dst_row = crop_buf.data() + row * win_w * 3;
            memcpy(dst_row, src_row, static_cast<size_t>(win_w) * 3);
        }
        // Resize to the model input resolution.
        ResizeRgbBilinear(crop_buf.data(), win_w, win_h, win_w * 3,
                          input_buf_.data(), model_w_, model_h_, model_w_ * 3);
        // Inference.
        InferInput input;
        input.width = model_w_;
        input.height = model_h_;
        input.is_nhwc = true;
        input.data = input_buf_.data();
        input.size = input_buf_.size();
        input.type = RKNN_TENSOR_UINT8;
        auto r = infer_backend_->InferBorrowed(model_handle_, input);
        if (!r.success || r.outputs.empty() || !r.outputs[0].data) {
            LogWarn("[ai_shoe_det] inference failed");
            return dets;
        }
        return ParseOutput(r.outputs, win_x, win_y, win_w, win_h);
    }

    /**
     * Decode raw model output into boxes mapped to image coordinates.
     *
     * NOTE(review): assumes a [8400, 5] float layout (x, y, w, h, conf) and a
     * fixed box count — verify against the exported model; if the output tensor
     * is smaller this reads out of bounds.
     */
    std::vector<DetBox> ParseOutput(const std::vector<AiScheduler::BorrowedOutput>& outputs,
                                    int win_x, int win_y, int win_w, int win_h) {
        std::vector<DetBox> dets;
        if (outputs.empty() || !outputs[0].data) return dets;
        const float* data = reinterpret_cast<const float*>(outputs[0].data);
        const int num_boxes = 8400;  // YOLOv8 default for 640x640 input
        const float scale_x = static_cast<float>(win_w) / model_w_;
        const float scale_y = static_cast<float>(win_h) / model_h_;
        for (int i = 0; i < num_boxes; ++i) {
            const float conf = data[i * 5 + 4];
            if (conf < conf_thresh_) continue;
            DetBox box;
            box.x = win_x + data[i * 5 + 0] * scale_x;
            box.y = win_y + data[i * 5 + 1] * scale_y;
            box.w = data[i * 5 + 2] * scale_x;
            box.h = data[i * 5 + 3] * scale_y;
            box.conf = conf;
            box.class_id = 0;  // single-class model
            dets.push_back(box);
        }
        return dets;
    }

    /// Greedy NMS; sorts the input by confidence (in place) and keeps survivors.
    std::vector<DetBox> ApplyNMS(std::vector<DetBox>& dets, float thresh) {
        if (dets.empty()) return dets;
        std::sort(dets.begin(), dets.end(),
                  [](const DetBox& a, const DetBox& b) { return a.conf > b.conf; });
        std::vector<DetBox> keep;
        std::vector<bool> suppressed(dets.size(), false);
        for (size_t i = 0; i < dets.size(); ++i) {
            if (suppressed[i]) continue;
            keep.push_back(dets[i]);
            for (size_t j = i + 1; j < dets.size(); ++j) {
                if (suppressed[j]) continue;
                if (ComputeIoU(dets[i], dets[j]) > thresh) {
                    suppressed[j] = true;
                }
            }
        }
        return keep;
    }

    /// Intersection-over-union of two top-left + size boxes.
    static float ComputeIoU(const DetBox& a, const DetBox& b) {
        const float x1 = std::max(a.x, b.x);
        const float y1 = std::max(a.y, b.y);
        const float x2 = std::min(a.x + a.w, b.x + b.w);
        const float y2 = std::min(a.y + a.h, b.y + b.h);
        const float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
        const float uni = a.w * a.h + b.w * b.h - inter;
        return uni > 0 ? inter / uni : 0;
    }

    /// Bilinear resize of packed RGB data.
    static void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride,
                                  uint8_t* dst, int dst_w, int dst_h, int dst_stride) {
        const float scale_x = static_cast<float>(src_w) / dst_w;
        const float scale_y = static_cast<float>(src_h) / dst_h;
        for (int y = 0; y < dst_h; ++y) {
            const float fy = y * scale_y;
            const int y0 = static_cast<int>(fy);
            const int y1 = std::min(y0 + 1, src_h - 1);
            const float dy = fy - y0;
            for (int x = 0; x < dst_w; ++x) {
                const float fx = x * scale_x;
                const int x0 = static_cast<int>(fx);
                const int x1 = std::min(x0 + 1, src_w - 1);
                const float dx = fx - x0;
                for (int c = 0; c < 3; ++c) {
                    const float v00 = src[y0 * src_stride + x0 * 3 + c];
                    const float v01 = src[y0 * src_stride + x1 * 3 + c];
                    const float v10 = src[y1 * src_stride + x0 * 3 + c];
                    const float v11 = src[y1 * src_stride + x1 * 3 + c];
                    const float v = v00 * (1 - dx) * (1 - dy) +
                                    v01 * dx * (1 - dy) +
                                    v10 * (1 - dx) * dy +
                                    v11 * dx * dy;
                    dst[y * dst_stride + x * 3 + c] = static_cast<uint8_t>(v);
                }
            }
        }
    }
#endif

    std::string id_;
    std::string model_path_;
    int model_w_ = 640;
    int model_h_ = 640;
    float conf_thresh_ = 0.25f;
    float nms_thresh_ = 0.45f;
    std::vector<DetWindow> windows_;
    std::vector<uint8_t> input_buf_;  // reused model-input staging buffer
    std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
    std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
#if defined(RK3588_ENABLE_RKNN)
    std::shared_ptr<IInferBackend> infer_backend_;
    ModelHandle model_handle_ = kInvalidModelHandle;
#endif
};
REGISTER_NODE(AiShoeDetNode, "ai_shoe_det");
} // namespace rk3588