新增滑动窗口节点,提高了人脸检测和识别能力

This commit is contained in:
haotian 2026-03-12 20:12:00 +08:00
parent d8c492f9ab
commit 8ae1893f5f
14 changed files with 2008 additions and 647 deletions

View File

@ -0,0 +1,311 @@
{
"queue": {
"size": 8,
"strategy": "drop_oldest"
},
"graphs": [
{
"name": "cam1_full_pipeline",
"nodes": [
{
"id": "in_cam1",
"type": "input_rtsp",
"role": "source",
"enable": true,
"url": "rtsp://10.0.0.49:8554/cam",
"fps": 30,
"width": 1920,
"height": 1080,
"use_mpp": true,
"use_ffmpeg": false,
"force_tcp": true,
"reconnect_sec": 5,
"reconnect_backoff_max_sec": 30
},
{
"id": "pre_face",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "rgb",
"dst_packed": true,
"resize_mode": "stretch",
"keep_ratio": false,
"rga_gate": "ppe_detection",
"use_rga": true
},
{
"id": "face_det",
"type": "ai_scrfd_sliding",
"role": "filter",
"enable": true,
"infer_fps": 5,
"model_path": "./models/scrfd_500m_640.rknn",
"model_w": 640,
"model_h": 640,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
],
"conf_thresh": 0.5,
"nms_thresh": 0.4,
"max_faces": 50,
"debug": {
"stats": true,
"stats_interval": 30
}
},
{
"id": "face_recog",
"type": "ai_face_recog",
"role": "filter",
"enable": true,
"model_path": "./models/mobilefacenet_arcface.rknn",
"align": true,
"emit_embedding": false,
"max_faces": 50,
"input_format": "rgb",
"input_dtype": "uint8",
"threshold": {
"accept": 0.45,
"margin": 0.05
},
"gallery": {
"backend": "sqlite",
"path": "./models/face_gallery.db",
"load_on_start": true,
"expected_dim": 128,
"dtype": "auto"
}
},
{
"id": "pre_yolo",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 768,
"dst_h": 768,
"dst_format": "rgb",
"dst_packed": true,
"resize_mode": "stretch",
"keep_ratio": false,
"rga_gate": "ppe_detection",
"use_rga": true
},
{
"id": "yolo_ppe",
"type": "ai_yolo",
"role": "filter",
"enable": true,
"infer_fps": 5,
"model_path": "./models/best-768.rknn",
"model_version": "v8",
"model_w": 768,
"model_h": 768,
"num_classes": 11,
"conf": 0.25,
"nms": 0.45,
"debug": {
"stats": true,
"stats_interval": 30,
"detections": false
},
"class_filter": [3, 6, 10]
},
{
"id": "tracker",
"type": "tracker",
"role": "filter",
"enable": true,
"mode": "bytetrack_lite",
"per_class": true,
"state_key": "ppe_detection",
"track_classes": [3, 6, 10],
"ignore_classes": [],
"allowed_models": ["yolov8"],
"high_th": 0.5,
"low_th": 0.1,
"iou_th": 0.3,
"max_age_ms": 1500,
"min_hits": 2,
"max_tracks": 128
},
{
"id": "logic_boots",
"type": "logic_gate",
"role": "filter",
"enable": true,
"mode": "ppe_boots_check",
"anchor_class": 6,
"boots_class": 3,
"color_check": {
"enable": true,
"method": "hsv",
"dark_threshold": 80,
"roi_expand": 1.0
},
"debug": false
},
{
"id": "pre_osd",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "nv12",
"resize_mode": "stretch",
"rga_gate": "ppe_detection",
"use_rga": true
},
{
"id": "osd",
"type": "osd",
"role": "filter",
"enable": true,
"draw_bbox": true,
"draw_text": true,
"draw_face_det": true,
"draw_face_bbox": true,
"line_width": 2,
"font_scale": 1,
"use_rga_bbox": false,
"labels": ["helmet", "gloves", "vest", "boots", "goggles", "none", "Person", "no_helmet", "no_goggle", "no_gloves", "no_boots", "violation"]
},
{
"id": "publish",
"type": "publish",
"role": "filter",
"enable": true,
"queue": {"size": 2, "policy": "drop_oldest"},
"codec": "h264",
"fps": 30,
"gop": 60,
"bitrate_kbps": 4000,
"use_mpp": true,
"use_ffmpeg_mux": true,
"outputs": [
{
"proto": "hls",
"path": "./web/hls/cam1/index.m3u8",
"segment_sec": 2
},
{
"proto": "rtsp_server",
"port": 8555,
"path": "/live/cam1"
}
]
},
{
"id": "alarm",
"type": "alarm",
"role": "sink",
"enable": true,
"eval_fps": 10,
"labels": ["helmet", "gloves", "vest", "boots", "goggles", "none", "Person", "no_helmet", "no_goggle", "no_gloves", "no_boots", "violation"],
"rules": [
{
"name": "non_compliant_boots",
"class_ids": [10],
"roi": {"x": 0.0, "y": 0.0, "w": 1.0, "h": 1.0},
"min_score": 0.3,
"min_box_area_ratio": 0.01,
"require_track_id": true,
"min_duration_ms": 800,
"min_hits": 2,
"hit_window_ms": 1000,
"cooldown_ms": 5000,
"per_track_cooldown_ms": 5000
}
],
"face_rules": [
{
"name": "unknown_face",
"type": "unknown",
"cooldown_ms": 7000,
"min_sim": 0.35,
"min_hits": 2,
"hit_window_ms": 1500,
"min_face_area_ratio": 0.01,
"min_face_aspect": 0.6,
"max_face_aspect": 1.6
},
{
"name": "known_person",
"type": "person",
"cooldown_ms": 7000,
"min_sim": 0.6,
"min_hits": 2,
"hit_window_ms": 1500,
"min_face_area_ratio": 0.01,
"min_face_aspect": 0.6,
"max_face_aspect": 1.6
}
],
"actions": {
"log": {
"enable": true,
"level": "info"
},
"snapshot": {
"enable": true,
"format": "jpg",
"quality": 85,
"upload": {
"type": "minio",
"endpoint": "http://10.0.0.49:9000",
"bucket": "myminio",
"region": "us-east-1",
"access_key": "minioadmin",
"secret_key": "minioadmin"
}
},
"clip": {
"enable": true,
"pre_sec": 5,
"post_sec": 10,
"format": "mp4",
"fps": 30,
"upload": {
"type": "minio",
"endpoint": "http://10.0.0.49:9000",
"bucket": "myminio",
"region": "us-east-1",
"access_key": "minioadmin",
"secret_key": "minioadmin"
}
},
"external_api": {
"enable": true,
"getTokenUrl": "http://10.0.0.49:8080/api/getToken",
"putMessageUrl": "http://10.0.0.49:8080/api/putMessage",
"tenantCode": "32",
"channelNo": "cam1",
"timeout_ms": 3000,
"include_media_url": true,
"token_header": "X-Access-Token",
"token_json_path": "responseBody.token",
"token_cache_sec": 1200
}
}
}
],
"edges": [
["in_cam1", "pre_face"],
["pre_face", "face_det"],
["face_det", "face_recog"],
["face_recog", "pre_yolo"],
["pre_yolo", "yolo_ppe"],
["yolo_ppe", "tracker"],
["tracker", "logic_boots"],
["logic_boots", "osd"],
["osd", "pre_osd"],
["pre_osd", "publish"],
["publish", "alarm"]
]
}
]
}

View File

@ -14,8 +14,8 @@
"enable": true,
"url": "rtsp://10.0.0.49:8554/cam",
"fps": 30,
"width": 1280,
"height": 720,
"width": 1920,
"height": 1080,
"use_mpp": true,
"use_ffmpeg": false,
"force_tcp": true,
@ -33,7 +33,7 @@
"dst_packed": true,
"resize_mode": "stretch",
"keep_ratio": false,
"rga_gate": "scrfd_640_test",
"rga_gate": "scrfd_1080p",
"use_rga": true
},
{
@ -42,9 +42,9 @@
"role": "filter",
"enable": true,
"model_path": "./models/scrfd_500m_640.rknn",
"conf_thresh": 0.5,
"conf_thresh": 0.3,
"nms_thresh": 0.4,
"max_faces": 10,
"max_faces": 50,
"output_landmarks": true,
"input_format": "rgb"
},
@ -67,11 +67,11 @@
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1280,
"dst_h": 720,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "nv12",
"resize_mode": "stretch",
"rga_gate": "scrfd_640_test",
"rga_gate": "scrfd_1080p",
"use_rga": true
},
{
@ -83,13 +83,13 @@
"codec": "h264",
"fps": 30,
"gop": 60,
"bitrate_kbps": 2000,
"bitrate_kbps": 4000,
"use_mpp": true,
"use_ffmpeg_mux": true,
"outputs": [
{
"proto": "hls",
"path": "./web/hls/scrfd/index.m3u8",
"path": "./web/hls/cam1/index.m3u8",
"segment_sec": 2
},
{

View File

@ -0,0 +1,136 @@
{
"queue": {
"size": 8,
"strategy": "drop_oldest"
},
"graphs": [
{
"name": "scrfd_640_recog_test",
"nodes": [
{
"id": "in_cam1",
"type": "input_rtsp",
"role": "source",
"enable": true,
"url": "rtsp://10.0.0.49:8554/cam",
"fps": 30,
"width": 1920,
"height": 1080,
"use_mpp": true,
"use_ffmpeg": false,
"force_tcp": true,
"reconnect_sec": 5,
"reconnect_backoff_max_sec": 30
},
{
"id": "pre_cam1",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 640,
"dst_h": 640,
"dst_format": "rgb",
"dst_packed": true,
"resize_mode": "stretch",
"keep_ratio": false,
"rga_gate": "scrfd_1080p",
"use_rga": true
},
{
"id": "scrfd",
"type": "ai_scrfd",
"role": "filter",
"enable": true,
"model_path": "./models/scrfd_500m_640.rknn",
"conf_thresh": 0.3,
"nms_thresh": 0.4,
"max_faces": 50,
"output_landmarks": true,
"input_format": "rgb"
},
{
"id": "face_recog",
"type": "ai_face_recog",
"role": "filter",
"enable": true,
"model_path": "./models/mobilefacenet_arcface.rknn",
"align": true,
"emit_embedding": false,
"max_faces": 50,
"input_format": "rgb",
"input_dtype": "uint8",
"threshold": {
"accept": 0.45,
"margin": 0.05
},
"gallery": {
"backend": "sqlite",
"path": "./models/face_gallery.db",
"load_on_start": true,
"expected_dim": 512,
"dtype": "auto"
}
},
{
"id": "osd_cam1",
"type": "osd",
"role": "filter",
"enable": true,
"draw_bbox": true,
"draw_text": true,
"draw_face_det": true,
"draw_face_bbox": true,
"line_width": 2,
"font_scale": 1,
"use_rga_bbox": false,
"labels": ["face"]
},
{
"id": "post_cam1",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "nv12",
"resize_mode": "stretch",
"rga_gate": "scrfd_1080p",
"use_rga": true
},
{
"id": "pub_cam1",
"type": "publish",
"role": "filter",
"enable": true,
"queue": {"size": 2, "policy": "drop_oldest"},
"codec": "h264",
"fps": 30,
"gop": 60,
"bitrate_kbps": 4000,
"use_mpp": true,
"use_ffmpeg_mux": true,
"outputs": [
{
"proto": "hls",
"path": "./web/hls/cam1/index.m3u8",
"segment_sec": 2
},
{
"proto": "rtsp_server",
"port": 8555,
"path": "/live/cam1"
}
]
}
],
"edges": [
["in_cam1", "pre_cam1"],
["pre_cam1", "scrfd"],
["scrfd", "face_recog"],
["face_recog", "osd_cam1"],
["osd_cam1", "post_cam1"],
["post_cam1", "pub_cam1"]
]
}
]
}

View File

@ -0,0 +1,139 @@
{
"queue": {
"size": 8,
"strategy": "drop_oldest"
},
"graphs": [
{
"name": "scrfd_sliding_1080p_recog",
"nodes": [
{
"id": "in_cam1",
"type": "input_rtsp",
"role": "source",
"enable": true,
"url": "rtsp://10.0.0.49:8554/cam",
"fps": 30,
"width": 1920,
"height": 1080,
"use_mpp": true,
"use_ffmpeg": false,
"force_tcp": true,
"reconnect_sec": 5,
"reconnect_backoff_max_sec": 30
},
{
"id": "pre_cam1",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "rgb",
"dst_packed": true,
"resize_mode": "stretch",
"keep_ratio": false,
"rga_gate": "scrfd_sliding_1080p_recog",
"use_rga": true
},
{
"id": "scrfd_sliding",
"type": "ai_scrfd_sliding",
"role": "filter",
"enable": true,
"model_path": "./models/scrfd_500m_640.rknn",
"conf_thresh": 0.5,
"nms_thresh": 0.4,
"max_faces": 50,
"output_landmarks": true,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
},
{
"id": "face_recog",
"type": "ai_face_recog",
"role": "filter",
"enable": true,
"model_path": "./models/mobilefacenet_arcface.rknn",
"align": true,
"emit_embedding": false,
"max_faces": 50,
"input_format": "rgb",
"input_dtype": "uint8",
"threshold": {
"accept": 0.45,
"margin": 0.05
},
"gallery": {
"backend": "sqlite",
"path": "./models/face_gallery.db",
"load_on_start": true,
"expected_dim": 512,
"dtype": "auto"
}
},
{
"id": "osd_cam1",
"type": "osd",
"role": "filter",
"enable": true,
"draw_bbox": true,
"draw_text": true,
"draw_face_det": true,
"draw_face_bbox": true,
"line_width": 2,
"font_scale": 1,
"use_rga_bbox": false,
"labels": ["face"]
},
{
"id": "post_cam1",
"type": "preprocess",
"role": "filter",
"enable": true,
"dst_w": 1920,
"dst_h": 1080,
"dst_format": "nv12",
"resize_mode": "stretch",
"rga_gate": "scrfd_sliding_1080p_recog",
"use_rga": true
},
{
"id": "pub_cam1",
"type": "publish",
"role": "filter",
"enable": true,
"queue": {"size": 2, "policy": "drop_oldest"},
"codec": "h264",
"fps": 30,
"gop": 60,
"bitrate_kbps": 4000,
"use_mpp": true,
"use_ffmpeg_mux": true,
"outputs": [
{
"proto": "hls",
"path": "./web/hls/cam1/index.m3u8",
"segment_sec": 2
},
{
"proto": "rtsp_server",
"port": 8555,
"path": "/live/cam1"
}
]
}
],
"edges": [
["in_cam1", "pre_cam1"],
["pre_cam1", "scrfd_sliding"],
["scrfd_sliding", "face_recog"],
["face_recog", "osd_cam1"],
["osd_cam1", "post_cam1"],
["post_cam1", "pub_cam1"]
]
}
]
}

View File

@ -0,0 +1,727 @@
# 人脸检测参数配置指南
本文档详细说明人脸检测节点(`ai_face_det`, `ai_scrfd`, `ai_scrfd_sliding`)中的核心后处理参数及其对检测结果的影响。
---
## 参数概览
| 参数名 | 类型 | 默认值 | 范围 | 说明 |
|--------|------|--------|------|------|
| `conf_thresh` | float | 0.5/0.6/0.7 | 0.0 ~ 1.0 | 置信度阈值,过滤低置信度候选框 |
| `nms_thresh` | float | 0.4 | 0.0 ~ 1.0 | NMS IoU 阈值,控制重复框去重力度 |
| `max_faces` | int | 10/50 | ≥ 1 | 单帧最大返回人脸数 |
---
## 1. conf_thresh (置信度阈值)
### 含义
模型对检测到的人脸的置信度confidence score阈值。神经网络在推理时会对每个候选框输出一个置信度分数表示该位置存在人脸的确定性程度。
### 对检测结果的影响
| 设置 | 效果 | 适用场景 |
|------|------|----------|
| **调高** (如 0.7) | 减少误检,只保留高置信度人脸 | 高精度需求场景(门禁、考勤) |
| **调低** (如 0.2) | 增加检出率,更多弱特征人脸被检测 | 弱光、远距离、小目标场景 |
### 注意事项
- 值过高可能导致**漏检**:模糊人脸、侧脸、小人脸可能被过滤
- 值过低可能导致**误检**:背景中的类似人脸的纹理可能被误判
### 代码实现
在 SCRFD 后处理中 (`ai_scrfd_node.cpp:282`)
```cpp
if (score < cfg_.conf_thresh) continue;
```
在 RetinaFace 后处理中 (`ai_face_det_node.cpp:784`)
```cpp
if (score < cfg.conf_thresh) continue;
```
---
## 2. nms_thresh (NMS IoU 阈值)
### 含义
非极大值抑制Non-Maximum Suppression的 IoUIntersection over Union交并比阈值。
**NMS 的作用**:同一个真实人脸可能被多个 anchor/候选框检测到NMS 用于去除重叠的重复检测框,只保留最优的一个。
**IoU 计算**
```
IoU = 两个框的交集面积 / 两个框的并集面积
```
### 对检测结果的影响
| 设置 | 效果 | 适用场景 |
|------|------|----------|
| **调高** (如 0.6) | 保留更多重叠框,对密集人脸友好 | 多人密集场景(会议室、教室) |
| **调低** (如 0.3) | 严格去重,只保留最优框 | 单人场景、需要精确框选 |
### 注意事项
- 值过高:同一人脸可能返回多个重叠框
- 值过低:密集人脸场景可能误删相邻的不同人脸
### 代码实现
在 SCRFD 后处理中 (`ai_scrfd_node.cpp:172`)
```cpp
detections = ApplyNMS(detections, cfg_.nms_thresh);
```
NMS 算法逻辑 (`ai_face_det_node.cpp:156-167`)
```cpp
void NmsSorted(const std::vector<Rect>& boxes, const std::vector<float>& scores,
float nms_thresh, std::vector<int>& keep) {
for (...) {
bool suppressed = false;
for (int kept : keep) {
if (IoU(boxes[idx], boxes[kept]) > nms_thresh) {
suppressed = true; // 被已保留的框抑制
break;
}
}
if (!suppressed) keep.push_back(idx);
}
}
```
---
## 3. max_faces (最大人脸数)
### 含义
单帧图像中最多返回的人脸检测数量限制。
### 对检测结果的影响
| 设置 | 效果 | 性能影响 |
|------|------|----------|
| **调高** (如 50) | 可检测更多人脸,不遗漏密集场景目标 | 增加后处理开销RGA/OSD 绘制负载增大 |
| **调低** (如 5) | 仅保留置信度最高的前几个人脸 | 减少计算量,提升实时性 |
### 注意事项
- 当画面中出现超过 `max_faces` 数量的人脸时,系统会按置信度排序,只保留前 N 个
- 设置过大可能导致 RGA 任务堆积,引起 OSD 绘制卡顿
### 代码实现
在 SCRFD 后处理中 (`ai_scrfd_node.cpp:174-176`)
```cpp
if (detections.size() > static_cast<size_t>(cfg_.max_faces)) {
detections.resize(cfg_.max_faces);
}
```
在 RetinaFace 后处理中 (`ai_face_det_node.cpp:840`)
```cpp
const int out_n = std::min<int>(cfg.max_faces, static_cast<int>(keep.size()));
```
---
## 推荐配置
### 按应用场景
| 场景 | conf_thresh | nms_thresh | max_faces | 说明 |
|------|-------------|------------|-----------|------|
| **高精度门禁/考勤** | 0.6 ~ 0.7 | 0.4 | 5 ~ 10 | 减少误识别,确保准确率 |
| **多人大场景** (会议室/教室) | 0.4 ~ 0.5 | 0.4 ~ 0.5 | 20 ~ 50 | 平衡检出率和去重效果 |
| **实时性优先** | 0.5 | 0.4 | 10 | 减少后处理开销 |
| **弱光/远距离/小目标** | 0.3 ~ 0.4 | 0.3 | 10 ~ 20 | 提高检出率,但需容忍一定误检 |
| **单人视频通话** | 0.6 | 0.4 | 1 ~ 3 | 最小化处理开销 |
### 按硬件性能
| 设备性能 | max_faces 建议 | 优化策略 |
|----------|----------------|----------|
| **RK3588 高性能模式** | 20 ~ 50 | 可同时处理多路高清视频 |
| **RK3588 平衡模式** | 10 ~ 20 | 适当降低分辨率和检测频率 |
| **RK3566/RK3568** | 5 ~ 10 | 降低输入分辨率,提高 conf_thresh 减少候选框 |
---
## 参数联动关系
这三个参数需要协同调整:
1. **提高 `conf_thresh`** → 候选框数量减少 → 可降低 `max_faces` → NMS 压力减小
2. **降低 `conf_thresh`** → 候选框数量增加 → 可能需要提高 `max_faces` → NMS 压力增大
3. **密集场景**:适当提高 `nms_thresh` 避免误删相邻人脸,同时确保 `max_faces` 足够大
---
## 配置示例
### SCRFD 配置 (`ai_scrfd` 节点)
```json
{
"type": "ai_scrfd",
"model_path": "./models/scrfd_500m_640.rknn",
"conf_thresh": 0.5,
"nms_thresh": 0.4,
"max_faces": 50,
"output_landmarks": true,
"input_format": "rgb"
}
```
### RetinaFace 配置 (`ai_face_det` 节点)
```json
{
"type": "ai_face_det",
"model_path": "./models/RetinaFace_mobile320.rknn",
"conf": 0.7,
"nms": 0.4,
"max_faces": 10,
"output_landmarks": true,
"input_format": "rgb"
}
```
### 分区域检测配置 (`ai_face_det_zoned` 节点)
```json
{
"type": "ai_face_det_zoned",
"model_path": "./models/RetinaFace_mobile320.rknn",
"conf": 0.6,
"nms": 0.4,
"max_faces": 10,
"output_landmarks": true
}
```
---
## 调试建议
1. **先调 conf_thresh**:从默认值开始,观察是否漏检或误检
2. **再调 nms_thresh**:在密集人脸场景测试,确保既不重复框选也不漏检
3. **最后调 max_faces**:根据实际场景人数和硬件性能调整
### 日志查看
启动时节点会打印当前参数:
```
[ai_face_det] start id=face_det conf=0.7 nms=0.4 max_faces=10
[ai_scrfd] start id=scrfd conf=0.5 nms=0.4 max_faces=50
```
---
## 常见问题
### Q1: 为什么检测到的人脸框会抖动/闪烁?
**可能原因**
- `conf_thresh` 设置过低,边缘候选框置信度波动导致时有时无
- `nms_thresh` 过低,相邻帧选择不同的 anchor
**解决方法**:适当提高 `conf_thresh` 或调整 `nms_thresh`
### Q2: 密集场景漏检严重怎么办?
**解决方法**
- 降低 `conf_thresh` 到 0.4 左右
- 提高 `max_faces` 到 30 以上
- 适当提高 `nms_thresh` 到 0.5,避免相邻人脸被抑制
### Q3: OSD 绘制卡顿RGA 任务堆积?
**解决方法**
- 降低 `max_faces` 减少绘制负载
- 提高 `conf_thresh` 减少检测数量
---
## 四、人脸识别参数 (`ai_face_recog`)
`ai_face_recog` 节点接收人脸检测结果,提取人脸特征向量并与特征库进行比对,完成人脸识别。
### 4.1 参数概览
| 参数名 | 类型 | 默认值 | 说明 |
|--------|------|--------|------|
| `align` | bool | true | 是否使用5点关键点进行人脸对齐 |
| `emit_embedding` | bool | false | 是否输出特征向量(用于调试) |
| `max_faces` | int | 10 | 单帧最大处理人脸数 |
| `input_format` | string | "rgb" | 输入图像格式rgb/bgr |
| `input_dtype` | string | "uint8" | 输入数据类型uint8/float |
| `threshold.accept` | float | 0.45 | 识别通过阈值,相似度超过此值才接受 |
| `threshold.margin` | float | 0.05 | 边距阈值,最佳与次佳匹配的差距要求 |
| `gallery.backend` | string | "sqlite" | 人脸库后端类型 |
| `gallery.path` | string | "./models/face_gallery.db" | 人脸库文件路径 |
---
### 4.2 align (人脸对齐)
#### 含义
是否使用检测到的5个面部关键点眼睛、鼻子、嘴角进行人脸对齐变换。
#### 对识别效果的影响
| 设置 | 效果 | 适用场景 |
|------|------|----------|
| **true** | 对齐后人脸姿态归一化,提高识别准确率 | 高位摄像头、角度倾斜、侧脸场景 |
| **false** | 直接裁剪人脸区域,计算量略小 | 正面、固定位置场景 |
#### 对齐原理
使用5点关键点与标准模板进行相似变换Similarity Transform
- 标准模板坐标112x112输入左眼(38.29,51.70)、右眼(73.53,51.50)、鼻尖(56.02,71.74)、左嘴角(41.55,92.37)、右嘴角(70.73,92.20)
- 代码实现:`ai_face_recog_node.cpp:851-865`
```cpp
if (cfg->align && face.has_landmarks && model_w_ == 112 && model_h_ == 112) {
const std::array<Point2f, 5> dst = { ... }; // 标准模板
SimilarityTransform t;
InvTransform inv;
if (ComputeSimilarity(face.landmarks, dst, t) && InvertSimilarity(t, inv)) {
WarpFace(src, w, h, stride, inv, face_buf_.data(), model_w_, model_h_, need_swap);
}
}
```
---
### 4.3 threshold.accept (接受阈值)
#### 含义
特征向量相似度阈值,范围 `0.0 ~ 1.0`。只有当待识别人脸与库中某人的相似度超过此值时,才认为是匹配成功。
#### 对识别结果的影响
| 设置 | 效果 | 误识率 | 拒识率 |
|------|------|--------|--------|
| **调高** (如 0.55) | 更严格,只接受高度相似 | 低 | 高 |
| **调低** (如 0.35) | 更宽松,容易匹配 | 高 | 低 |
#### 推荐值
| 场景 | 推荐值 | 说明 |
|------|--------|------|
| **高安全性场景** | 0.50 ~ 0.55 | 门禁、支付,严格控制误识 |
| **一般场景** | 0.45 ~ 0.50 | 考勤、签到,平衡准确率和体验 |
| **快速通行场景** | 0.40 ~ 0.45 | 闸机、通道,减少拒识 |
#### 代码实现
```cpp
const bool accept = (sr.best_person_id >= 0) &&
(sr.best_sim >= cfg->thr_accept) &&
((cfg->thr_margin <= 0.0f) || ((sr.best_sim - sr.second_sim) >= cfg->thr_margin));
```
---
### 4.4 threshold.margin (边距阈值)
#### 含义
要求最佳匹配与次佳匹配的相似度差距至少达到此值,用于排除模糊匹配(如两个人都很像的情况)。设为 `0` 或负数可禁用此检查。
#### 作用示例
假设待识别人脸与库中人员相似度如下:
- 张三(最佳): 0.62
- 李四(次佳): 0.58
- 差距: 0.04
如果 `margin = 0.05`,则 0.04 < 0.05匹配失败标记为 unknown
如果 `margin = 0.03`,则 0.04 > 0.03,匹配成功(识别为张三)
#### 推荐值
- **0.05**(默认):适合大多数人脸库
- **0.00** 或负数:禁用边距检查,只依赖 accept 阈值
---
### 4.5 max_faces (最大处理人脸数)
#### 含义
单帧最多处理的人脸数量。由于特征提取需要 NPU 推理,此参数直接影响处理延迟。
#### 与检测节点 max_faces 的关系
```
实际处理数 = min(face_det.max_faces, face_recog.max_faces)
```
建议两个节点的 `max_faces` 保持一致或识别节点略小。
---
### 4.6 gallery (人脸库配置)
#### 参数说明
| 参数 | 默认值 | 说明 |
|------|--------|------|
| `backend` | "sqlite" | 后端类型,目前仅支持 sqlite |
| `path` | "./models/face_gallery.db" | 人脸库数据库文件路径 |
| `load_on_start` | true | 启动时加载到内存 |
| `expected_dim` | 512 | 特征向量维度MobileFaceNet 为 512 |
| `dtype` | "auto" | 数据类型auto/float32 |
#### 人脸库管理
人脸库使用 SQLite 存储,包含以下信息:
- `person_id`人员唯一ID
- `name`:人员名称
- `embedding`特征向量512维浮点数
- 可通过 Web 管理接口或脚本添加/删除/更新人脸
---
### 4.7 normalize (输入归一化)
#### 两种归一化方式
**方式一:缩放+偏移(简单)**
```json
{
"normalize": {
"scale": 0.0078125,
"bias": 0.0
}
}
```
公式:`output = input * scale + bias`
**方式二:均值+标准差(标准)**
```json
{
"normalize": {
"mean": [127.5, 127.5, 127.5],
"std": [128.0, 128.0, 128.0]
}
}
```
公式:`output = (input - mean) / std`
#### 默认值
MobileFaceNet 模型通常使用:
- `scale`: 1.0(不对 uint8 输入做缩放,由模型内部处理)
- 或 `mean: [127.5,127.5,127.5], std: [127.5,127.5,127.5]` 归一化到 [-1, 1]
---
### 4.8 人脸识别配置示例
```json
{
"id": "face_recog",
"type": "ai_face_recog",
"role": "filter",
"enable": true,
"model_path": "./models/mobilefacenet_arcface.rknn",
"align": true,
"emit_embedding": false,
"max_faces": 50,
"input_format": "rgb",
"input_dtype": "uint8",
"threshold": {
"accept": 0.45,
"margin": 0.05
},
"gallery": {
"backend": "sqlite",
"path": "./models/face_gallery.db",
"load_on_start": true,
"expected_dim": 512,
"dtype": "auto"
}
}
```
---
### 4.9 检测+识别完整流程配置
```json
{
"graphs": [{
"nodes": [
{
"id": "scrfd",
"type": "ai_scrfd",
"conf_thresh": 0.3,
"nms_thresh": 0.4,
"max_faces": 50,
"output_landmarks": true
},
{
"id": "face_recog",
"type": "ai_face_recog",
"align": true,
"max_faces": 50,
"threshold": { "accept": 0.45, "margin": 0.05 },
"gallery": { "path": "./models/face_gallery.db" }
},
{
"id": "osd",
"type": "osd",
"draw_face_det": true,
"draw_face_bbox": true
}
],
"edges": [
["scrfd", "face_recog"],
["face_recog", "osd"]
]
}]
}
```
---
## 五、滑动窗口检测参数 (`ai_scrfd_sliding`)
`ai_scrfd_sliding` 是专为**高分辨率视频**设计的滑动窗口检测节点,通过将画面分割成多个窗口分别检测,有效提升远处小目标的检出率。
### 5.1 节点特性
| 特性 | 说明 |
|------|------|
| **原始分辨率输入** | 直接接收原始图像,保留更多细节 |
| **滑动窗口检测** | 将画面分割成多个窗口,分别检测后合并结果 |
| **近似宽高比** | 每个窗口按自身区域 resize 到 640x640宽高比轻微变形但可接受 |
| **窗口可配置** | 支持自定义窗口数量和位置 |
### 5.2 参数说明
| 参数 | 类型 | 默认值 | 说明 |
|------|------|--------|------|
| `model_path` | string | - | SCRFD 模型路径 |
| `conf_thresh` | float | 0.3 | 置信度阈值 |
| `nms_thresh` | float | 0.4 | NMS IoU 阈值 |
| `max_faces` | int | 50 | 最大检测人脸数 |
| `output_landmarks` | bool | true | 是否输出5点关键点 |
| `windows` | array | 自动计算 | 窗口配置数组 |
### 5.3 窗口配置 (`windows`)
如果不配置 `windows`,节点会根据输入分辨率自动计算窗口。
**窗口格式**
```json
{
"x": 0, // 窗口左上角 X 坐标
"y": 0, // 窗口左上角 Y 坐标
"w": 960, // 窗口宽度
"h": 1080 // 窗口高度
}
```
**窗口设计原则**
- 窗口之间应有适当重叠,避免漏检
- 窗口尺寸建议接近 640x640 的倍数resize 后变形较小)
- 对于 16:9 视频,水平分割效果较好
### 5.4 不同分辨率配置参考
#### 1080p (1920×1080) - 推荐2窗口
```json
{
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
}
```
**说明**
- 窗口 0左半边 960x1080
- 窗口 1右半边 960x1080
- 正好覆盖 1920 宽度,无重叠
- 每个窗口 resize 到 640x640比例 0.89:1
#### 1440p (2560×1440) - 推荐2窗口
```json
{
"windows": [
{"x": 0, "y": 0, "w": 1280, "h": 1440},
{"x": 1280, "y": 0, "w": 1280, "h": 1440}
]
}
```
**说明**
- 窗口 0左半边 1280x1440
- 窗口 1右半边 1280x1440
- 比例 0.89:1与 1080p 一致
#### 更高分辨率 - 增加窗口数
对于 4K (3840×2160) 等更高分辨率,可以增加窗口数量:
```json
{
"windows": [
{"x": 0, "y": 0, "w": 1280, "h": 1080},
{"x": 1280, "y": 0, "w": 1280, "h": 1080},
{"x": 2560, "y": 0, "w": 1280, "h": 1080}
]
}
```
### 5.5 配置示例
```json
{
"id": "scrfd_sliding",
"type": "ai_scrfd_sliding",
"role": "filter",
"enable": true,
"model_path": "./models/scrfd_500m_640.rknn",
"conf_thresh": 0.3,
"nms_thresh": 0.4,
"max_faces": 50,
"output_landmarks": true,
"windows": [
{"x": 0, "y": 0, "w": 960, "h": 1080},
{"x": 960, "y": 0, "w": 960, "h": 1080}
]
}
```
### 5.6 性能考量
- **窗口数 = 推理次数**2 个窗口 = 2 次模型推理
- **分辨率越高,窗口数越多**:需要在检测效果和性能之间平衡
- **建议窗口数**
- 1080p2 个窗口
- 1440p2 个窗口(或 4 个窗口用于更精细检测)
- 4K3-4 个窗口
### 5.7 滑动窗口检测常见问题
#### Q7: 窗口边缘的人脸被分割成两半?
**解决方法**
- 增加窗口重叠区域(如窗口 0 结束于 1000窗口 1 开始于 900
- NMS 会自动合并重复检测
#### Q8: 远处人脸还是检测不到?
**解决方法**
- 增加窗口数量,让每个窗口覆盖更小区域
- 降低 `conf_thresh` 让更多候选框通过
- 考虑使用更高分辨率摄像头
#### Q9: 检测延迟增加?
**解决方法**
- 减少窗口数量
- 降低 `max_faces` 减少后处理负担
- 使用更高性能硬件
---
## 六、综合配置建议
### 6.1 场景配置速查表
| 场景 | 检测节点 | 关键参数 | 说明 |
|------|----------|----------|------|
| **门禁/考勤** | `ai_face_det` | conf=0.7, max_faces=5 | 近距离,高精度 |
| **车间/厂房** | `ai_scrfd_sliding` | 2窗口 | 高位摄像头,大透视 |
| **会议室** | `ai_scrfd` | conf=0.4, max_faces=50 | 多人场景 |
| **户外/街道** | `ai_scrfd_sliding` | 2-4窗口 | 远距离检测 |
### 6.2 分辨率配置对照表
| 分辨率 | 检测节点 | 输入处理 | 建议 |
|--------|----------|----------|------|
| 720p | `ai_scrfd` | 前置缩放至640 | 通用配置 |
| 1080p | `ai_scrfd_sliding` | 2窗口(960x1080) | 滑动窗口检测 |
| 1440p | `ai_scrfd_sliding` | 2窗口(1280x1440) | 滑动窗口检测 |
| 4K | `ai_scrfd_sliding` | 3-4窗口 | 更多窗口提升精度 |
---
## 七、常见问题汇总
### Q1: 检测框抖动/闪烁
**可能原因**
- `conf_thresh` 设置过低,边缘候选框置信度波动
- `nms_thresh` 过低,相邻帧选择不同 anchor
**解决方法**:适当提高 `conf_thresh` 或调整 `nms_thresh`
### Q2: 密集场景漏检严重?
**解决方法**
- 降低 `conf_thresh` 到 0.4 左右
- 提高 `max_faces` 到 30 以上
- 适当提高 `nms_thresh` 到 0.5
### Q3: OSD 绘制卡顿?
**解决方法**
- 降低 `max_faces` 减少绘制负载
- 提高 `conf_thresh` 减少检测数量
### Q4: 识别准确率不高?
**可能原因及解决方法**
1. **对齐问题**:确保 `align: true`,且检测节点 `output_landmarks: true`
2. **阈值不合适**:调整 `threshold.accept`,根据实际测试确定最佳值
3. **人脸库质量**:确保库中人脸照片清晰、正面、光线均匀
4. **检测框质量**:适当提高检测 `conf_thresh`,过滤低质量检测框
### Q5: 远距离/小目标识别效果差?
**解决方法**
- 提高检测 `conf_thresh`,让只有清晰的人脸进入识别
- 检查摄像头分辨率,确保人脸区域至少 60x60 像素
- 考虑使用更高清的摄像头或调整安装角度
### Q6: 识别延迟高?
**优化方法**
- 降低 `max_faces`,减少单帧处理数量
- 提高检测 `conf_thresh`,减少候选框
- 确保 `gallery.load_on_start: true`,避免运行时查询数据库
---
## 相关文档
- [SCRFD 模型规格说明](../scrfd_500m_640_spec.md)
- [YOLO 检测参数配置](../config_guide.md)
- [DAG 节点与边说明](./dag_graph_node_edge.md)
- [MobileFaceNet 模型说明](../models.md)

View File

@ -542,141 +542,98 @@ inline float ExtractNCHW(const TensorView& t, int c, int h, int w, int C, int H,
}
/**
* SCRFD检测结果
* SCRFD检测结果 - ai_scrfd 使
*
* @param outputs 9 [score_8, score_16, score_32, bbox_8, bbox_16, bbox_32, kps_8, kps_16, kps_32]
* @param anchors anchor
* @param anchors anchor (center_x, center_y, stride)
* @param src_w
* @param src_h
* @param model_w
* @param model_h
* @param cfg
* @param conf_thresh
* @param output_lm
* @param out
*/
inline void DecodeScrfd(const std::vector<TensorView>& outputs,
const std::vector<ScrfdAnchor>& anchors,
int src_w, int src_h,
int model_w, int model_h,
const DetectionConfig& cfg,
float conf_thresh,
bool output_lm,
FaceDetResult& out) {
if (outputs.size() != 9) {
return; // SCRFD需要9个输出
}
if (outputs.size() != 9) return;
const float sx = static_cast<float>(src_w) / static_cast<float>(model_w);
const float sy = static_cast<float>(src_h) / static_cast<float>(model_h);
std::vector<Rect> boxes;
std::vector<float> scores;
std::vector<std::array<Point2f, 5>> lmks;
// Output order: score_8, score_16, score_32, bbox_8, bbox_16, bbox_32, kps_8, kps_16, kps_32
const int anchor_counts[] = {12800, 3200, 800};
const int strides[] = {8, 16, 32};
size_t anchor_idx = 0;
const int strides[3] = {8, 16, 32};
float scale_x = static_cast<float>(src_w) / model_w;
float scale_y = static_cast<float>(src_h) / model_h;
for (int s = 0; s < 3; ++s) {
int score_idx = s;
int bbox_idx = s + 3;
int kps_idx = s + 6;
int stride = strides[s];
int count = anchor_counts[s];
const TensorView& score_t = outputs[score_idx];
const TensorView& bbox_t = outputs[bbox_idx];
const TensorView& kps_t = outputs[kps_idx];
// 检查输出数据是否有效
if (outputs[s].type != RKNN_TENSOR_FLOAT32 ||
outputs[s + 3].type != RKNN_TENSOR_FLOAT32 ||
outputs[s + 6].type != RKNN_TENSOR_FLOAT32) {
continue;
}
// 检查维度
if (score_t.dims.size() < 4 || bbox_t.dims.size() < 4) continue;
const float* scores = reinterpret_cast<const float*>(outputs[s].data);
const float* bboxes = reinterpret_cast<const float*>(outputs[s + 3].data);
const float* kps = reinterpret_cast<const float*>(outputs[s + 6].data);
int C = static_cast<int>(score_t.dims[1]);
int H = static_cast<int>(score_t.dims[2]);
int W = static_cast<int>(score_t.dims[3]);
int anchors_per_loc = C / 2; // fg/bg
if (!scores || !bboxes || !kps) continue;
for (int h = 0; h < H; ++h) {
for (int w = 0; w < W; ++w) {
for (int a = 0; a < anchors_per_loc; ++a) {
for (int i = 0; i < count; ++i) {
if (anchor_idx >= anchors.size()) break;
// 提取前景分数 (channel a*2+1)
float score = ExtractNCHW(score_t, a * 2 + 1, h, w, C, H, W);
if (score >= cfg.conf_thresh) {
const ScrfdAnchor& anchor = anchors[anchor_idx];
// 提取bbox [dx, dy, dw, dh]
float dx = ExtractNCHW(bbox_t, a * 4 + 0, h, w,
static_cast<int>(bbox_t.dims[1]), H, W) * stride;
float dy = ExtractNCHW(bbox_t, a * 4 + 1, h, w,
static_cast<int>(bbox_t.dims[1]), H, W) * stride;
float dw = ExtractNCHW(bbox_t, a * 4 + 2, h, w,
static_cast<int>(bbox_t.dims[1]), H, W) * stride;
float dh = ExtractNCHW(bbox_t, a * 4 + 3, h, w,
static_cast<int>(bbox_t.dims[1]), H, W) * stride;
float cx = anchor.cx + dx;
float cy = anchor.cy + dy;
float x1 = (cx - dw * 0.5f) * sx;
float y1 = (cy - dh * 0.5f) * sy;
float x2 = (cx + dw * 0.5f) * sx;
float y2 = (cy + dh * 0.5f) * sy;
x1 = static_cast<float>(ClampInt(static_cast<int>(x1), 0, src_w - 1));
y1 = static_cast<float>(ClampInt(static_cast<int>(y1), 0, src_h - 1));
x2 = static_cast<float>(ClampInt(static_cast<int>(x2), 0, src_w - 1));
y2 = static_cast<float>(ClampInt(static_cast<int>(y2), 0, src_h - 1));
Rect bb;
bb.x = x1;
bb.y = y1;
bb.w = std::max(0.0f, x2 - x1);
bb.h = std::max(0.0f, y2 - y1);
if (bb.w > 1.0f && bb.h > 1.0f) {
boxes.push_back(bb);
scores.push_back(score);
// 提取关键点
if (cfg.output_landmarks) {
std::array<Point2f, 5> pts{};
for (int k = 0; k < 5; ++k) {
float lx = ExtractNCHW(kps_t, a * 10 + k * 2 + 0, h, w,
static_cast<int>(kps_t.dims[1]), H, W) * stride;
float ly = ExtractNCHW(kps_t, a * 10 + k * 2 + 1, h, w,
static_cast<int>(kps_t.dims[1]), H, W) * stride;
pts[k].x = (anchor.cx + lx) * sx;
pts[k].y = (anchor.cy + ly) * sy;
}
lmks.push_back(pts);
}
}
float score = scores[i];
if (score < conf_thresh) {
anchor_idx++;
continue;
}
++anchor_idx;
}
}
const ScrfdAnchor& pt = anchors[anchor_idx];
// BBox: [left, top, right, bottom] - distances from center
float left = bboxes[i * 4 + 0];
float top = bboxes[i * 4 + 1];
float right = bboxes[i * 4 + 2];
float bottom = bboxes[i * 4 + 3];
// Decode to image coordinates (640x640)
float x1_640 = (pt.cx - left) * stride;
float y1_640 = (pt.cy - top) * stride;
float x2_640 = (pt.cx + right) * stride;
float y2_640 = (pt.cy + bottom) * stride;
FaceDetItem det;
det.bbox.x = x1_640 * scale_x;
det.bbox.y = y1_640 * scale_y;
det.bbox.w = (x2_640 - x1_640) * scale_x;
det.bbox.h = (y2_640 - y1_640) * scale_y;
det.score = score;
det.has_landmarks = output_lm;
// Keypoints
if (output_lm) {
for (int p = 0; p < 5; ++p) {
float kps_x = kps[i * 10 + p * 2 + 0];
float kps_y = kps[i * 10 + p * 2 + 1];
float kx_640 = (pt.cx + kps_x) * stride;
float ky_640 = (pt.cy + kps_y) * stride;
det.landmarks[p].x = kx_640 * scale_x;
det.landmarks[p].y = ky_640 * scale_y;
}
}
if (boxes.empty()) return;
// NMS
std::vector<int> keep;
NmsSorted(boxes, scores, cfg.nms_thresh, keep);
if (keep.empty()) return;
// 构建输出
const int out_n = std::min<int>(cfg.max_faces, static_cast<int>(keep.size()));
out.faces.reserve(static_cast<size_t>(out_n));
for (int i = 0; i < out_n; ++i) {
const int k = keep[static_cast<size_t>(i)];
FaceDetItem item;
item.bbox = boxes[static_cast<size_t>(k)];
item.score = scores[static_cast<size_t>(k)];
item.track_id = -1;
if (cfg.output_landmarks && k < static_cast<int>(lmks.size())) {
item.has_landmarks = true;
item.landmarks = lmks[static_cast<size_t>(k)];
out.faces.push_back(det);
anchor_idx++;
}
out.faces.push_back(std::move(item));
}
}

View File

@ -0,0 +1,85 @@
#pragma once
/**
 * SCRFD face detector - shared SCRFD post-processing (anchor decode + NMS).
 * Reused by the ai_scrfd and ai_scrfd_sliding plugin nodes so both decode
 * RKNN outputs the same way.
 */
#include <vector>
#include <cstdint>
#include "face/face_result.h"
// AiScheduler is included for its BorrowedOutput type, the zero-copy
// wrapper around the raw RKNN output tensors consumed by Decode().
#include "ai_scheduler.h"
namespace rk3588 {
/**
 * One decoded SCRFD detection. Thin wrapper around FaceDetItem
 * (bbox, score, optional 5-point landmarks).
 */
struct ScrfdDetection {
FaceDetItem item;
};
/**
 * Post-processing configuration for SCRFD decoding.
 * NOTE(review): these header defaults (conf 0.5) differ from the values some
 * shipped configs use (0.3) - callers are expected to override from JSON.
 */
struct ScrfdConfig {
float conf_thresh = 0.5f;
float nms_thresh = 0.4f;
int max_faces = 50;
bool output_landmarks = true;
};
/**
 * Stateful SCRFD decoder.
 *
 * Typical usage:
 *   ScrfdDetector det;
 *   det.Init(640, 640);
 *   auto dets = det.Decode(outputs, src_w, src_h, config);
 */
class ScrfdDetector {
public:
ScrfdDetector();
~ScrfdDetector();
/**
 * Precompute the anchor center-point grid for the given model input size.
 * Must be called before Decode().
 * @param model_w model input width in pixels (typically 640)
 * @param model_h model input height in pixels (typically 640)
 */
void Init(int model_w, int model_h);
/**
 * Decode raw SCRFD outputs into face detections in source-image coordinates.
 * @param outputs the 9 SCRFD output tensors (BorrowedOutput wrappers);
 *                expected order: scores, bboxes, keypoints per stride
 * @param src_w   source image width (detections are scaled back to this)
 * @param src_h   source image height
 * @param cfg     thresholds / limits applied during post-processing
 * @return decoded detections (bbox, score, optional landmarks)
 */
std::vector<FaceDetItem> Decode(
const std::vector<AiScheduler::BorrowedOutput>& outputs,
int src_w, int src_h,
const ScrfdConfig& cfg);
/**
 * Non-maximum suppression over a candidate list; returns the kept subset.
 * Exposed publicly so the sliding-window node can NMS the merged
 * detections from multiple windows.
 */
std::vector<FaceDetItem> ApplyNMS(
std::vector<FaceDetItem>& dets,
float nms_thresh);
private:
// Anchor center (cx, cy) plus the feature-map stride it belongs to.
struct CenterPoint {
float cx, cy;
float stride;
};
std::vector<CenterPoint> center_points_;  // precomputed by Init()
int model_w_ = 640;
int model_h_ = 640;
};
} // namespace rk3588

View File

@ -269,24 +269,6 @@ set_target_properties(ai_face_det PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
# ai_face_det_zoned plugin (RKNN-based RetinaFace with distance zone detection)
add_library(ai_face_det_zoned SHARED
ai_face_det_zoned/ai_face_det_zoned_node.cpp
${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
)
target_include_directories(ai_face_det_zoned PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
target_link_libraries(ai_face_det_zoned PRIVATE project_options Threads::Threads ai_scheduler)
if(RK3588_ENABLE_RKNN AND RK_RKNN_LIB)
target_compile_definitions(ai_face_det_zoned PRIVATE RK3588_ENABLE_RKNN)
target_include_directories(ai_face_det_zoned PRIVATE ${RKNN_RUNTIME_INCLUDE_DIR})
target_link_libraries(ai_face_det_zoned PRIVATE ${RK_RKNN_LIB})
endif()
set_target_properties(ai_face_det_zoned PROPERTIES
OUTPUT_NAME "ai_face_det_zoned"
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
# ai_scrfd plugin (SCRFD 640x640 face detection)
add_library(ai_scrfd SHARED
ai_scrfd/ai_scrfd_node.cpp
@ -305,6 +287,25 @@ set_target_properties(ai_scrfd PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
# ai_scrfd_sliding plugin (SCRFD with sliding window detection)
add_library(ai_scrfd_sliding SHARED
  ai_scrfd_sliding/ai_scrfd_sliding_node.cpp
  ${CMAKE_SOURCE_DIR}/src/face/scrfd_detector.cpp
  ${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
)
target_include_directories(ai_scrfd_sliding PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
target_link_libraries(ai_scrfd_sliding PRIVATE project_options Threads::Threads ai_scheduler)
# Guard on RK_RKNN_LIB as well, consistent with the other RKNN plugins:
# linking ${RK_RKNN_LIB} when the runtime library was not found would
# otherwise fail at link time.
if(RK3588_ENABLE_RKNN AND RK_RKNN_LIB)
  target_compile_definitions(ai_scrfd_sliding PRIVATE RK3588_ENABLE_RKNN)
  target_include_directories(ai_scrfd_sliding PRIVATE ${RKNN_RUNTIME_INCLUDE_DIR})
  target_link_libraries(ai_scrfd_sliding PRIVATE ${RK_RKNN_LIB})
endif()
set_target_properties(ai_scrfd_sliding PROPERTIES
  OUTPUT_NAME "ai_scrfd_sliding"
  LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
  RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
# ai_face_recog plugin (RKNN-based ArcFace/MobileFaceNet inference)
add_library(ai_face_recog SHARED
ai_face_recog/ai_face_recog_node.cpp
@ -511,7 +512,7 @@ if(RK3588_ENABLE_ZLMEDIAKIT AND RK_ZLMK_API_LIB)
)
endif()
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_face_det_zoned ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
)

View File

@ -1,502 +0,0 @@
/**
 * ai_face_det_zoned - face detection with distance zones
 *
 * Features:
 * 1. RetinaFace detection split by camera distance zones
 * 2. Optional ROI crop plus three-zone detection
 * 3. Near (3-5m) 1.0x / mid (5-7m) 1.3x / far (7-9m) 1.8x zone scales
 * 4. Shares helpers from face_detection_utils.h
 */
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include "face/face_detection_utils.h"
#include "hw/i_infer_backend.h"
#include "face/face_result.h"
#include "node.h"
#include "utils/dma_alloc.h"
#include "utils/logger.h"
namespace rk3588 {
using namespace face_detection;
// Pipeline node: RetinaFace-based face detection with an optional ROI crop
// and three horizontal "distance zones", each detected at its own scale.
// Attaches a FaceDetResult to every frame and passes the frame downstream.
class AiFaceDetZonedNode : public INode {
public:
  std::string Id() const override { return id_; }
  std::string Type() const override { return "ai_face_det_zoned"; }
  // Parses node configuration (model, ROI crop, distance zones), wires the
  // queues/backend from ctx and loads the RKNN model. Returns false on any
  // missing dependency or model load failure.
  bool Init(const SimpleJson& config, const NodeContext& ctx) override {
    id_ = config.ValueOr<std::string>("id", "face_det_zoned");
    model_path_ = config.ValueOr<std::string>("model_path",
                                              "./models/RetinaFace_mobile320.rknn");
    // Basic detection parameters.
    det_cfg_.conf_thresh = config.ValueOr<float>("conf", 0.6f);
    det_cfg_.nms_thresh = config.ValueOr<float>("nms", 0.4f);
    det_cfg_.max_faces = config.ValueOr<int>("max_faces", 10);
    det_cfg_.output_landmarks = config.ValueOr<bool>("output_landmarks", true);
    // Model input size (defaults to 320x320).
    model_w_ = config.ValueOr<int>("model_w", 320);
    model_h_ = config.ValueOr<int>("model_h", 320);
    // Prior-box strides and min sizes (RetinaFace defaults).
    det_cfg_.steps = {8, 16, 32};
    det_cfg_.min_sizes = {{16, 32}, {64, 128}, {256, 512}};
    // ROI config - supported format: "roi": {"x": 0, "y": 0, "w": 1920, "h": 1080}
    roi_enabled_ = false;
    roi_x_ = roi_y_ = roi_w_ = roi_h_ = 0;
    if (const SimpleJson* roi = config.Find("roi"); roi && roi->IsObject()) {
      // Read the flat format directly.
      roi_x_ = roi->ValueOr<int>("x", 0);
      roi_y_ = roi->ValueOr<int>("y", 0);
      roi_w_ = roi->ValueOr<int>("w", 0);
      roi_h_ = roi->ValueOr<int>("h", 0);
      // Enable the ROI only when w/h are valid.
      if (roi_w_ > 0 && roi_h_ > 0) {
        roi_enabled_ = true;
      }
      // Backward compatibility with the old format: "roi": {"crop": {...}}
      else if (const SimpleJson* crop = roi->Find("crop"); crop && crop->IsObject()) {
        roi_x_ = crop->ValueOr<int>("x", 0);
        roi_y_ = crop->ValueOr<int>("y", 0);
        roi_w_ = crop->ValueOr<int>("w", 0);
        roi_h_ = crop->ValueOr<int>("h", 0);
        if (roi_w_ > 0 && roi_h_ > 0) {
          roi_enabled_ = true;
        }
      }
    }
    // Three-zone config - two supported formats:
    // 1. legacy: "distance_zones": {"enabled": true, "boundaries": [y1, y2], "scales": [s1, s2, s3]}
    // 2. new:    "zones": {"near_zone": {"y_start": 0, "y_end": 405, "scale": 0.5}, ...}
    zones_enabled_ = false;
    boundary_y_5m_ = boundary_y_7m_ = 0;
    scale_near_ = 1.0f;
    scale_mid_ = 1.3f;
    scale_far_ = 1.8f;
    // Prefer the new "zones" format.
    if (const SimpleJson* zones = config.Find("zones");
        zones && zones->IsObject()) {
      bool has_near = false, has_mid = false, has_far = false;
      int near_y_end = 0, mid_y_end = 0;
      if (const SimpleJson* near = zones->Find("near_zone"); near && near->IsObject()) {
        near_y_end = near->ValueOr<int>("y_end", 0);
        scale_near_ = near->ValueOr<float>("scale", 1.0f);
        has_near = true;
      }
      if (const SimpleJson* mid = zones->Find("mid_zone"); mid && mid->IsObject()) {
        mid_y_end = mid->ValueOr<int>("y_end", 0);
        scale_mid_ = mid->ValueOr<float>("scale", 1.0f);
        has_mid = true;
      }
      if (const SimpleJson* far = zones->Find("far_zone"); far && far->IsObject()) {
        scale_far_ = far->ValueOr<float>("scale", 1.0f);
        has_far = true;
      }
      // Zones are all-or-nothing: enable only when all three are present.
      if (has_near && has_mid && has_far) {
        zones_enabled_ = true;
        boundary_y_5m_ = near_y_end; // near/mid boundary
        boundary_y_7m_ = mid_y_end;  // mid/far boundary
      }
    }
    // Fall back to the legacy "distance_zones" format.
    else if (const SimpleJson* zones = config.Find("distance_zones");
             zones && zones->IsObject()) {
      zones_enabled_ = zones->ValueOr<bool>("enabled", false);
      if (const SimpleJson* boundaries = zones->Find("boundaries");
          boundaries && boundaries->IsArray() && boundaries->AsArray().size() >= 2) {
        boundary_y_5m_ = boundaries->AsArray()[0].AsInt(0);
        boundary_y_7m_ = boundaries->AsArray()[1].AsInt(0);
      }
      if (const SimpleJson* scales = zones->Find("scales");
          scales && scales->IsArray() && scales->AsArray().size() >= 3) {
        scale_near_ = scales->AsArray()[0].AsNumber(1.0f);
        scale_mid_ = scales->AsArray()[1].AsNumber(1.3f);
        scale_far_ = scales->AsArray()[2].AsNumber(1.8f);
      }
    }
    input_queue_ = ctx.input_queue;
    output_queues_ = ctx.output_queues;
    if (!input_queue_) {
      LogError("[ai_face_det_zoned] no input queue for node " + id_);
      return false;
    }
    if (output_queues_.empty()) {
      LogError("[ai_face_det_zoned] no output queue for node " + id_);
      return false;
    }
    infer_backend_ = ctx.infer_backend;
    if (!infer_backend_) {
      LogError("[ai_face_det_zoned] no infer backend for node " + id_);
      return false;
    }
#if defined(RK3588_ENABLE_RKNN)
    if (model_path_.empty()) {
      LogError("[ai_face_det_zoned] model_path is required");
      return false;
    }
    std::string err;
    model_handle_ = infer_backend_->LoadModel(model_path_, err);
    if (model_handle_ == kInvalidModelHandle) {
      LogError("[ai_face_det_zoned] failed to load model: " + err);
      return false;
    }
    // Precompute prior (anchor) boxes once for the fixed model input size.
    priors_ = GeneratePriors(model_w_, model_h_, det_cfg_.steps, det_cfg_.min_sizes);
    LogInfo("[ai_face_det_zoned] model loaded: " + model_path_ +
            " (" + std::to_string(model_w_) + "x" + std::to_string(model_h_) +
            "), priors=" + std::to_string(priors_.size()));
#else
    LogWarn("[ai_face_det_zoned] RKNN disabled, will passthrough frames");
#endif
    return true;
  }
  bool Start() override {
    LogInfo("[ai_face_det_zoned] start id=" + id_ +
            " zones=" + std::string(zones_enabled_ ? "enabled" : "disabled") +
            " roi=" + std::string(roi_enabled_ ? "enabled" : "disabled") +
            " roi_xywh=" + std::to_string(roi_x_) + "," + std::to_string(roi_y_) + "," +
            std::to_string(roi_w_) + "," + std::to_string(roi_h_) +
            " boundaries=" + std::to_string(boundary_y_5m_) + "," + std::to_string(boundary_y_7m_) +
            " scales=" + std::to_string(scale_near_) + "," + std::to_string(scale_mid_) + "," + std::to_string(scale_far_));
    return true;
  }
  void Stop() override {
#if defined(RK3588_ENABLE_RKNN)
    if (model_handle_ != kInvalidModelHandle) {
      infer_backend_->UnloadModel(model_handle_);
      model_handle_ = kInvalidModelHandle;
    }
#endif
    LogInfo("[ai_face_det_zoned] stop id=" + id_);
  }
  // Runs detection (RKNN builds only) and always forwards the frame.
  NodeStatus Process(FramePtr frame) override {
    if (!frame) return NodeStatus::DROP;
#if defined(RK3588_ENABLE_RKNN)
    RunZonedDetection(frame);
#endif
    Push(frame);
    return NodeStatus::OK;
  }
private:
  void Push(FramePtr frame) {
    for (auto& q : output_queues_) q->Push(frame);
  }
#if defined(RK3588_ENABLE_RKNN)
  // Wraps an RKNN BorrowedOutput as a TensorView (no copy; borrows o.data).
  TensorView ConvertToTensorView(const AiScheduler::BorrowedOutput& o) {
    TensorView tv;
    tv.data = o.data;
    tv.size = o.size;
    tv.zp = o.zp;
    tv.scale = o.scale;
    tv.dims = o.dims;
    tv.type = o.type;
    return tv;
  }
  // Full per-frame pipeline: clamp ROI, detect (zoned or single), NMS,
  // cap to max_faces, and attach the FaceDetResult to the frame.
  void RunZonedDetection(FramePtr frame) {
    if (!frame->data || frame->data_size == 0) return;
    if (frame->format != PixelFormat::RGB && frame->format != PixelFormat::BGR) {
      LogWarn("[ai_face_det_zoned] input must be RGB/BGR");
      return;
    }
    const int src_w = frame->width;
    const int src_h = frame->height;
    // Apply the ROI crop (clamped to the frame bounds).
    int roi_x = 0, roi_y = 0, roi_w = src_w, roi_h = src_h;
    if (roi_enabled_) {
      roi_x = ClampInt(roi_x_, 0, src_w - 1);
      roi_y = ClampInt(roi_y_, 0, src_h - 1);
      roi_w = ClampInt(roi_w_, 1, src_w - roi_x);
      roi_h = ClampInt(roi_h_, 1, src_h - roi_y);
    }
    std::vector<FaceDetItem> all_detections;
    if (zones_enabled_) {
      // Three-zone detection.
      all_detections = DetectWithZones(frame, roi_x, roi_y, roi_w, roi_h);
    } else {
      // Single-zone detection over the whole ROI.
      auto dets = DetectSingleZone(frame, roi_x, roi_y, roi_w, roi_h, 1.0f);
      // Map coordinates back to the original image.
      for (auto& det : dets) {
        det.bbox.x += roi_x;
        det.bbox.y += roi_y;
        if (det.has_landmarks) {
          for (auto& lm : det.landmarks) {
            lm.x += roi_x;
            lm.y += roi_y;
          }
        }
        all_detections.push_back(det);
      }
    }
    // NMS de-duplication (zones can overlap-detect the same face).
    all_detections = ApplyNMS(all_detections, det_cfg_.nms_thresh);
    // Cap the number of reported faces.
    if (all_detections.size() > static_cast<size_t>(det_cfg_.max_faces)) {
      all_detections.resize(det_cfg_.max_faces);
    }
    // Build and attach the result.
    FaceDetResult det_result;
    det_result.img_w = src_w;
    det_result.img_h = src_h;
    det_result.model_name = "retinaface_zoned";
    det_result.faces = std::move(all_detections);
    frame->face_det = std::make_shared<FaceDetResult>(std::move(det_result));
  }
  // Detects in three horizontal bands of the ROI (near/mid/far) and maps
  // every detection back into original-image coordinates.
  std::vector<FaceDetItem> DetectWithZones(FramePtr frame,
                                           int roi_x, int roi_y,
                                           int roi_w, int roi_h) {
    std::vector<FaceDetItem> all_dets;
    // Convert the boundary lines into the ROI coordinate system.
    int by5 = ClampInt(boundary_y_5m_ - roi_y, 0, roi_h);
    int by7 = ClampInt(boundary_y_7m_ - roi_y, 0, roi_h);
    // Enforce ordering (larger y = lower in frame = nearer to camera).
    if (by5 < by7) std::swap(by5, by7);
    // Near zone (bottom of frame, large y, ~3-5m).
    if (by5 < roi_h) {
      auto dets = DetectSingleZone(frame, roi_x, roi_y + by5, roi_w, roi_h - by5, scale_near_);
      for (auto& det : dets) {
        det.bbox.x += roi_x;
        det.bbox.y += roi_y + by5;
        if (det.has_landmarks) {
          for (auto& lm : det.landmarks) {
            lm.x += roi_x;
            lm.y += roi_y + by5;
          }
        }
        all_dets.push_back(det);
      }
    }
    // Mid zone (middle of frame, ~5-7m).
    if (by7 < by5) {
      auto dets = DetectSingleZone(frame, roi_x, roi_y + by7, roi_w, by5 - by7, scale_mid_);
      for (auto& det : dets) {
        det.bbox.x += roi_x;
        det.bbox.y += roi_y + by7;
        if (det.has_landmarks) {
          for (auto& lm : det.landmarks) {
            lm.x += roi_x;
            lm.y += roi_y + by7;
          }
        }
        all_dets.push_back(det);
      }
    }
    // Far zone (top of frame, small y, ~7-9m).
    if (by7 > 0) {
      auto dets = DetectSingleZone(frame, roi_x, roi_y, roi_w, by7, scale_far_);
      for (auto& det : dets) {
        det.bbox.x += roi_x;
        det.bbox.y += roi_y;
        if (det.has_landmarks) {
          for (auto& lm : det.landmarks) {
            lm.x += roi_x;
            lm.y += roi_y;
          }
        }
        all_dets.push_back(det);
      }
    }
    return all_dets;
  }
  // Crops (x,y,w,h) from the frame, resizes to the model input, runs the
  // NPU and decodes RetinaFace outputs. Returned coordinates are relative
  // to the crop (the caller offsets them back).
  std::vector<FaceDetItem> DetectSingleZone(FramePtr frame,
                                            int x, int y, int w, int h,
                                            float scale) {
    std::vector<FaceDetItem> dets;
    if (w <= 0 || h <= 0) return dets;
    const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
    const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
                            : (frame->stride > 0 ? frame->stride : frame->width * 3);
    // Scaled size of the crop region.
    // NOTE(review): crop_w/crop_h are only used for this validity check;
    // the zone scale does not otherwise affect the resize below — confirm
    // this is intended.
    int crop_w = static_cast<int>(w * scale);
    int crop_h = static_cast<int>(h * scale);
    if (crop_w <= 0 || crop_h <= 0) return dets;
    // Allocate the model input buffer.
    input_buf_.resize(static_cast<size_t>(model_w_) * model_h_ * 3);
    // Bilinear resize to the model input size.
    // Note: crop (x,y,w,h) from the source and resize to (model_w_, model_h_).
    // Possible optimization: crop+resize directly from src without the
    // intermediate buffer. Simplified approach: crop to a temp buffer first.
    std::vector<uint8_t> crop_buf(static_cast<size_t>(w) * h * 3);
    for (int row = 0; row < h; ++row) {
      const uint8_t* src_row = src + (y + row) * src_stride + x * 3;
      uint8_t* dst_row = crop_buf.data() + row * w * 3;
      memcpy(dst_row, src_row, static_cast<size_t>(w) * 3);
    }
    // Resize to the model input size.
    ResizeRgbBilinear(crop_buf.data(), w, h, w * 3,
                      input_buf_.data(), model_w_, model_h_,
                      false); // assumes the input is already RGB
    // NPU inference.
    InferInput input;
    input.width = model_w_;
    input.height = model_h_;
    input.is_nhwc = true;
    input.data = input_buf_.data();
    input.size = input_buf_.size();
    input.type = RKNN_TENSOR_UINT8;
    auto r = infer_backend_->InferBorrowed(model_handle_, input);
    if (!r.success || r.outputs.empty()) {
      LogWarn("[ai_face_det_zoned] inference failed");
      return dets;
    }
    // Identify the three output tensors by their channel width:
    // loc=4 (bbox deltas), conf=2 (bg/face scores), landm=10 (5 points).
    NcTensor loc_tensor, conf_tensor, landm_tensor;
    bool has_loc = false, has_conf = false, has_landm = false;
    for (const auto& o : r.outputs) {
      TensorView tv = ConvertToTensorView(o);
      NcTensor tmp;
      if (!has_loc && ExtractNcTensor(tv, 4, tmp)) {
        loc_tensor = std::move(tmp);
        has_loc = true;
      } else if (!has_conf && ExtractNcTensor(tv, 2, tmp)) {
        conf_tensor = std::move(tmp);
        has_conf = true;
      } else if (!has_landm && ExtractNcTensor(tv, 10, tmp)) {
        landm_tensor = std::move(tmp);
        has_landm = true;
      }
    }
    if (!has_loc || !has_conf) return dets;
    // Decode the detections (into crop-relative coordinates).
    FaceDetResult result;
    DecodeRetinaFace(loc_tensor, conf_tensor, landm_tensor,
                     priors_, w, h, model_w_, model_h_,
                     det_cfg_, result);
    if (!result.faces.empty()) {
      LogInfo("[ai_face_det_zoned] DetectSingleZone: detected " +
              std::to_string(result.faces.size()) + " faces, max_score=" +
              std::to_string(result.faces.empty() ? 0 : result.faces[0].score));
    }
    return result.faces;
  }
  // Greedy NMS; keeps highest-score boxes, drops overlaps above threshold.
  std::vector<FaceDetItem> ApplyNMS(std::vector<FaceDetItem>& dets, float threshold) {
    if (dets.empty()) return dets;
    // Sort by descending confidence.
    std::sort(dets.begin(), dets.end(),
              [](const FaceDetItem& a, const FaceDetItem& b) {
                return a.score > b.score;
              });
    std::vector<FaceDetItem> keep;
    std::vector<bool> suppressed(dets.size(), false);
    for (size_t i = 0; i < dets.size(); ++i) {
      if (suppressed[i]) continue;
      keep.push_back(dets[i]);
      for (size_t j = i + 1; j < dets.size(); ++j) {
        if (suppressed[j]) continue;
        if (IoU(dets[i].bbox, dets[j].bbox) > threshold) {
          suppressed[j] = true;
        }
      }
    }
    return keep;
  }
#endif
  std::string id_;
  std::string model_path_;
  DetectionConfig det_cfg_;
  int model_w_ = 320;
  int model_h_ = 320;
  // ROI crop (applied before zone splitting).
  bool roi_enabled_ = false;
  int roi_x_ = 0, roi_y_ = 0, roi_w_ = 0, roi_h_ = 0;
  // Three distance zones (y boundaries in original-image coordinates).
  bool zones_enabled_ = false;
  int boundary_y_5m_ = 0;
  int boundary_y_7m_ = 0;
  float scale_near_ = 1.0f;
  float scale_mid_ = 1.3f;
  float scale_far_ = 1.8f;
  std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
  std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
  std::shared_ptr<IInferBackend> infer_backend_;
#if defined(RK3588_ENABLE_RKNN)
  ModelHandle model_handle_ = kInvalidModelHandle;
  std::vector<Prior> priors_;
  std::vector<uint8_t> input_buf_;  // reused model-input staging buffer
#endif
};
REGISTER_NODE(AiFaceDetZonedNode, "ai_face_det_zoned");
} // namespace rk3588

View File

@ -0,0 +1,311 @@
/**
* ai_scrfd_sliding - SCRFD with sliding window detection
*
* Features:
* 1. Resize input to target height (640) keeping approximate ratio
* 2. Split into multiple 640x640 windows
* 3. Detect on each window and merge results
*
* For 1080p: resize to 1280x640, 2 windows
* For 1440p: resize to 2560x640, 4 windows
*/
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
#include "face/face_detection_utils.h"
#include "face/face_result.h"
#include "face/scrfd_detector.h"
#include "hw/i_infer_backend.h"
#include "node.h"
#include "utils/dma_alloc.h"
#include "utils/logger.h"
namespace rk3588 {
using namespace face_detection;
// Pipeline node: SCRFD face detection over sliding 640x640 windows of the
// source image. Each window is cropped, resized to the model input, run on
// the NPU, and the merged detections are NMS'd and attached to the frame.
class AiScrfdSlidingNode : public INode {
public:
  std::string Id() const override { return id_; }
  std::string Type() const override { return "ai_scrfd_sliding"; }
  // Parses configuration (thresholds, optional explicit windows), wires
  // queues/backend from ctx and loads the RKNN model.
  bool Init(const SimpleJson& config, const NodeContext& ctx) override {
    id_ = config.ValueOr<std::string>("id", "scrfd_sliding");
    model_path_ = config.ValueOr<std::string>("model_path",
                                              "./models/scrfd_500m_640.rknn");
    // Detection parameters
    det_cfg_.conf_thresh = config.ValueOr<float>("conf_thresh", 0.3f);
    det_cfg_.nms_thresh = config.ValueOr<float>("nms_thresh", 0.4f);
    det_cfg_.max_faces = config.ValueOr<int>("max_faces", 50);
    det_cfg_.output_landmarks = config.ValueOr<bool>("output_landmarks", true);
    // Model input size is fixed to 640x640 for this node.
    model_w_ = 640;
    model_h_ = 640;
    // Initialize detector (precomputes anchor center points).
    detector_.Init(model_w_, model_h_);
    // Parse sliding windows config.
    // If not configured, auto-calculate based on input resolution.
    windows_.clear();
    if (const SimpleJson* win_arr = config.Find("windows"); win_arr && win_arr->IsArray()) {
      for (const auto& w : win_arr->AsArray()) {
        if (w.IsObject()) {
          Window win;
          win.x = w.ValueOr<int>("x", 0);
          win.y = w.ValueOr<int>("y", 0);
          win.w = w.ValueOr<int>("w", 640);
          win.h = w.ValueOr<int>("h", 640);
          windows_.push_back(win);
        }
      }
    }
    // Target resize height (default 640).
    // NOTE(review): target_height_ is stored but not referenced by the
    // detection path below — confirm whether it is still needed.
    target_height_ = config.ValueOr<int>("target_height", 640);
    input_queue_ = ctx.input_queue;
    output_queues_ = ctx.output_queues;
    if (!input_queue_) {
      LogError("[ai_scrfd_sliding] no input queue");
      return false;
    }
    infer_backend_ = ctx.infer_backend;
    if (!infer_backend_) {
      LogError("[ai_scrfd_sliding] no infer backend");
      return false;
    }
#if defined(RK3588_ENABLE_RKNN)
    std::string err;
    model_handle_ = infer_backend_->LoadModel(model_path_, err);
    if (model_handle_ == kInvalidModelHandle) {
      LogError("[ai_scrfd_sliding] failed to load model: " + err);
      return false;
    }
    // NOTE(review): input_buf_ is allocated here but never used;
    // DetectWindowFromSource allocates its own per-window buffers.
    input_buf_.resize(model_w_ * model_h_ * 3);
    LogInfo("[ai_scrfd_sliding] model loaded: " + model_path_);
#else
    LogWarn("[ai_scrfd_sliding] RKNN disabled");
#endif
    return true;
  }
  bool Start() override {
    LogInfo("[ai_scrfd_sliding] start, windows=" + std::to_string(windows_.size()));
    return true;
  }
  void Stop() override {
#if defined(RK3588_ENABLE_RKNN)
    if (model_handle_ != kInvalidModelHandle) {
      infer_backend_->UnloadModel(model_handle_);
      model_handle_ = kInvalidModelHandle;
    }
#endif
    LogInfo("[ai_scrfd_sliding] stop");
  }
  // Runs detection (RKNN builds only) and always forwards the frame.
  NodeStatus Process(FramePtr frame) override {
    if (!frame) return NodeStatus::DROP;
#if defined(RK3588_ENABLE_RKNN)
    RunDetection(frame);
#endif
    Push(frame);
    return NodeStatus::OK;
  }
private:
  // Sliding window rectangle, in source-image pixel coordinates.
  struct Window {
    int x, y, w, h;
  };
  void Push(FramePtr frame) {
    for (auto& q : output_queues_) q->Push(frame);
  }
#if defined(RK3588_ENABLE_RKNN)
  // Per-frame pipeline: detect on each window, merge, NMS, cap, attach.
  // NOTE(review): unlike ai_face_det_zoned there is no frame->format check;
  // the code assumes packed 3-byte-per-pixel RGB/BGR — confirm the upstream
  // preprocess node guarantees this.
  void RunDetection(FramePtr frame) {
    if (!frame->data || frame->data_size == 0) return;
    const int src_w = frame->width;
    const int src_h = frame->height;
    // NOTE(review): SyncStart() without a matching SyncEnd() — confirm the
    // DMA buffer synchronization contract for read-only access.
    if (frame->DmaFd() >= 0) frame->SyncStart();
    // Calculate windows if not pre-configured.
    std::vector<Window> windows = windows_;
    if (windows.empty()) {
      windows = CalculateWindows(src_w, src_h);
    }
    std::vector<FaceDetItem> all_detections;
    const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
    const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
                            : (frame->stride > 0 ? frame->stride : frame->width * 3);
    // Process each window - crop from original, then resize to 640x640.
    for (size_t i = 0; i < windows.size(); ++i) {
      const auto& win = windows[i];
      auto dets = DetectWindowFromSource(src, src_w, src_h, src_stride, win);
      // Detections are already in original coordinates.
      all_detections.insert(all_detections.end(), dets.begin(), dets.end());
    }
    // Apply NMS across all windows (removes cross-window duplicates).
    all_detections = detector_.ApplyNMS(all_detections, det_cfg_.nms_thresh);
    if (all_detections.size() > static_cast<size_t>(det_cfg_.max_faces)) {
      all_detections.resize(det_cfg_.max_faces);
    }
    FaceDetResult result;
    result.img_w = src_w;
    result.img_h = src_h;
    result.model_name = "scrfd_sliding";
    result.faces = std::move(all_detections);
    frame->face_det = std::make_shared<FaceDetResult>(std::move(result));
  }
  // Tiles the source image with 640x640 windows when none were configured.
  std::vector<Window> CalculateWindows(int src_w, int src_h) {
    std::vector<Window> windows;
    // Strategy: Split source image into 640x640 regions.
    // For 1080p: 1920x1080 -> 3x2 grid (6 windows)
    // For 1440p: 2560x1440 -> 4x2 grid (8 windows)
    // Calculate step size.
    // NOTE(review): the clamp below forces step >= 640, so tiles end up
    // adjacent rather than overlapping — faces straddling a tile boundary
    // may be missed; confirm whether overlap was intended.
    int step_x = (src_w <= 640) ? src_w : (src_w - 640) / ((src_w + 639) / 640 - 1);
    int step_y = (src_h <= 640) ? src_h : (src_h - 640) / ((src_h + 639) / 640 - 1);
    if (step_x < 640) step_x = 640;
    if (step_y < 640) step_y = 640;
    for (int y = 0; y < src_h; y += step_y) {
      for (int x = 0; x < src_w; x += step_x) {
        Window win;
        win.x = x;
        win.y = y;
        win.w = 640;
        win.h = 640;
        windows.push_back(win);
        // Stop if we've covered the width.
        if (x + 640 >= src_w) break;
      }
      // Stop if we've covered the height.
      if (y + 640 >= src_h) break;
    }
    LogInfo("[ai_scrfd_sliding] Auto-calculated: " + std::to_string(windows.size()) + " windows for " + std::to_string(src_w) + "x" + std::to_string(src_h));
    return windows;
  }
  // Crops one window from the source, resizes it to 640x640, infers, and
  // maps the decoded detections back into source-image coordinates.
  // Windows are clamped to the source bounds, so edge windows may be
  // smaller than 640x640 (the resize then stretches them).
  std::vector<FaceDetItem> DetectWindowFromSource(const uint8_t* src, int src_w, int src_h, int src_stride, const Window& win) {
    std::vector<FaceDetItem> dets;
    // Clamp window to source bounds.
    int win_x = std::max(0, std::min(win.x, src_w - 1));
    int win_y = std::max(0, std::min(win.y, src_h - 1));
    int win_w = std::min(win.w, src_w - win_x);
    int win_h = std::min(win.h, src_h - win_y);
    if (win_w <= 0 || win_h <= 0) {
      LogWarn("[ai_scrfd_sliding] Invalid window");
      return dets;
    }
    // Crop from source (row-by-row copy of packed 3-byte pixels).
    std::vector<uint8_t> crop_buf(static_cast<size_t>(win_w) * win_h * 3);
    for (int row = 0; row < win_h; ++row) {
      const uint8_t* src_row = src + (win_y + row) * src_stride + win_x * 3;
      uint8_t* dst_row = crop_buf.data() + row * win_w * 3;
      memcpy(dst_row, src_row, static_cast<size_t>(win_w) * 3);
    }
    // Resize to 640x640.
    std::vector<uint8_t> model_input(640 * 640 * 3);
    ResizeRgbBilinear(crop_buf.data(), win_w, win_h, win_w * 3,
                      model_input.data(), 640, 640, false);
    // NPU inference.
    InferInput input;
    input.width = 640;
    input.height = 640;
    input.is_nhwc = true;
    input.data = model_input.data();
    input.size = model_input.size();
    input.type = RKNN_TENSOR_UINT8;
    auto r = infer_backend_->InferBorrowed(model_handle_, input);
    if (!r.success || r.outputs.empty()) {
      LogWarn("[ai_scrfd_sliding] inference failed");
      return dets;
    }
    // Decode (get detections in 640x640 coordinates).
    dets = detector_.Decode(r.outputs, 640, 640, det_cfg_);
    // Map back to original coordinates.
    float scale_x = static_cast<float>(win_w) / 640.0f;
    float scale_y = static_cast<float>(win_h) / 640.0f;
    for (auto& det : dets) {
      det.bbox.x = win_x + det.bbox.x * scale_x;
      det.bbox.y = win_y + det.bbox.y * scale_y;
      det.bbox.w *= scale_x;
      det.bbox.h *= scale_y;
      if (det.has_landmarks) {
        for (auto& lm : det.landmarks) {
          lm.x = win_x + lm.x * scale_x;
          lm.y = win_y + lm.y * scale_y;
        }
      }
    }
    return dets;
  }
#endif
  std::string id_;
  std::string model_path_;
  ScrfdConfig det_cfg_;
  ScrfdDetector detector_;
  int model_w_ = 640;
  int model_h_ = 640;
  int target_height_ = 640;
  std::vector<Window> windows_;  // empty => auto-tile per frame
  std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
  std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
  std::shared_ptr<IInferBackend> infer_backend_;
#if defined(RK3588_ENABLE_RKNN)
  ModelHandle model_handle_ = kInvalidModelHandle;
  std::vector<uint8_t> input_buf_;  // see NOTE(review) in Init(): unused
#endif
};
REGISTER_NODE(AiScrfdSlidingNode, "ai_scrfd_sliding");
} // namespace rk3588

View File

@ -428,10 +428,7 @@ public:
for (const auto& d : frame->det->items) {
if (d.cls_id == 10) no_boots_count++;
}
if (no_boots_count > 0 || processed_frames_ % 30 == 0) {
LogInfo("[alarm] frame received, dets=" + std::to_string(frame->det->items.size()) +
" no_boots=" + std::to_string(no_boots_count));
}
// Log throttled
}
if (eval_interval_ms_ > 0 && frame->pts > 0) {

View File

@ -128,6 +128,30 @@ private:
}
}
// 将检测坐标(相对于原始图像)映射到当前帧坐标
// Maps a detection bbox (expressed in the source-image coordinate system
// recorded in transform_meta) into the current frame's pixel space.
Rect MapDetCoordToFrame(const Rect& det_bbox, FramePtr frame) {
  // No (valid) transform info: coordinates already match the frame.
  if (!frame->transform_meta || !frame->transform_meta->valid) {
    return det_bbox;
  }
  const auto& meta = *frame->transform_meta;
  const bool dims_ok = meta.src_w > 0 && meta.src_h > 0 &&
                       frame->width > 0 && frame->height > 0;
  if (!dims_ok) {
    return det_bbox;
  }
  // Detection coordinates are based on src_w x src_h; rescale to the frame.
  const float sx = static_cast<float>(frame->width) / meta.src_w;
  const float sy = static_cast<float>(frame->height) / meta.src_h;
  Rect mapped;
  mapped.x = det_bbox.x * sx;
  mapped.y = det_bbox.y * sy;
  mapped.w = det_bbox.w * sx;
  mapped.h = det_bbox.h * sy;
  return mapped;
}
void ProcessPpeBootsCheck(FramePtr frame) {
const auto& detections = frame->det->items;
@ -145,7 +169,12 @@ private:
if (config_.debug) {
LogInfo("[LogicGateNode] Persons=" + std::to_string(persons.size()) +
" Boots=" + std::to_string(boots.size()));
" Boots=" + std::to_string(boots.size()) +
" Frame=" + std::to_string(frame->width) + "x" + std::to_string(frame->height));
if (frame->transform_meta && frame->transform_meta->valid) {
LogInfo("[LogicGateNode] TransformMeta: src=" + std::to_string(frame->transform_meta->src_w) +
"x" + std::to_string(frame->transform_meta->src_h));
}
}
// 简化逻辑:必须同时检测到人和鞋,才开始判断
@ -158,7 +187,21 @@ private:
// 对每只鞋进行颜色检查
for (const auto& boot : boots) {
if (config_.enable_color_check && color_analyzer_) {
auto color_result = color_analyzer_->Analyze(*frame, boot.bbox);
// 将检测坐标映射到当前帧坐标
Rect mapped_bbox = MapDetCoordToFrame(boot.bbox, frame);
if (config_.debug) {
LogInfo("[LogicGateNode] Boot bbox: [" + std::to_string(static_cast<int>(boot.bbox.x)) +
"," + std::to_string(static_cast<int>(boot.bbox.y)) +
" " + std::to_string(static_cast<int>(boot.bbox.w)) +
"x" + std::to_string(static_cast<int>(boot.bbox.h)) +
"] -> Mapped: [" + std::to_string(static_cast<int>(mapped_bbox.x)) +
"," + std::to_string(static_cast<int>(mapped_bbox.y)) +
" " + std::to_string(static_cast<int>(mapped_bbox.w)) +
"x" + std::to_string(static_cast<int>(mapped_bbox.h)) + "]");
}
auto color_result = color_analyzer_->Analyze(*frame, mapped_bbox);
if (config_.debug) {
LogInfo("[LogicGateNode] Boot brightness=" +

157
src/face/scrfd_detector.cpp Normal file
View File

@ -0,0 +1,157 @@
/**
* SCRFD Detector Implementation
*/
#include "face/scrfd_detector.h"
#include "ai_scheduler.h" // For BorrowedOutput
#include "face/face_detection_utils.h"
#include <algorithm>
#include <cstring>
namespace rk3588 {
// Rule of Zero: the detector owns no raw resources, so the
// compiler-generated special members are sufficient.
ScrfdDetector::ScrfdDetector() = default;
ScrfdDetector::~ScrfdDetector() = default;
// Precomputes the anchor center points for the given model input size.
// SCRFD heads run at strides 8/16/32 with 2 anchors per grid cell; each
// anchor's center is stored in grid units plus its stride (Decode()
// multiplies them out to pixels).
// @param model_w model input width in pixels (e.g. 640)
// @param model_h model input height in pixels (e.g. 640)
void ScrfdDetector::Init(int model_w, int model_h) {
  model_w_ = model_w;
  model_h_ = model_h;
  // Re-initialization must start from a clean slate: the original code
  // appended to center_points_, so a second Init() doubled every anchor.
  center_points_.clear();
  static constexpr int kStrides[] = {8, 16, 32};
  static constexpr int kAnchorsPerCell = 2;
  // Reserve the exact capacity up front to avoid reallocations.
  size_t total = 0;
  for (int stride : kStrides) {
    total += static_cast<size_t>(model_w_ / stride) *
             static_cast<size_t>(model_h_ / stride) * kAnchorsPerCell;
  }
  center_points_.reserve(total);
  for (int stride : kStrides) {
    // Use independent grid extents per axis so non-square model inputs are
    // handled correctly (the original derived both axes from model_w_).
    const int grid_w = model_w_ / stride;
    const int grid_h = model_h_ / stride;
    for (int y = 0; y < grid_h; ++y) {
      for (int x = 0; x < grid_w; ++x) {
        for (int a = 0; a < kAnchorsPerCell; ++a) {
          CenterPoint pt;
          pt.cx = static_cast<float>(x);
          pt.cy = static_cast<float>(y);
          pt.stride = static_cast<float>(stride);
          center_points_.push_back(pt);
        }
      }
    }
  }
}
// Decodes the 9 raw SCRFD output tensors into face detections mapped to
// src_w x src_h coordinates. No NMS is applied here.
// Expected output order: score_8, score_16, score_32, bbox_8, bbox_16,
// bbox_32, kps_8, kps_16, kps_32.
// NOTE: outputs are read as FP32 — assumes the backend dequantizes
// (borrowed outputs are float); confirm against the infer backend contract.
std::vector<FaceDetItem> ScrfdDetector::Decode(
    const std::vector<AiScheduler::BorrowedOutput>& outputs,
    int src_w, int src_h,
    const ScrfdConfig& cfg) {
  std::vector<FaceDetItem> detections;
  if (outputs.size() != 9) return detections;
  // Per-stride anchor counts for a 640x640 input (2 anchors per cell).
  const int anchor_counts[] = {12800, 3200, 800};
  const int strides[] = {8, 16, 32};
  // Hoisted out of the loops: model -> source scaling is constant.
  const float scale_x = static_cast<float>(src_w) / model_w_;
  const float scale_y = static_cast<float>(src_h) / model_h_;
  size_t anchor_idx = 0;  // global index into center_points_, spans all strides
  for (int s = 0; s < 3; ++s) {
    const int stride = strides[s];
    const int count = anchor_counts[s];
    const auto& score_out = outputs[s];
    const auto& bbox_out = outputs[s + 3];
    const auto& kps_out = outputs[s + 6];
    const float* scores = reinterpret_cast<const float*>(score_out.data);
    const float* bboxes = reinterpret_cast<const float*>(bbox_out.data);
    const float* kps = reinterpret_cast<const float*>(kps_out.data);
    // BUGFIX: if a level is unusable we must still advance anchor_idx by its
    // full anchor count. The original code `continue`d without advancing,
    // which misaligned center points for every subsequent stride level.
    if (score_out.dims.size() < 3 || !scores || !bboxes || !kps) {
      anchor_idx += static_cast<size_t>(count);
      continue;
    }
    for (int i = 0; i < count; ++i) {
      if (anchor_idx >= center_points_.size()) break;
      const float score = scores[i];
      if (score < cfg.conf_thresh) {
        anchor_idx++;
        continue;
      }
      const CenterPoint& pt = center_points_[anchor_idx];
      // BBox regression: [left, top, right, bottom] distances from the
      // anchor center, in stride units.
      const float left = bboxes[i * 4 + 0];
      const float top = bboxes[i * 4 + 1];
      const float right = bboxes[i * 4 + 2];
      const float bottom = bboxes[i * 4 + 3];
      // Decode to model-input pixel coordinates (e.g. 640x640).
      const float x1_640 = (pt.cx - left) * stride;
      const float y1_640 = (pt.cy - top) * stride;
      const float x2_640 = (pt.cx + right) * stride;
      const float y2_640 = (pt.cy + bottom) * stride;
      FaceDetItem det;
      // Scale to the requested source image size.
      det.bbox.x = x1_640 * scale_x;
      det.bbox.y = y1_640 * scale_y;
      det.bbox.w = (x2_640 - x1_640) * scale_x;
      det.bbox.h = (y2_640 - y1_640) * scale_y;
      det.score = score;
      det.has_landmarks = cfg.output_landmarks;
      // Keypoints: 5 (x, y) offsets from the anchor center, stride units.
      if (cfg.output_landmarks) {
        for (int p = 0; p < 5; ++p) {
          const float kps_x = kps[i * 10 + p * 2 + 0];
          const float kps_y = kps[i * 10 + p * 2 + 1];
          const float kx_640 = (pt.cx + kps_x) * stride;
          const float ky_640 = (pt.cy + kps_y) * stride;
          det.landmarks[p].x = kx_640 * scale_x;
          det.landmarks[p].y = ky_640 * scale_y;
        }
      }
      detections.push_back(det);
      anchor_idx++;
    }
  }
  return detections;
}
std::vector<FaceDetItem> ScrfdDetector::ApplyNMS(
std::vector<FaceDetItem>& dets,
float nms_thresh) {
if (dets.empty()) return dets;
// Sort by score
std::sort(dets.begin(), dets.end(),
[](const FaceDetItem& a, const FaceDetItem& b) {
return a.score > b.score;
});
std::vector<FaceDetItem> keep;
std::vector<bool> suppressed(dets.size(), false);
for (size_t i = 0; i < dets.size(); ++i) {
if (suppressed[i]) continue;
keep.push_back(dets[i]);
for (size_t j = i + 1; j < dets.size(); ++j) {
if (suppressed[j]) continue;
if (face_detection::IoU(dets[i].bbox, dets[j].bbox) > nms_thresh) {
suppressed[j] = true;
}
}
}
return keep;
}
} // namespace rk3588

View File

@ -58,7 +58,6 @@
<script>
const streams = [
{ name: 'SCRFD Face Detection', url: '/hls/scrfd/index.m3u8' },
{ name: 'Cam 1', url: '/hls/cam1/index.m3u8' },
{ name: 'Cam 2', url: '/hls/cam2/index.m3u8' },
{ name: 'Cam 3', url: '/hls/cam3/index.m3u8' },