新增滑动窗口节点,提高了人脸检测和识别能力
This commit is contained in:
parent
d8c492f9ab
commit
8ae1893f5f
311
configs/full_pipeline_1080p.json
Normal file
311
configs/full_pipeline_1080p.json
Normal file
@ -0,0 +1,311 @@
|
||||
{
|
||||
"queue": {
|
||||
"size": 8,
|
||||
"strategy": "drop_oldest"
|
||||
},
|
||||
"graphs": [
|
||||
{
|
||||
"name": "cam1_full_pipeline",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "in_cam1",
|
||||
"type": "input_rtsp",
|
||||
"role": "source",
|
||||
"enable": true,
|
||||
"url": "rtsp://10.0.0.49:8554/cam",
|
||||
"fps": 30,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
"use_mpp": true,
|
||||
"use_ffmpeg": false,
|
||||
"force_tcp": true,
|
||||
"reconnect_sec": 5,
|
||||
"reconnect_backoff_max_sec": 30
|
||||
},
|
||||
{
|
||||
"id": "pre_face",
|
||||
"type": "preprocess",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"dst_w": 1920,
|
||||
"dst_h": 1080,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"resize_mode": "stretch",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "ppe_detection",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
"id": "face_det",
|
||||
"type": "ai_scrfd_sliding",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"infer_fps": 5,
|
||||
"model_path": "./models/scrfd_500m_640.rknn",
|
||||
"model_w": 640,
|
||||
"model_h": 640,
|
||||
"windows": [
|
||||
{"x": 0, "y": 0, "w": 960, "h": 1080},
|
||||
{"x": 960, "y": 0, "w": 960, "h": 1080}
|
||||
],
|
||||
"conf_thresh": 0.5,
|
||||
"nms_thresh": 0.4,
|
||||
"max_faces": 50,
|
||||
"debug": {
|
||||
"stats": true,
|
||||
"stats_interval": 30
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "face_recog",
|
||||
"type": "ai_face_recog",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"model_path": "./models/mobilefacenet_arcface.rknn",
|
||||
"align": true,
|
||||
"emit_embedding": false,
|
||||
"max_faces": 50,
|
||||
"input_format": "rgb",
|
||||
"input_dtype": "uint8",
|
||||
"threshold": {
|
||||
"accept": 0.45,
|
||||
"margin": 0.05
|
||||
},
|
||||
"gallery": {
|
||||
"backend": "sqlite",
|
||||
"path": "./models/face_gallery.db",
|
||||
"load_on_start": true,
|
||||
"expected_dim": 512,
|
||||
"dtype": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "pre_yolo",
|
||||
"type": "preprocess",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"dst_w": 768,
|
||||
"dst_h": 768,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"resize_mode": "stretch",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "ppe_detection",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
"id": "yolo_ppe",
|
||||
"type": "ai_yolo",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"infer_fps": 5,
|
||||
"model_path": "./models/best-768.rknn",
|
||||
"model_version": "v8",
|
||||
"model_w": 768,
|
||||
"model_h": 768,
|
||||
"num_classes": 11,
|
||||
"conf": 0.25,
|
||||
"nms": 0.45,
|
||||
"debug": {
|
||||
"stats": true,
|
||||
"stats_interval": 30,
|
||||
"detections": false
|
||||
},
|
||||
"class_filter": [3, 6, 10]
|
||||
},
|
||||
{
|
||||
"id": "tracker",
|
||||
"type": "tracker",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"mode": "bytetrack_lite",
|
||||
"per_class": true,
|
||||
"state_key": "ppe_detection",
|
||||
"track_classes": [3, 6, 10],
|
||||
"ignore_classes": [],
|
||||
"allowed_models": ["yolov8"],
|
||||
"high_th": 0.5,
|
||||
"low_th": 0.1,
|
||||
"iou_th": 0.3,
|
||||
"max_age_ms": 1500,
|
||||
"min_hits": 2,
|
||||
"max_tracks": 128
|
||||
},
|
||||
{
|
||||
"id": "logic_boots",
|
||||
"type": "logic_gate",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"mode": "ppe_boots_check",
|
||||
"anchor_class": 6,
|
||||
"boots_class": 3,
|
||||
"color_check": {
|
||||
"enable": true,
|
||||
"method": "hsv",
|
||||
"dark_threshold": 80,
|
||||
"roi_expand": 1.0
|
||||
},
|
||||
"debug": false
|
||||
},
|
||||
{
|
||||
"id": "pre_osd",
|
||||
"type": "preprocess",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"dst_w": 1920,
|
||||
"dst_h": 1080,
|
||||
"dst_format": "nv12",
|
||||
"resize_mode": "stretch",
|
||||
"rga_gate": "ppe_detection",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
"id": "osd",
|
||||
"type": "osd",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"draw_bbox": true,
|
||||
"draw_text": true,
|
||||
"draw_face_det": true,
|
||||
"draw_face_bbox": true,
|
||||
"line_width": 2,
|
||||
"font_scale": 1,
|
||||
"use_rga_bbox": false,
|
||||
"labels": ["helmet", "gloves", "vest", "boots", "goggles", "none", "Person", "no_helmet", "no_goggle", "no_gloves", "no_boots", "violation"]
|
||||
},
|
||||
{
|
||||
"id": "publish",
|
||||
"type": "publish",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"queue": {"size": 2, "policy": "drop_oldest"},
|
||||
"codec": "h264",
|
||||
"fps": 30,
|
||||
"gop": 60,
|
||||
"bitrate_kbps": 4000,
|
||||
"use_mpp": true,
|
||||
"use_ffmpeg_mux": true,
|
||||
"outputs": [
|
||||
{
|
||||
"proto": "hls",
|
||||
"path": "./web/hls/cam1/index.m3u8",
|
||||
"segment_sec": 2
|
||||
},
|
||||
{
|
||||
"proto": "rtsp_server",
|
||||
"port": 8555,
|
||||
"path": "/live/cam1"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "alarm",
|
||||
"type": "alarm",
|
||||
"role": "sink",
|
||||
"enable": true,
|
||||
"eval_fps": 10,
|
||||
"labels": ["helmet", "gloves", "vest", "boots", "goggles", "none", "Person", "no_helmet", "no_goggle", "no_gloves", "no_boots", "violation"],
|
||||
"rules": [
|
||||
{
|
||||
"name": "non_compliant_boots",
|
||||
"class_ids": [10],
|
||||
"roi": {"x": 0.0, "y": 0.0, "w": 1.0, "h": 1.0},
|
||||
"min_score": 0.3,
|
||||
"min_box_area_ratio": 0.01,
|
||||
"require_track_id": true,
|
||||
"min_duration_ms": 800,
|
||||
"min_hits": 2,
|
||||
"hit_window_ms": 1000,
|
||||
"cooldown_ms": 5000,
|
||||
"per_track_cooldown_ms": 5000
|
||||
}
|
||||
],
|
||||
"face_rules": [
|
||||
{
|
||||
"name": "unknown_face",
|
||||
"type": "unknown",
|
||||
"cooldown_ms": 7000,
|
||||
"min_sim": 0.35,
|
||||
"min_hits": 2,
|
||||
"hit_window_ms": 1500,
|
||||
"min_face_area_ratio": 0.01,
|
||||
"min_face_aspect": 0.6,
|
||||
"max_face_aspect": 1.6
|
||||
},
|
||||
{
|
||||
"name": "known_person",
|
||||
"type": "person",
|
||||
"cooldown_ms": 7000,
|
||||
"min_sim": 0.6,
|
||||
"min_hits": 2,
|
||||
"hit_window_ms": 1500,
|
||||
"min_face_area_ratio": 0.01,
|
||||
"min_face_aspect": 0.6,
|
||||
"max_face_aspect": 1.6
|
||||
}
|
||||
],
|
||||
"actions": {
|
||||
"log": {
|
||||
"enable": true,
|
||||
"level": "info"
|
||||
},
|
||||
"snapshot": {
|
||||
"enable": true,
|
||||
"format": "jpg",
|
||||
"quality": 85,
|
||||
"upload": {
|
||||
"type": "minio",
|
||||
"endpoint": "http://10.0.0.49:9000",
|
||||
"bucket": "myminio",
|
||||
"region": "us-east-1",
|
||||
"access_key": "minioadmin",
|
||||
"secret_key": "minioadmin"
|
||||
}
|
||||
},
|
||||
"clip": {
|
||||
"enable": true,
|
||||
"pre_sec": 5,
|
||||
"post_sec": 10,
|
||||
"format": "mp4",
|
||||
"fps": 30,
|
||||
"upload": {
|
||||
"type": "minio",
|
||||
"endpoint": "http://10.0.0.49:9000",
|
||||
"bucket": "myminio",
|
||||
"region": "us-east-1",
|
||||
"access_key": "minioadmin",
|
||||
"secret_key": "minioadmin"
|
||||
}
|
||||
},
|
||||
"external_api": {
|
||||
"enable": true,
|
||||
"getTokenUrl": "http://10.0.0.49:8080/api/getToken",
|
||||
"putMessageUrl": "http://10.0.0.49:8080/api/putMessage",
|
||||
"tenantCode": "32",
|
||||
"channelNo": "cam1",
|
||||
"timeout_ms": 3000,
|
||||
"include_media_url": true,
|
||||
"token_header": "X-Access-Token",
|
||||
"token_json_path": "responseBody.token",
|
||||
"token_cache_sec": 1200
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"edges": [
|
||||
["in_cam1", "pre_face"],
|
||||
["pre_face", "face_det"],
|
||||
["face_det", "face_recog"],
|
||||
["face_recog", "pre_yolo"],
|
||||
["pre_yolo", "yolo_ppe"],
|
||||
["yolo_ppe", "tracker"],
|
||||
["tracker", "logic_boots"],
|
||||
["logic_boots", "osd"],
|
||||
["osd", "pre_osd"],
|
||||
["pre_osd", "publish"],
|
||||
["publish", "alarm"]
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -14,8 +14,8 @@
|
||||
"enable": true,
|
||||
"url": "rtsp://10.0.0.49:8554/cam",
|
||||
"fps": 30,
|
||||
"width": 1280,
|
||||
"height": 720,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
"use_mpp": true,
|
||||
"use_ffmpeg": false,
|
||||
"force_tcp": true,
|
||||
@ -33,7 +33,7 @@
|
||||
"dst_packed": true,
|
||||
"resize_mode": "stretch",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "scrfd_640_test",
|
||||
"rga_gate": "scrfd_1080p",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -42,9 +42,9 @@
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"model_path": "./models/scrfd_500m_640.rknn",
|
||||
"conf_thresh": 0.5,
|
||||
"conf_thresh": 0.3,
|
||||
"nms_thresh": 0.4,
|
||||
"max_faces": 10,
|
||||
"max_faces": 50,
|
||||
"output_landmarks": true,
|
||||
"input_format": "rgb"
|
||||
},
|
||||
@ -67,11 +67,11 @@
|
||||
"type": "preprocess",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"dst_w": 1280,
|
||||
"dst_h": 720,
|
||||
"dst_w": 1920,
|
||||
"dst_h": 1080,
|
||||
"dst_format": "nv12",
|
||||
"resize_mode": "stretch",
|
||||
"rga_gate": "scrfd_640_test",
|
||||
"rga_gate": "scrfd_1080p",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -83,13 +83,13 @@
|
||||
"codec": "h264",
|
||||
"fps": 30,
|
||||
"gop": 60,
|
||||
"bitrate_kbps": 2000,
|
||||
"bitrate_kbps": 4000,
|
||||
"use_mpp": true,
|
||||
"use_ffmpeg_mux": true,
|
||||
"outputs": [
|
||||
{
|
||||
"proto": "hls",
|
||||
"path": "./web/hls/scrfd/index.m3u8",
|
||||
"path": "./web/hls/cam1/index.m3u8",
|
||||
"segment_sec": 2
|
||||
},
|
||||
{
|
||||
|
||||
136
configs/test_scrfd_640_recog.json
Normal file
136
configs/test_scrfd_640_recog.json
Normal file
@ -0,0 +1,136 @@
|
||||
{
|
||||
"queue": {
|
||||
"size": 8,
|
||||
"strategy": "drop_oldest"
|
||||
},
|
||||
"graphs": [
|
||||
{
|
||||
"name": "scrfd_640_recog_test",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "in_cam1",
|
||||
"type": "input_rtsp",
|
||||
"role": "source",
|
||||
"enable": true,
|
||||
"url": "rtsp://10.0.0.49:8554/cam",
|
||||
"fps": 30,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
"use_mpp": true,
|
||||
"use_ffmpeg": false,
|
||||
"force_tcp": true,
|
||||
"reconnect_sec": 5,
|
||||
"reconnect_backoff_max_sec": 30
|
||||
},
|
||||
{
|
||||
"id": "pre_cam1",
|
||||
"type": "preprocess",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"dst_w": 640,
|
||||
"dst_h": 640,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"resize_mode": "stretch",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "scrfd_1080p",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
"id": "scrfd",
|
||||
"type": "ai_scrfd",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"model_path": "./models/scrfd_500m_640.rknn",
|
||||
"conf_thresh": 0.3,
|
||||
"nms_thresh": 0.4,
|
||||
"max_faces": 50,
|
||||
"output_landmarks": true,
|
||||
"input_format": "rgb"
|
||||
},
|
||||
{
|
||||
"id": "face_recog",
|
||||
"type": "ai_face_recog",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"model_path": "./models/mobilefacenet_arcface.rknn",
|
||||
"align": true,
|
||||
"emit_embedding": false,
|
||||
"max_faces": 50,
|
||||
"input_format": "rgb",
|
||||
"input_dtype": "uint8",
|
||||
"threshold": {
|
||||
"accept": 0.45,
|
||||
"margin": 0.05
|
||||
},
|
||||
"gallery": {
|
||||
"backend": "sqlite",
|
||||
"path": "./models/face_gallery.db",
|
||||
"load_on_start": true,
|
||||
"expected_dim": 512,
|
||||
"dtype": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "osd_cam1",
|
||||
"type": "osd",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"draw_bbox": true,
|
||||
"draw_text": true,
|
||||
"draw_face_det": true,
|
||||
"draw_face_bbox": true,
|
||||
"line_width": 2,
|
||||
"font_scale": 1,
|
||||
"use_rga_bbox": false,
|
||||
"labels": ["face"]
|
||||
},
|
||||
{
|
||||
"id": "post_cam1",
|
||||
"type": "preprocess",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"dst_w": 1920,
|
||||
"dst_h": 1080,
|
||||
"dst_format": "nv12",
|
||||
"resize_mode": "stretch",
|
||||
"rga_gate": "scrfd_1080p",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
"id": "pub_cam1",
|
||||
"type": "publish",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"queue": {"size": 2, "policy": "drop_oldest"},
|
||||
"codec": "h264",
|
||||
"fps": 30,
|
||||
"gop": 60,
|
||||
"bitrate_kbps": 4000,
|
||||
"use_mpp": true,
|
||||
"use_ffmpeg_mux": true,
|
||||
"outputs": [
|
||||
{
|
||||
"proto": "hls",
|
||||
"path": "./web/hls/cam1/index.m3u8",
|
||||
"segment_sec": 2
|
||||
},
|
||||
{
|
||||
"proto": "rtsp_server",
|
||||
"port": 8555,
|
||||
"path": "/live/cam1"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"edges": [
|
||||
["in_cam1", "pre_cam1"],
|
||||
["pre_cam1", "scrfd"],
|
||||
["scrfd", "face_recog"],
|
||||
["face_recog", "osd_cam1"],
|
||||
["osd_cam1", "post_cam1"],
|
||||
["post_cam1", "pub_cam1"]
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
139
configs/test_scrfd_sliding_1080p_recog.json
Normal file
139
configs/test_scrfd_sliding_1080p_recog.json
Normal file
@ -0,0 +1,139 @@
|
||||
{
|
||||
"queue": {
|
||||
"size": 8,
|
||||
"strategy": "drop_oldest"
|
||||
},
|
||||
"graphs": [
|
||||
{
|
||||
"name": "scrfd_sliding_1080p_recog",
|
||||
"nodes": [
|
||||
{
|
||||
"id": "in_cam1",
|
||||
"type": "input_rtsp",
|
||||
"role": "source",
|
||||
"enable": true,
|
||||
"url": "rtsp://10.0.0.49:8554/cam",
|
||||
"fps": 30,
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
"use_mpp": true,
|
||||
"use_ffmpeg": false,
|
||||
"force_tcp": true,
|
||||
"reconnect_sec": 5,
|
||||
"reconnect_backoff_max_sec": 30
|
||||
},
|
||||
{
|
||||
"id": "pre_cam1",
|
||||
"type": "preprocess",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"dst_w": 1920,
|
||||
"dst_h": 1080,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"resize_mode": "stretch",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "scrfd_sliding_1080p_recog",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
"id": "scrfd_sliding",
|
||||
"type": "ai_scrfd_sliding",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"model_path": "./models/scrfd_500m_640.rknn",
|
||||
"conf_thresh": 0.5,
|
||||
"nms_thresh": 0.4,
|
||||
"max_faces": 50,
|
||||
"output_landmarks": true,
|
||||
"windows": [
|
||||
{"x": 0, "y": 0, "w": 960, "h": 1080},
|
||||
{"x": 960, "y": 0, "w": 960, "h": 1080}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "face_recog",
|
||||
"type": "ai_face_recog",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"model_path": "./models/mobilefacenet_arcface.rknn",
|
||||
"align": true,
|
||||
"emit_embedding": false,
|
||||
"max_faces": 50,
|
||||
"input_format": "rgb",
|
||||
"input_dtype": "uint8",
|
||||
"threshold": {
|
||||
"accept": 0.45,
|
||||
"margin": 0.05
|
||||
},
|
||||
"gallery": {
|
||||
"backend": "sqlite",
|
||||
"path": "./models/face_gallery.db",
|
||||
"load_on_start": true,
|
||||
"expected_dim": 512,
|
||||
"dtype": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "osd_cam1",
|
||||
"type": "osd",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"draw_bbox": true,
|
||||
"draw_text": true,
|
||||
"draw_face_det": true,
|
||||
"draw_face_bbox": true,
|
||||
"line_width": 2,
|
||||
"font_scale": 1,
|
||||
"use_rga_bbox": false,
|
||||
"labels": ["face"]
|
||||
},
|
||||
{
|
||||
"id": "post_cam1",
|
||||
"type": "preprocess",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"dst_w": 1920,
|
||||
"dst_h": 1080,
|
||||
"dst_format": "nv12",
|
||||
"resize_mode": "stretch",
|
||||
"rga_gate": "scrfd_sliding_1080p_recog",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
"id": "pub_cam1",
|
||||
"type": "publish",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"queue": {"size": 2, "policy": "drop_oldest"},
|
||||
"codec": "h264",
|
||||
"fps": 30,
|
||||
"gop": 60,
|
||||
"bitrate_kbps": 4000,
|
||||
"use_mpp": true,
|
||||
"use_ffmpeg_mux": true,
|
||||
"outputs": [
|
||||
{
|
||||
"proto": "hls",
|
||||
"path": "./web/hls/cam1/index.m3u8",
|
||||
"segment_sec": 2
|
||||
},
|
||||
{
|
||||
"proto": "rtsp_server",
|
||||
"port": 8555,
|
||||
"path": "/live/cam1"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"edges": [
|
||||
["in_cam1", "pre_cam1"],
|
||||
["pre_cam1", "scrfd_sliding"],
|
||||
["scrfd_sliding", "face_recog"],
|
||||
["face_recog", "osd_cam1"],
|
||||
["osd_cam1", "post_cam1"],
|
||||
["post_cam1", "pub_cam1"]
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
727
docs/design/detection_parameters_guide.md
Normal file
727
docs/design/detection_parameters_guide.md
Normal file
@ -0,0 +1,727 @@
|
||||
# 人脸检测参数配置指南
|
||||
|
||||
本文档详细说明人脸检测节点(`ai_face_det`, `ai_scrfd`, `ai_scrfd_sliding`)中的核心后处理参数及其对检测结果的影响。
|
||||
|
||||
---
|
||||
|
||||
## 参数概览
|
||||
|
||||
| 参数名 | 类型 | 默认值 | 范围 | 说明 |
|
||||
|--------|------|--------|------|------|
|
||||
| `conf_thresh` | float | 0.5/0.6/0.7 | 0.0 ~ 1.0 | 置信度阈值,过滤低置信度候选框 |
|
||||
| `nms_thresh` | float | 0.4 | 0.0 ~ 1.0 | NMS IoU 阈值,控制重复框去重力度 |
|
||||
| `max_faces` | int | 10/50 | ≥ 1 | 单帧最大返回人脸数 |
|
||||
|
||||
---
|
||||
|
||||
## 1. conf_thresh (置信度阈值)
|
||||
|
||||
### 含义
|
||||
|
||||
模型对检测到的人脸的置信度(confidence score)阈值。神经网络在推理时会对每个候选框输出一个置信度分数,表示该位置存在人脸的确定性程度。
|
||||
|
||||
### 对检测结果的影响
|
||||
|
||||
| 设置 | 效果 | 适用场景 |
|
||||
|------|------|----------|
|
||||
| **调高** (如 0.7) | 减少误检,只保留高置信度人脸 | 高精度需求场景(门禁、考勤) |
|
||||
| **调低** (如 0.2) | 增加检出率,更多弱特征人脸被检测 | 弱光、远距离、小目标场景 |
|
||||
|
||||
### 注意事项
|
||||
|
||||
- 值过高可能导致**漏检**:模糊人脸、侧脸、小人脸可能被过滤
|
||||
- 值过低可能导致**误检**:背景中的类似人脸的纹理可能被误判
|
||||
|
||||
### 代码实现
|
||||
|
||||
在 SCRFD 后处理中 (`ai_scrfd_node.cpp:282`):
|
||||
|
||||
```cpp
|
||||
if (score < cfg_.conf_thresh) continue;
|
||||
```
|
||||
|
||||
在 RetinaFace 后处理中 (`ai_face_det_node.cpp:784`):
|
||||
|
||||
```cpp
|
||||
if (score < cfg.conf_thresh) continue;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 2. nms_thresh (NMS IoU 阈值)
|
||||
|
||||
### 含义
|
||||
|
||||
非极大值抑制(Non-Maximum Suppression)的 IoU(Intersection over Union,交并比)阈值。
|
||||
|
||||
**NMS 的作用**:同一个真实人脸可能被多个 anchor/候选框检测到,NMS 用于去除重叠的重复检测框,只保留最优的一个。
|
||||
|
||||
**IoU 计算**:
|
||||
```
|
||||
IoU = 两个框的交集面积 / 两个框的并集面积
|
||||
```
|
||||
|
||||
### 对检测结果的影响
|
||||
|
||||
| 设置 | 效果 | 适用场景 |
|
||||
|------|------|----------|
|
||||
| **调高** (如 0.6) | 保留更多重叠框,对密集人脸友好 | 多人密集场景(会议室、教室) |
|
||||
| **调低** (如 0.3) | 严格去重,只保留最优框 | 单人场景、需要精确框选 |
|
||||
|
||||
### 注意事项
|
||||
|
||||
- 值过高:同一人脸可能返回多个重叠框
|
||||
- 值过低:密集人脸场景可能误删相邻的不同人脸
|
||||
|
||||
### 代码实现
|
||||
|
||||
在 SCRFD 后处理中 (`ai_scrfd_node.cpp:172`):
|
||||
|
||||
```cpp
|
||||
detections = ApplyNMS(detections, cfg_.nms_thresh);
|
||||
```
|
||||
|
||||
NMS 算法逻辑 (`ai_face_det_node.cpp:156-167`):
|
||||
|
||||
```cpp
|
||||
void NmsSorted(const std::vector<Rect>& boxes, const std::vector<float>& scores,
|
||||
float nms_thresh, std::vector<int>& keep) {
|
||||
for (...) {
|
||||
bool suppressed = false;
|
||||
for (int kept : keep) {
|
||||
if (IoU(boxes[idx], boxes[kept]) > nms_thresh) {
|
||||
suppressed = true; // 被已保留的框抑制
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!suppressed) keep.push_back(idx);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 3. max_faces (最大人脸数)
|
||||
|
||||
### 含义
|
||||
|
||||
单帧图像中最多返回的人脸检测数量限制。
|
||||
|
||||
### 对检测结果的影响
|
||||
|
||||
| 设置 | 效果 | 性能影响 |
|
||||
|------|------|----------|
|
||||
| **调高** (如 50) | 可检测更多人脸,不遗漏密集场景目标 | 增加后处理开销,RGA/OSD 绘制负载增大 |
|
||||
| **调低** (如 5) | 仅保留置信度最高的前几个人脸 | 减少计算量,提升实时性 |
|
||||
|
||||
### 注意事项
|
||||
|
||||
- 当画面中出现超过 `max_faces` 数量的人脸时,系统会按置信度排序,只保留前 N 个
|
||||
- 设置过大可能导致 RGA 任务堆积,引起 OSD 绘制卡顿
|
||||
|
||||
### 代码实现
|
||||
|
||||
在 SCRFD 后处理中 (`ai_scrfd_node.cpp:174-176`):
|
||||
|
||||
```cpp
|
||||
if (detections.size() > static_cast<size_t>(cfg_.max_faces)) {
|
||||
detections.resize(cfg_.max_faces);
|
||||
}
|
||||
```
|
||||
|
||||
在 RetinaFace 后处理中 (`ai_face_det_node.cpp:840`):
|
||||
|
||||
```cpp
|
||||
const int out_n = std::min<int>(cfg.max_faces, static_cast<int>(keep.size()));
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 推荐配置
|
||||
|
||||
### 按应用场景
|
||||
|
||||
| 场景 | conf_thresh | nms_thresh | max_faces | 说明 |
|
||||
|------|-------------|------------|-----------|------|
|
||||
| **高精度门禁/考勤** | 0.6 ~ 0.7 | 0.4 | 5 ~ 10 | 减少误识别,确保准确率 |
|
||||
| **多人大场景** (会议室/教室) | 0.4 ~ 0.5 | 0.4 ~ 0.5 | 20 ~ 50 | 平衡检出率和去重效果 |
|
||||
| **实时性优先** | 0.5 | 0.4 | 10 | 减少后处理开销 |
|
||||
| **弱光/远距离/小目标** | 0.3 ~ 0.4 | 0.3 | 10 ~ 20 | 提高检出率,但需容忍一定误检 |
|
||||
| **单人视频通话** | 0.6 | 0.4 | 1 ~ 3 | 最小化处理开销 |
|
||||
|
||||
### 按硬件性能
|
||||
|
||||
| 设备性能 | max_faces 建议 | 优化策略 |
|
||||
|----------|----------------|----------|
|
||||
| **RK3588 高性能模式** | 20 ~ 50 | 可同时处理多路高清视频 |
|
||||
| **RK3588 平衡模式** | 10 ~ 20 | 适当降低分辨率和检测频率 |
|
||||
| **RK3566/RK3568** | 5 ~ 10 | 降低输入分辨率,提高 conf_thresh 减少候选框 |
|
||||
|
||||
---
|
||||
|
||||
## 参数联动关系
|
||||
|
||||
这三个参数需要协同调整:
|
||||
|
||||
1. **提高 `conf_thresh`** → 候选框数量减少 → 可降低 `max_faces` → NMS 压力减小
|
||||
2. **降低 `conf_thresh`** → 候选框数量增加 → 可能需要提高 `max_faces` → NMS 压力增大
|
||||
3. **密集场景**:适当提高 `nms_thresh` 避免误删相邻人脸,同时确保 `max_faces` 足够大
|
||||
|
||||
---
|
||||
|
||||
## 配置示例
|
||||
|
||||
### SCRFD 配置 (`ai_scrfd` 节点)
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "ai_scrfd",
|
||||
"model_path": "./models/scrfd_500m_640.rknn",
|
||||
"conf_thresh": 0.5,
|
||||
"nms_thresh": 0.4,
|
||||
"max_faces": 50,
|
||||
"output_landmarks": true,
|
||||
"input_format": "rgb"
|
||||
}
|
||||
```
|
||||
|
||||
### RetinaFace 配置 (`ai_face_det` 节点)
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "ai_face_det",
|
||||
"model_path": "./models/RetinaFace_mobile320.rknn",
|
||||
"conf": 0.7,
|
||||
"nms": 0.4,
|
||||
"max_faces": 10,
|
||||
"output_landmarks": true,
|
||||
"input_format": "rgb"
|
||||
}
|
||||
```
|
||||
|
||||
### 分区域检测配置 (`ai_face_det_zoned` 节点)
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "ai_face_det_zoned",
|
||||
"model_path": "./models/RetinaFace_mobile320.rknn",
|
||||
"conf": 0.6,
|
||||
"nms": 0.4,
|
||||
"max_faces": 10,
|
||||
"output_landmarks": true
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 调试建议
|
||||
|
||||
1. **先调 conf_thresh**:从默认值开始,观察是否漏检或误检
|
||||
2. **再调 nms_thresh**:在密集人脸场景测试,确保既不重复框选也不漏检
|
||||
3. **最后调 max_faces**:根据实际场景人数和硬件性能调整
|
||||
|
||||
### 日志查看
|
||||
|
||||
启动时节点会打印当前参数:
|
||||
|
||||
```
|
||||
[ai_face_det] start id=face_det conf=0.7 nms=0.4 max_faces=10
|
||||
[ai_scrfd] start id=scrfd conf=0.5 nms=0.4 max_faces=50
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 常见问题
|
||||
|
||||
### Q1: 为什么检测到的人脸框会抖动/闪烁?
|
||||
|
||||
**可能原因**:
|
||||
- `conf_thresh` 设置过低,边缘候选框置信度波动导致时有时无
|
||||
- `nms_thresh` 过低,相邻帧选择不同的 anchor
|
||||
|
||||
**解决方法**:适当提高 `conf_thresh` 或调整 `nms_thresh`
|
||||
|
||||
### Q2: 密集场景漏检严重怎么办?
|
||||
|
||||
**解决方法**:
|
||||
- 降低 `conf_thresh` 到 0.4 左右
|
||||
- 提高 `max_faces` 到 30 以上
|
||||
- 适当提高 `nms_thresh` 到 0.5,避免相邻人脸被抑制
|
||||
|
||||
### Q3: OSD 绘制卡顿,RGA 任务堆积?
|
||||
|
||||
**解决方法**:
|
||||
- 降低 `max_faces` 减少绘制负载
|
||||
- 提高 `conf_thresh` 减少检测数量
|
||||
|
||||
---
|
||||
|
||||
## 四、人脸识别参数 (`ai_face_recog`)
|
||||
|
||||
`ai_face_recog` 节点接收人脸检测结果,提取人脸特征向量并与特征库进行比对,完成人脸识别。
|
||||
|
||||
### 4.1 参数概览
|
||||
|
||||
| 参数名 | 类型 | 默认值 | 说明 |
|
||||
|--------|------|--------|------|
|
||||
| `align` | bool | true | 是否使用5点关键点进行人脸对齐 |
|
||||
| `emit_embedding` | bool | false | 是否输出特征向量(用于调试) |
|
||||
| `max_faces` | int | 10 | 单帧最大处理人脸数 |
|
||||
| `input_format` | string | "rgb" | 输入图像格式:rgb/bgr |
|
||||
| `input_dtype` | string | "uint8" | 输入数据类型:uint8/float |
|
||||
| `threshold.accept` | float | 0.45 | 识别通过阈值,相似度超过此值才接受 |
|
||||
| `threshold.margin` | float | 0.05 | 边距阈值,最佳与次佳匹配的差距要求 |
|
||||
| `gallery.backend` | string | "sqlite" | 人脸库后端类型 |
|
||||
| `gallery.path` | string | "./models/face_gallery.db" | 人脸库文件路径 |
|
||||
|
||||
---
|
||||
|
||||
### 4.2 align (人脸对齐)
|
||||
|
||||
#### 含义
|
||||
|
||||
是否使用检测到的5个面部关键点(眼睛、鼻子、嘴角)进行人脸对齐变换。
|
||||
|
||||
#### 对识别效果的影响
|
||||
|
||||
| 设置 | 效果 | 适用场景 |
|
||||
|------|------|----------|
|
||||
| **true** | 对齐后人脸姿态归一化,提高识别准确率 | 高位摄像头、角度倾斜、侧脸场景 |
|
||||
| **false** | 直接裁剪人脸区域,计算量略小 | 正面、固定位置场景 |
|
||||
|
||||
#### 对齐原理
|
||||
|
||||
使用5点关键点与标准模板进行相似变换(Similarity Transform):
|
||||
- 标准模板坐标(112x112输入):左眼(38.29,51.70)、右眼(73.53,51.50)、鼻尖(56.02,71.74)、左嘴角(41.55,92.37)、右嘴角(70.73,92.20)
|
||||
- 代码实现:`ai_face_recog_node.cpp:851-865`
|
||||
|
||||
```cpp
|
||||
if (cfg->align && face.has_landmarks && model_w_ == 112 && model_h_ == 112) {
|
||||
const std::array<Point2f, 5> dst = { ... }; // 标准模板
|
||||
SimilarityTransform t;
|
||||
InvTransform inv;
|
||||
if (ComputeSimilarity(face.landmarks, dst, t) && InvertSimilarity(t, inv)) {
|
||||
WarpFace(src, w, h, stride, inv, face_buf_.data(), model_w_, model_h_, need_swap);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.3 threshold.accept (接受阈值)
|
||||
|
||||
#### 含义
|
||||
|
||||
特征向量相似度阈值,范围 `0.0 ~ 1.0`。只有当待识别人脸与库中某人的相似度超过此值时,才认为是匹配成功。
|
||||
|
||||
#### 对识别结果的影响
|
||||
|
||||
| 设置 | 效果 | 误识率 | 拒识率 |
|
||||
|------|------|--------|--------|
|
||||
| **调高** (如 0.55) | 更严格,只接受高度相似 | 低 | 高 |
|
||||
| **调低** (如 0.35) | 更宽松,容易匹配 | 高 | 低 |
|
||||
|
||||
#### 推荐值
|
||||
|
||||
| 场景 | 推荐值 | 说明 |
|
||||
|------|--------|------|
|
||||
| **高安全性场景** | 0.50 ~ 0.55 | 门禁、支付,严格控制误识 |
|
||||
| **一般场景** | 0.45 ~ 0.50 | 考勤、签到,平衡准确率和体验 |
|
||||
| **快速通行场景** | 0.40 ~ 0.45 | 闸机、通道,减少拒识 |
|
||||
|
||||
#### 代码实现
|
||||
|
||||
```cpp
|
||||
const bool accept = (sr.best_person_id >= 0) &&
|
||||
(sr.best_sim >= cfg->thr_accept) &&
|
||||
((cfg->thr_margin <= 0.0f) || ((sr.best_sim - sr.second_sim) >= cfg->thr_margin));
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.4 threshold.margin (边距阈值)
|
||||
|
||||
#### 含义
|
||||
|
||||
要求最佳匹配与次佳匹配的相似度差距至少达到此值,用于排除模糊匹配(如两个人都很像的情况)。设为 `0` 或负数可禁用此检查。
|
||||
|
||||
#### 作用示例
|
||||
|
||||
假设待识别人脸与库中人员相似度如下:
|
||||
- 张三(最佳): 0.62
|
||||
- 李四(次佳): 0.58
|
||||
- 差距: 0.04
|
||||
|
||||
如果 `margin = 0.05`,则 0.04 < 0.05,匹配失败(标记为 unknown)
|
||||
如果 `margin = 0.03`,则 0.04 > 0.03,匹配成功(识别为张三)
|
||||
|
||||
#### 推荐值
|
||||
|
||||
- **0.05**(默认):适合大多数人脸库
|
||||
- **0.00** 或负数:禁用边距检查,只依赖 accept 阈值
|
||||
|
||||
---
|
||||
|
||||
### 4.5 max_faces (最大处理人脸数)
|
||||
|
||||
#### 含义
|
||||
|
||||
单帧最多处理的人脸数量。由于特征提取需要 NPU 推理,此参数直接影响处理延迟。
|
||||
|
||||
#### 与检测节点 max_faces 的关系
|
||||
|
||||
```
|
||||
实际处理数 = min(face_det.max_faces, face_recog.max_faces)
|
||||
```
|
||||
|
||||
建议两个节点的 `max_faces` 保持一致或识别节点略小。
|
||||
|
||||
---
|
||||
|
||||
### 4.6 gallery (人脸库配置)
|
||||
|
||||
#### 参数说明
|
||||
|
||||
| 参数 | 默认值 | 说明 |
|
||||
|------|--------|------|
|
||||
| `backend` | "sqlite" | 后端类型,目前仅支持 sqlite |
|
||||
| `path` | "./models/face_gallery.db" | 人脸库数据库文件路径 |
|
||||
| `load_on_start` | true | 启动时加载到内存 |
|
||||
| `expected_dim` | 512 | 特征向量维度(MobileFaceNet 为 512) |
|
||||
| `dtype` | "auto" | 数据类型,auto/float32 |
|
||||
|
||||
#### 人脸库管理
|
||||
|
||||
人脸库使用 SQLite 存储,包含以下信息:
|
||||
- `person_id`:人员唯一ID
|
||||
- `name`:人员名称
|
||||
- `embedding`:特征向量(512维浮点数)
|
||||
- 可通过 Web 管理接口或脚本添加/删除/更新人脸
|
||||
|
||||
---
|
||||
|
||||
### 4.7 normalize (输入归一化)
|
||||
|
||||
#### 两种归一化方式
|
||||
|
||||
**方式一:缩放+偏移(简单)**
|
||||
```json
|
||||
{
|
||||
"normalize": {
|
||||
"scale": 0.0078125,
|
||||
"bias": 0.0
|
||||
}
|
||||
}
|
||||
```
|
||||
公式:`output = input * scale + bias`
|
||||
|
||||
**方式二:均值+标准差(标准)**
|
||||
```json
|
||||
{
|
||||
"normalize": {
|
||||
"mean": [127.5, 127.5, 127.5],
|
||||
"std": [128.0, 128.0, 128.0]
|
||||
}
|
||||
}
|
||||
```
|
||||
公式:`output = (input - mean) / std`
|
||||
|
||||
#### 默认值
|
||||
|
||||
MobileFaceNet 模型通常使用:
|
||||
- `scale`: 1.0(不对 uint8 输入做缩放,由模型内部处理)
|
||||
- 或 `mean: [127.5,127.5,127.5], std: [127.5,127.5,127.5]` 归一化到 [-1, 1]
|
||||
|
||||
---
|
||||
|
||||
### 4.8 人脸识别配置示例
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "face_recog",
|
||||
"type": "ai_face_recog",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"model_path": "./models/mobilefacenet_arcface.rknn",
|
||||
"align": true,
|
||||
"emit_embedding": false,
|
||||
"max_faces": 50,
|
||||
"input_format": "rgb",
|
||||
"input_dtype": "uint8",
|
||||
"threshold": {
|
||||
"accept": 0.45,
|
||||
"margin": 0.05
|
||||
},
|
||||
"gallery": {
|
||||
"backend": "sqlite",
|
||||
"path": "./models/face_gallery.db",
|
||||
"load_on_start": true,
|
||||
"expected_dim": 512,
|
||||
"dtype": "auto"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.9 检测+识别完整流程配置
|
||||
|
||||
```json
|
||||
{
|
||||
"graphs": [{
|
||||
"nodes": [
|
||||
{
|
||||
"id": "scrfd",
|
||||
"type": "ai_scrfd",
|
||||
"conf_thresh": 0.3,
|
||||
"nms_thresh": 0.4,
|
||||
"max_faces": 50,
|
||||
"output_landmarks": true
|
||||
},
|
||||
{
|
||||
"id": "face_recog",
|
||||
"type": "ai_face_recog",
|
||||
"align": true,
|
||||
"max_faces": 50,
|
||||
"threshold": { "accept": 0.45, "margin": 0.05 },
|
||||
"gallery": { "path": "./models/face_gallery.db" }
|
||||
},
|
||||
{
|
||||
"id": "osd",
|
||||
"type": "osd",
|
||||
"draw_face_det": true,
|
||||
"draw_face_bbox": true
|
||||
}
|
||||
],
|
||||
"edges": [
|
||||
["scrfd", "face_recog"],
|
||||
["face_recog", "osd"]
|
||||
]
|
||||
}]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
---
|
||||
|
||||
## 五、滑动窗口检测参数 (`ai_scrfd_sliding`)
|
||||
|
||||
`ai_scrfd_sliding` 是专为**高分辨率视频**设计的滑动窗口检测节点,通过将画面分割成多个窗口分别检测,有效提升远处小目标的检出率。
|
||||
|
||||
### 5.1 节点特性
|
||||
|
||||
| 特性 | 说明 |
|
||||
|------|------|
|
||||
| **原始分辨率输入** | 直接接收原始图像,保留更多细节 |
|
||||
| **滑动窗口检测** | 将画面分割成多个窗口,分别检测后合并结果 |
|
||||
| **保持宽高比** | 每个窗口 resize 到 640x640,轻微变形但可接受 |
|
||||
| **窗口可配置** | 支持自定义窗口数量和位置 |
|
||||
|
||||
### 5.2 参数说明
|
||||
|
||||
| 参数 | 类型 | 默认值 | 说明 |
|
||||
|------|------|--------|------|
|
||||
| `model_path` | string | - | SCRFD 模型路径 |
|
||||
| `conf_thresh` | float | 0.3 | 置信度阈值 |
|
||||
| `nms_thresh` | float | 0.4 | NMS IoU 阈值 |
|
||||
| `max_faces` | int | 50 | 最大检测人脸数 |
|
||||
| `output_landmarks` | bool | true | 是否输出5点关键点 |
|
||||
| `windows` | array | 自动计算 | 窗口配置数组 |
|
||||
|
||||
### 5.3 窗口配置 (`windows`)
|
||||
|
||||
如果不配置 `windows`,节点会根据输入分辨率自动计算窗口。
|
||||
|
||||
**窗口格式**:
|
||||
```json
|
||||
{
|
||||
"x": 0, // 窗口左上角 X 坐标
|
||||
"y": 0, // 窗口左上角 Y 坐标
|
||||
"w": 960, // 窗口宽度
|
||||
"h": 1080 // 窗口高度
|
||||
}
|
||||
```
|
||||
|
||||
**窗口设计原则**:
|
||||
- 窗口之间应有适当重叠,避免漏检
|
||||
- 窗口尺寸建议接近 640x640 的倍数(resize 后变形较小)
|
||||
- 对于 16:9 视频,水平分割效果较好
|
||||
|
||||
### 5.4 不同分辨率配置参考
|
||||
|
||||
#### 1080p (1920×1080) - 推荐2窗口
|
||||
|
||||
```json
|
||||
{
|
||||
"windows": [
|
||||
{"x": 0, "y": 0, "w": 960, "h": 1080},
|
||||
{"x": 960, "y": 0, "w": 960, "h": 1080}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**说明**:
|
||||
- 窗口 0:左半边 960x1080
|
||||
- 窗口 1:右半边 960x1080
|
||||
- 正好覆盖 1920 宽度,无重叠
|
||||
- 每个窗口 resize 到 640x640,比例 0.89:1
|
||||
|
||||
#### 1440p (2560×1440) - 推荐2窗口
|
||||
|
||||
```json
|
||||
{
|
||||
"windows": [
|
||||
{"x": 0, "y": 0, "w": 1280, "h": 1440},
|
||||
{"x": 1280, "y": 0, "w": 1280, "h": 1440}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**说明**:
|
||||
- 窗口 0:左半边 1280x1440
|
||||
- 窗口 1:右半边 1280x1440
|
||||
- 比例 0.89:1,与 1080p 一致
|
||||
|
||||
#### 更高分辨率 - 增加窗口数
|
||||
|
||||
对于 4K (3840×2160) 等更高分辨率,可以增加窗口数量:
|
||||
|
||||
```json
|
||||
{
|
||||
"windows": [
|
||||
{"x": 0, "y": 0, "w": 1280, "h": 1080},
|
||||
{"x": 1280, "y": 0, "w": 1280, "h": 1080},
|
||||
{"x": 2560, "y": 0, "w": 1280, "h": 1080}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 5.5 配置示例
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "scrfd_sliding",
|
||||
"type": "ai_scrfd_sliding",
|
||||
"role": "filter",
|
||||
"enable": true,
|
||||
"model_path": "./models/scrfd_500m_640.rknn",
|
||||
"conf_thresh": 0.3,
|
||||
"nms_thresh": 0.4,
|
||||
"max_faces": 50,
|
||||
"output_landmarks": true,
|
||||
"windows": [
|
||||
{"x": 0, "y": 0, "w": 960, "h": 1080},
|
||||
{"x": 960, "y": 0, "w": 960, "h": 1080}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### 5.6 性能考量
|
||||
|
||||
- **窗口数 = 推理次数**:2 个窗口 = 2 次模型推理
|
||||
- **分辨率越高,窗口数越多**:需要在检测效果和性能之间平衡
|
||||
- **建议窗口数**:
|
||||
- 1080p:2 个窗口
|
||||
- 1440p:2 个窗口(或 4 个窗口用于更精细检测)
|
||||
- 4K:3-4 个窗口
|
||||
|
||||
### 5.7 滑动窗口检测常见问题
|
||||
|
||||
#### Q7: 窗口边缘的人脸被分割成两半?
|
||||
|
||||
**解决方法**:
|
||||
- 增加窗口重叠区域(如窗口 0 结束于 1000,窗口 1 开始于 900)
|
||||
- NMS 会自动合并重复检测
|
||||
|
||||
#### Q8: 远处人脸还是检测不到?
|
||||
|
||||
**解决方法**:
|
||||
- 增加窗口数量,让每个窗口覆盖更小区域
|
||||
- 降低 `conf_thresh` 让更多候选框通过
|
||||
- 考虑使用更高分辨率摄像头
|
||||
|
||||
#### Q9: 检测延迟增加?
|
||||
|
||||
**解决方法**:
|
||||
- 减少窗口数量
|
||||
- 降低 `max_faces` 减少后处理负担
|
||||
- 使用更高性能硬件
|
||||
|
||||
---
|
||||
|
||||
## 六、综合配置建议
|
||||
|
||||
### 6.1 场景配置速查表
|
||||
|
||||
| 场景 | 检测节点 | 关键参数 | 说明 |
|
||||
|------|----------|----------|------|
|
||||
| **门禁/考勤** | `ai_face_det` | conf=0.7, max_faces=5 | 近距离,高精度 |
|
||||
| **车间/厂房** | `ai_scrfd_sliding` | 2窗口 | 高位摄像头,大透视 |
|
||||
| **会议室** | `ai_scrfd` | conf=0.4, max_faces=50 | 多人场景 |
|
||||
| **户外/街道** | `ai_scrfd_sliding` | 2-4窗口 | 远距离检测 |
|
||||
|
||||
### 6.2 分辨率配置对照表
|
||||
|
||||
| 分辨率 | 检测节点 | 输入处理 | 建议 |
|
||||
|--------|----------|----------|------|
|
||||
| 720p | `ai_scrfd` | 前置缩放至640 | 通用配置 |
|
||||
| 1080p | `ai_scrfd_sliding` | 2窗口(960x1080) | 滑动窗口检测 |
|
||||
| 1440p | `ai_scrfd_sliding` | 2窗口(1280x1440) | 滑动窗口检测 |
|
||||
| 4K | `ai_scrfd_sliding` | 3-4窗口 | 更多窗口提升精度 |
|
||||
|
||||
---
|
||||
|
||||
## 七、常见问题汇总
|
||||
|
||||
### Q1: 检测框抖动/闪烁
|
||||
|
||||
**可能原因**:
|
||||
- `conf_thresh` 设置过低,边缘候选框置信度波动
|
||||
- `nms_thresh` 过低,相邻帧选择不同 anchor
|
||||
|
||||
**解决方法**:适当提高 `conf_thresh` 或调整 `nms_thresh`
|
||||
|
||||
### Q2: 密集场景漏检严重?
|
||||
|
||||
**解决方法**:
|
||||
- 降低 `conf_thresh` 到 0.4 左右
|
||||
- 提高 `max_faces` 到 30 以上
|
||||
- 适当提高 `nms_thresh` 到 0.5
|
||||
|
||||
### Q3: OSD 绘制卡顿?
|
||||
|
||||
**解决方法**:
|
||||
- 降低 `max_faces` 减少绘制负载
|
||||
- 提高 `conf_thresh` 减少检测数量
|
||||
|
||||
### Q4: 识别准确率不高?
|
||||
|
||||
**可能原因及解决方法**:
|
||||
1. **对齐问题**:确保 `align: true`,且检测节点 `output_landmarks: true`
|
||||
2. **阈值不合适**:调整 `threshold.accept`,根据实际测试确定最佳值
|
||||
3. **人脸库质量**:确保库中人脸照片清晰、正面、光线均匀
|
||||
4. **检测框质量**:适当提高检测 `conf_thresh`,过滤低质量检测框
|
||||
|
||||
### Q5: 远距离/小目标识别效果差?
|
||||
|
||||
**解决方法**:
|
||||
- 提高检测 `conf_thresh`,让只有清晰的人脸进入识别
|
||||
- 检查摄像头分辨率,确保人脸区域至少 60x60 像素
|
||||
- 考虑使用更高清的摄像头或调整安装角度
|
||||
|
||||
### Q6: 识别延迟高?
|
||||
|
||||
**优化方法**:
|
||||
- 降低 `max_faces`,减少单帧处理数量
|
||||
- 提高检测 `conf_thresh`,减少候选框
|
||||
- 确保 `gallery.load_on_start: true`,避免运行时查询数据库
|
||||
|
||||
---
|
||||
|
||||
## 相关文档
|
||||
|
||||
- [SCRFD 模型规格说明](../scrfd_500m_640_spec.md)
|
||||
- [YOLO 检测参数配置](../config_guide.md)
|
||||
- [DAG 节点与边说明](./dag_graph_node_edge.md)
|
||||
- [MobileFaceNet 模型说明](../models.md)
|
||||
@ -542,141 +542,98 @@ inline float ExtractNCHW(const TensorView& t, int c, int h, int w, int C, int H,
|
||||
}
|
||||
|
||||
/**
|
||||
* 解码SCRFD检测结果
|
||||
* 解码SCRFD检测结果 - 与 ai_scrfd 节点使用相同的逻辑
|
||||
*
|
||||
* @param outputs 9个输出张量 [score_8, score_16, score_32, bbox_8, bbox_16, bbox_32, kps_8, kps_16, kps_32]
|
||||
* @param anchors 预生成的anchor
|
||||
* @param anchors 预生成的anchor (center_x, center_y, stride)
|
||||
* @param src_w 原始图像宽度
|
||||
* @param src_h 原始图像高度
|
||||
* @param model_w 模型输入宽度
|
||||
* @param model_h 模型输入高度
|
||||
* @param cfg 检测配置
|
||||
* @param conf_thresh 置信度阈值
|
||||
* @param output_lm 是否输出关键点
|
||||
* @param out 输出结果
|
||||
*/
|
||||
inline void DecodeScrfd(const std::vector<TensorView>& outputs,
|
||||
const std::vector<ScrfdAnchor>& anchors,
|
||||
int src_w, int src_h,
|
||||
int model_w, int model_h,
|
||||
const DetectionConfig& cfg,
|
||||
float conf_thresh,
|
||||
bool output_lm,
|
||||
FaceDetResult& out) {
|
||||
if (outputs.size() != 9) {
|
||||
return; // SCRFD需要9个输出
|
||||
}
|
||||
if (outputs.size() != 9) return;
|
||||
|
||||
const float sx = static_cast<float>(src_w) / static_cast<float>(model_w);
|
||||
const float sy = static_cast<float>(src_h) / static_cast<float>(model_h);
|
||||
|
||||
std::vector<Rect> boxes;
|
||||
std::vector<float> scores;
|
||||
std::vector<std::array<Point2f, 5>> lmks;
|
||||
// Output order: score_8, score_16, score_32, bbox_8, bbox_16, bbox_32, kps_8, kps_16, kps_32
|
||||
const int anchor_counts[] = {12800, 3200, 800};
|
||||
const int strides[] = {8, 16, 32};
|
||||
|
||||
size_t anchor_idx = 0;
|
||||
const int strides[3] = {8, 16, 32};
|
||||
float scale_x = static_cast<float>(src_w) / model_w;
|
||||
float scale_y = static_cast<float>(src_h) / model_h;
|
||||
|
||||
for (int s = 0; s < 3; ++s) {
|
||||
int score_idx = s;
|
||||
int bbox_idx = s + 3;
|
||||
int kps_idx = s + 6;
|
||||
int stride = strides[s];
|
||||
int count = anchor_counts[s];
|
||||
|
||||
const TensorView& score_t = outputs[score_idx];
|
||||
const TensorView& bbox_t = outputs[bbox_idx];
|
||||
const TensorView& kps_t = outputs[kps_idx];
|
||||
// 检查输出数据是否有效
|
||||
if (outputs[s].type != RKNN_TENSOR_FLOAT32 ||
|
||||
outputs[s + 3].type != RKNN_TENSOR_FLOAT32 ||
|
||||
outputs[s + 6].type != RKNN_TENSOR_FLOAT32) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 检查维度
|
||||
if (score_t.dims.size() < 4 || bbox_t.dims.size() < 4) continue;
|
||||
const float* scores = reinterpret_cast<const float*>(outputs[s].data);
|
||||
const float* bboxes = reinterpret_cast<const float*>(outputs[s + 3].data);
|
||||
const float* kps = reinterpret_cast<const float*>(outputs[s + 6].data);
|
||||
|
||||
int C = static_cast<int>(score_t.dims[1]);
|
||||
int H = static_cast<int>(score_t.dims[2]);
|
||||
int W = static_cast<int>(score_t.dims[3]);
|
||||
int anchors_per_loc = C / 2; // fg/bg
|
||||
if (!scores || !bboxes || !kps) continue;
|
||||
|
||||
for (int h = 0; h < H; ++h) {
|
||||
for (int w = 0; w < W; ++w) {
|
||||
for (int a = 0; a < anchors_per_loc; ++a) {
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (anchor_idx >= anchors.size()) break;
|
||||
|
||||
// 提取前景分数 (channel a*2+1)
|
||||
float score = ExtractNCHW(score_t, a * 2 + 1, h, w, C, H, W);
|
||||
|
||||
if (score >= cfg.conf_thresh) {
|
||||
const ScrfdAnchor& anchor = anchors[anchor_idx];
|
||||
|
||||
// 提取bbox [dx, dy, dw, dh]
|
||||
float dx = ExtractNCHW(bbox_t, a * 4 + 0, h, w,
|
||||
static_cast<int>(bbox_t.dims[1]), H, W) * stride;
|
||||
float dy = ExtractNCHW(bbox_t, a * 4 + 1, h, w,
|
||||
static_cast<int>(bbox_t.dims[1]), H, W) * stride;
|
||||
float dw = ExtractNCHW(bbox_t, a * 4 + 2, h, w,
|
||||
static_cast<int>(bbox_t.dims[1]), H, W) * stride;
|
||||
float dh = ExtractNCHW(bbox_t, a * 4 + 3, h, w,
|
||||
static_cast<int>(bbox_t.dims[1]), H, W) * stride;
|
||||
|
||||
float cx = anchor.cx + dx;
|
||||
float cy = anchor.cy + dy;
|
||||
float x1 = (cx - dw * 0.5f) * sx;
|
||||
float y1 = (cy - dh * 0.5f) * sy;
|
||||
float x2 = (cx + dw * 0.5f) * sx;
|
||||
float y2 = (cy + dh * 0.5f) * sy;
|
||||
|
||||
x1 = static_cast<float>(ClampInt(static_cast<int>(x1), 0, src_w - 1));
|
||||
y1 = static_cast<float>(ClampInt(static_cast<int>(y1), 0, src_h - 1));
|
||||
x2 = static_cast<float>(ClampInt(static_cast<int>(x2), 0, src_w - 1));
|
||||
y2 = static_cast<float>(ClampInt(static_cast<int>(y2), 0, src_h - 1));
|
||||
|
||||
Rect bb;
|
||||
bb.x = x1;
|
||||
bb.y = y1;
|
||||
bb.w = std::max(0.0f, x2 - x1);
|
||||
bb.h = std::max(0.0f, y2 - y1);
|
||||
|
||||
if (bb.w > 1.0f && bb.h > 1.0f) {
|
||||
boxes.push_back(bb);
|
||||
scores.push_back(score);
|
||||
|
||||
// 提取关键点
|
||||
if (cfg.output_landmarks) {
|
||||
std::array<Point2f, 5> pts{};
|
||||
for (int k = 0; k < 5; ++k) {
|
||||
float lx = ExtractNCHW(kps_t, a * 10 + k * 2 + 0, h, w,
|
||||
static_cast<int>(kps_t.dims[1]), H, W) * stride;
|
||||
float ly = ExtractNCHW(kps_t, a * 10 + k * 2 + 1, h, w,
|
||||
static_cast<int>(kps_t.dims[1]), H, W) * stride;
|
||||
pts[k].x = (anchor.cx + lx) * sx;
|
||||
pts[k].y = (anchor.cy + ly) * sy;
|
||||
}
|
||||
lmks.push_back(pts);
|
||||
}
|
||||
}
|
||||
float score = scores[i];
|
||||
if (score < conf_thresh) {
|
||||
anchor_idx++;
|
||||
continue;
|
||||
}
|
||||
|
||||
++anchor_idx;
|
||||
}
|
||||
}
|
||||
const ScrfdAnchor& pt = anchors[anchor_idx];
|
||||
|
||||
// BBox: [left, top, right, bottom] - distances from center
|
||||
float left = bboxes[i * 4 + 0];
|
||||
float top = bboxes[i * 4 + 1];
|
||||
float right = bboxes[i * 4 + 2];
|
||||
float bottom = bboxes[i * 4 + 3];
|
||||
|
||||
// Decode to image coordinates (640x640)
|
||||
float x1_640 = (pt.cx - left) * stride;
|
||||
float y1_640 = (pt.cy - top) * stride;
|
||||
float x2_640 = (pt.cx + right) * stride;
|
||||
float y2_640 = (pt.cy + bottom) * stride;
|
||||
|
||||
FaceDetItem det;
|
||||
det.bbox.x = x1_640 * scale_x;
|
||||
det.bbox.y = y1_640 * scale_y;
|
||||
det.bbox.w = (x2_640 - x1_640) * scale_x;
|
||||
det.bbox.h = (y2_640 - y1_640) * scale_y;
|
||||
det.score = score;
|
||||
det.has_landmarks = output_lm;
|
||||
|
||||
// Keypoints
|
||||
if (output_lm) {
|
||||
for (int p = 0; p < 5; ++p) {
|
||||
float kps_x = kps[i * 10 + p * 2 + 0];
|
||||
float kps_y = kps[i * 10 + p * 2 + 1];
|
||||
float kx_640 = (pt.cx + kps_x) * stride;
|
||||
float ky_640 = (pt.cy + kps_y) * stride;
|
||||
det.landmarks[p].x = kx_640 * scale_x;
|
||||
det.landmarks[p].y = ky_640 * scale_y;
|
||||
}
|
||||
}
|
||||
|
||||
if (boxes.empty()) return;
|
||||
|
||||
// NMS
|
||||
std::vector<int> keep;
|
||||
NmsSorted(boxes, scores, cfg.nms_thresh, keep);
|
||||
if (keep.empty()) return;
|
||||
|
||||
// 构建输出
|
||||
const int out_n = std::min<int>(cfg.max_faces, static_cast<int>(keep.size()));
|
||||
out.faces.reserve(static_cast<size_t>(out_n));
|
||||
for (int i = 0; i < out_n; ++i) {
|
||||
const int k = keep[static_cast<size_t>(i)];
|
||||
FaceDetItem item;
|
||||
item.bbox = boxes[static_cast<size_t>(k)];
|
||||
item.score = scores[static_cast<size_t>(k)];
|
||||
item.track_id = -1;
|
||||
if (cfg.output_landmarks && k < static_cast<int>(lmks.size())) {
|
||||
item.has_landmarks = true;
|
||||
item.landmarks = lmks[static_cast<size_t>(k)];
|
||||
out.faces.push_back(det);
|
||||
anchor_idx++;
|
||||
}
|
||||
out.faces.push_back(std::move(item));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
85
include/face/scrfd_detector.h
Normal file
85
include/face/scrfd_detector.h
Normal file
@ -0,0 +1,85 @@
|
||||
#pragma once
|
||||
|
||||
/**
|
||||
* SCRFD Detector - 可复用的 SCRFD 检测器
|
||||
* 供 ai_scrfd 和 ai_scrfd_zoned 节点使用
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
#include "face/face_result.h"
|
||||
|
||||
// 包含 AiScheduler 以使用 BorrowedOutput
|
||||
#include "ai_scheduler.h"
|
||||
|
||||
namespace rk3588 {
|
||||
|
||||
/**
|
||||
* SCRFD 检测结果
|
||||
*/
|
||||
struct ScrfdDetection {
|
||||
FaceDetItem item;
|
||||
};
|
||||
|
||||
/**
|
||||
* SCRFD 检测器配置
|
||||
*/
|
||||
struct ScrfdConfig {
|
||||
float conf_thresh = 0.5f;
|
||||
float nms_thresh = 0.4f;
|
||||
int max_faces = 50;
|
||||
bool output_landmarks = true;
|
||||
};
|
||||
|
||||
/**
|
||||
* SCRFD 检测器
|
||||
*
|
||||
* 使用示例:
|
||||
* ScrfdDetector det;
|
||||
* det.Init(640, 640);
|
||||
* auto dets = det.Decode(outputs, src_w, src_h, config);
|
||||
*/
|
||||
class ScrfdDetector {
|
||||
public:
|
||||
ScrfdDetector();
|
||||
~ScrfdDetector();
|
||||
|
||||
/**
|
||||
* 初始化检测器
|
||||
* @param model_w 模型输入宽度 (640)
|
||||
* @param model_h 模型输入高度 (640)
|
||||
*/
|
||||
void Init(int model_w, int model_h);
|
||||
|
||||
/**
|
||||
* 解码 SCRFD 输出
|
||||
* @param outputs 9个输出张量 (BorrowedOutput)
|
||||
* @param src_w 原始图像宽度
|
||||
* @param src_h 原始图像高度
|
||||
* @param cfg 检测配置
|
||||
* @return 检测结果列表
|
||||
*/
|
||||
std::vector<FaceDetItem> Decode(
|
||||
const std::vector<AiScheduler::BorrowedOutput>& outputs,
|
||||
int src_w, int src_h,
|
||||
const ScrfdConfig& cfg);
|
||||
|
||||
/**
|
||||
* 应用 NMS
|
||||
*/
|
||||
std::vector<FaceDetItem> ApplyNMS(
|
||||
std::vector<FaceDetItem>& dets,
|
||||
float nms_thresh);
|
||||
|
||||
private:
|
||||
struct CenterPoint {
|
||||
float cx, cy;
|
||||
float stride;
|
||||
};
|
||||
|
||||
std::vector<CenterPoint> center_points_;
|
||||
int model_w_ = 640;
|
||||
int model_h_ = 640;
|
||||
};
|
||||
|
||||
} // namespace rk3588
|
||||
@ -269,24 +269,6 @@ set_target_properties(ai_face_det PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||
)
|
||||
|
||||
# ai_face_det_zoned plugin (RKNN-based RetinaFace with distance zone detection)
|
||||
add_library(ai_face_det_zoned SHARED
|
||||
ai_face_det_zoned/ai_face_det_zoned_node.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
|
||||
)
|
||||
target_include_directories(ai_face_det_zoned PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
|
||||
target_link_libraries(ai_face_det_zoned PRIVATE project_options Threads::Threads ai_scheduler)
|
||||
if(RK3588_ENABLE_RKNN AND RK_RKNN_LIB)
|
||||
target_compile_definitions(ai_face_det_zoned PRIVATE RK3588_ENABLE_RKNN)
|
||||
target_include_directories(ai_face_det_zoned PRIVATE ${RKNN_RUNTIME_INCLUDE_DIR})
|
||||
target_link_libraries(ai_face_det_zoned PRIVATE ${RK_RKNN_LIB})
|
||||
endif()
|
||||
set_target_properties(ai_face_det_zoned PROPERTIES
|
||||
OUTPUT_NAME "ai_face_det_zoned"
|
||||
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||
)
|
||||
|
||||
# ai_scrfd plugin (SCRFD 640x640 face detection)
|
||||
add_library(ai_scrfd SHARED
|
||||
ai_scrfd/ai_scrfd_node.cpp
|
||||
@ -305,6 +287,25 @@ set_target_properties(ai_scrfd PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||
)
|
||||
|
||||
# ai_scrfd_sliding plugin (SCRFD with sliding window detection)
|
||||
add_library(ai_scrfd_sliding SHARED
|
||||
ai_scrfd_sliding/ai_scrfd_sliding_node.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/face/scrfd_detector.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
|
||||
)
|
||||
target_include_directories(ai_scrfd_sliding PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
|
||||
target_link_libraries(ai_scrfd_sliding PRIVATE project_options Threads::Threads ai_scheduler)
|
||||
if(RK3588_ENABLE_RKNN)
|
||||
target_compile_definitions(ai_scrfd_sliding PRIVATE RK3588_ENABLE_RKNN)
|
||||
target_include_directories(ai_scrfd_sliding PRIVATE ${RKNN_RUNTIME_INCLUDE_DIR})
|
||||
target_link_libraries(ai_scrfd_sliding PRIVATE ${RK_RKNN_LIB})
|
||||
endif()
|
||||
set_target_properties(ai_scrfd_sliding PROPERTIES
|
||||
OUTPUT_NAME "ai_scrfd_sliding"
|
||||
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||
)
|
||||
|
||||
# ai_face_recog plugin (RKNN-based ArcFace/MobileFaceNet inference)
|
||||
add_library(ai_face_recog SHARED
|
||||
ai_face_recog/ai_face_recog_node.cpp
|
||||
@ -511,7 +512,7 @@ if(RK3588_ENABLE_ZLMEDIAKIT AND RK_ZLMK_API_LIB)
|
||||
)
|
||||
endif()
|
||||
|
||||
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_face_det_zoned ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler
|
||||
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate storage ai_scheduler
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
|
||||
)
|
||||
|
||||
@ -1,502 +0,0 @@
|
||||
/**
|
||||
* ai_face_det_zoned - 三分区距离感知人脸检测节点
|
||||
*
|
||||
* 特性:
|
||||
* 1. 接收原始分辨率输入(不经过前置缩放)
|
||||
* 2. 基于距离进行ROI裁剪和三分区检测
|
||||
* 3. 近区(3-5m) 1.0x / 中区(5-7m) 1.3x / 远区(7-9m) 1.8x
|
||||
* 4. 复用 face_detection_utils.h 中的公共函数
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "face/face_detection_utils.h"
|
||||
#include "hw/i_infer_backend.h"
|
||||
#include "face/face_result.h"
|
||||
#include "node.h"
|
||||
#include "utils/dma_alloc.h"
|
||||
#include "utils/logger.h"
|
||||
|
||||
namespace rk3588 {
|
||||
|
||||
using namespace face_detection;
|
||||
|
||||
class AiFaceDetZonedNode : public INode {
|
||||
public:
|
||||
std::string Id() const override { return id_; }
|
||||
std::string Type() const override { return "ai_face_det_zoned"; }
|
||||
|
||||
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
|
||||
id_ = config.ValueOr<std::string>("id", "face_det_zoned");
|
||||
model_path_ = config.ValueOr<std::string>("model_path",
|
||||
"./models/RetinaFace_mobile320.rknn");
|
||||
|
||||
// 基础检测参数
|
||||
det_cfg_.conf_thresh = config.ValueOr<float>("conf", 0.6f);
|
||||
det_cfg_.nms_thresh = config.ValueOr<float>("nms", 0.4f);
|
||||
det_cfg_.max_faces = config.ValueOr<int>("max_faces", 10);
|
||||
det_cfg_.output_landmarks = config.ValueOr<bool>("output_landmarks", true);
|
||||
|
||||
// 模型输入尺寸(默认320)
|
||||
model_w_ = config.ValueOr<int>("model_w", 320);
|
||||
model_h_ = config.ValueOr<int>("model_h", 320);
|
||||
|
||||
// 先验框步长和最小尺寸(RetinaFace默认)
|
||||
det_cfg_.steps = {8, 16, 32};
|
||||
det_cfg_.min_sizes = {{16, 32}, {64, 128}, {256, 512}};
|
||||
|
||||
// ROI配置 - 支持格式: "roi": {"x": 0, "y": 0, "w": 1920, "h": 1080}
|
||||
roi_enabled_ = false;
|
||||
roi_x_ = roi_y_ = roi_w_ = roi_h_ = 0;
|
||||
if (const SimpleJson* roi = config.Find("roi"); roi && roi->IsObject()) {
|
||||
// 直接读取平级格式
|
||||
roi_x_ = roi->ValueOr<int>("x", 0);
|
||||
roi_y_ = roi->ValueOr<int>("y", 0);
|
||||
roi_w_ = roi->ValueOr<int>("w", 0);
|
||||
roi_h_ = roi->ValueOr<int>("h", 0);
|
||||
|
||||
// 如果w/h有效,则启用ROI
|
||||
if (roi_w_ > 0 && roi_h_ > 0) {
|
||||
roi_enabled_ = true;
|
||||
}
|
||||
// 兼容旧格式: "roi": {"crop": {...}}
|
||||
else if (const SimpleJson* crop = roi->Find("crop"); crop && crop->IsObject()) {
|
||||
roi_x_ = crop->ValueOr<int>("x", 0);
|
||||
roi_y_ = crop->ValueOr<int>("y", 0);
|
||||
roi_w_ = crop->ValueOr<int>("w", 0);
|
||||
roi_h_ = crop->ValueOr<int>("h", 0);
|
||||
if (roi_w_ > 0 && roi_h_ > 0) {
|
||||
roi_enabled_ = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 三分区配置 - 支持两种格式:
|
||||
// 1. 旧格式: "distance_zones": {"enabled": true, "boundaries": [y1, y2], "scales": [s1, s2, s3]}
|
||||
// 2. 新格式: "zones": {"near_zone": {"y_start": 0, "y_end": 405, "scale": 0.5}, ...}
|
||||
zones_enabled_ = false;
|
||||
boundary_y_5m_ = boundary_y_7m_ = 0;
|
||||
scale_near_ = 1.0f;
|
||||
scale_mid_ = 1.3f;
|
||||
scale_far_ = 1.8f;
|
||||
|
||||
// 优先尝试新格式 "zones"
|
||||
if (const SimpleJson* zones = config.Find("zones");
|
||||
zones && zones->IsObject()) {
|
||||
bool has_near = false, has_mid = false, has_far = false;
|
||||
int near_y_end = 0, mid_y_end = 0;
|
||||
|
||||
if (const SimpleJson* near = zones->Find("near_zone"); near && near->IsObject()) {
|
||||
near_y_end = near->ValueOr<int>("y_end", 0);
|
||||
scale_near_ = near->ValueOr<float>("scale", 1.0f);
|
||||
has_near = true;
|
||||
}
|
||||
|
||||
if (const SimpleJson* mid = zones->Find("mid_zone"); mid && mid->IsObject()) {
|
||||
mid_y_end = mid->ValueOr<int>("y_end", 0);
|
||||
scale_mid_ = mid->ValueOr<float>("scale", 1.0f);
|
||||
has_mid = true;
|
||||
}
|
||||
|
||||
if (const SimpleJson* far = zones->Find("far_zone"); far && far->IsObject()) {
|
||||
scale_far_ = far->ValueOr<float>("scale", 1.0f);
|
||||
has_far = true;
|
||||
}
|
||||
|
||||
if (has_near && has_mid && has_far) {
|
||||
zones_enabled_ = true;
|
||||
boundary_y_5m_ = near_y_end; // near和mid的分界
|
||||
boundary_y_7m_ = mid_y_end; // mid和far的分界
|
||||
}
|
||||
}
|
||||
// 兼容旧格式
|
||||
else if (const SimpleJson* zones = config.Find("distance_zones");
|
||||
zones && zones->IsObject()) {
|
||||
zones_enabled_ = zones->ValueOr<bool>("enabled", false);
|
||||
|
||||
if (const SimpleJson* boundaries = zones->Find("boundaries");
|
||||
boundaries && boundaries->IsArray() && boundaries->AsArray().size() >= 2) {
|
||||
boundary_y_5m_ = boundaries->AsArray()[0].AsInt(0);
|
||||
boundary_y_7m_ = boundaries->AsArray()[1].AsInt(0);
|
||||
}
|
||||
|
||||
if (const SimpleJson* scales = zones->Find("scales");
|
||||
scales && scales->IsArray() && scales->AsArray().size() >= 3) {
|
||||
scale_near_ = scales->AsArray()[0].AsNumber(1.0f);
|
||||
scale_mid_ = scales->AsArray()[1].AsNumber(1.3f);
|
||||
scale_far_ = scales->AsArray()[2].AsNumber(1.8f);
|
||||
}
|
||||
}
|
||||
|
||||
input_queue_ = ctx.input_queue;
|
||||
output_queues_ = ctx.output_queues;
|
||||
if (!input_queue_) {
|
||||
LogError("[ai_face_det_zoned] no input queue for node " + id_);
|
||||
return false;
|
||||
}
|
||||
if (output_queues_.empty()) {
|
||||
LogError("[ai_face_det_zoned] no output queue for node " + id_);
|
||||
return false;
|
||||
}
|
||||
|
||||
infer_backend_ = ctx.infer_backend;
|
||||
if (!infer_backend_) {
|
||||
LogError("[ai_face_det_zoned] no infer backend for node " + id_);
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
if (model_path_.empty()) {
|
||||
LogError("[ai_face_det_zoned] model_path is required");
|
||||
return false;
|
||||
}
|
||||
std::string err;
|
||||
model_handle_ = infer_backend_->LoadModel(model_path_, err);
|
||||
if (model_handle_ == kInvalidModelHandle) {
|
||||
LogError("[ai_face_det_zoned] failed to load model: " + err);
|
||||
return false;
|
||||
}
|
||||
|
||||
// 预计算先验框
|
||||
priors_ = GeneratePriors(model_w_, model_h_, det_cfg_.steps, det_cfg_.min_sizes);
|
||||
|
||||
LogInfo("[ai_face_det_zoned] model loaded: " + model_path_ +
|
||||
" (" + std::to_string(model_w_) + "x" + std::to_string(model_h_) +
|
||||
"), priors=" + std::to_string(priors_.size()));
|
||||
#else
|
||||
LogWarn("[ai_face_det_zoned] RKNN disabled, will passthrough frames");
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Start() override {
|
||||
LogInfo("[ai_face_det_zoned] start id=" + id_ +
|
||||
" zones=" + std::string(zones_enabled_ ? "enabled" : "disabled") +
|
||||
" roi=" + std::string(roi_enabled_ ? "enabled" : "disabled") +
|
||||
" roi_xywh=" + std::to_string(roi_x_) + "," + std::to_string(roi_y_) + "," +
|
||||
std::to_string(roi_w_) + "," + std::to_string(roi_h_) +
|
||||
" boundaries=" + std::to_string(boundary_y_5m_) + "," + std::to_string(boundary_y_7m_) +
|
||||
" scales=" + std::to_string(scale_near_) + "," + std::to_string(scale_mid_) + "," + std::to_string(scale_far_));
|
||||
return true;
|
||||
}
|
||||
|
||||
void Stop() override {
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
if (model_handle_ != kInvalidModelHandle) {
|
||||
infer_backend_->UnloadModel(model_handle_);
|
||||
model_handle_ = kInvalidModelHandle;
|
||||
}
|
||||
#endif
|
||||
LogInfo("[ai_face_det_zoned] stop id=" + id_);
|
||||
}
|
||||
|
||||
NodeStatus Process(FramePtr frame) override {
|
||||
if (!frame) return NodeStatus::DROP;
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
RunZonedDetection(frame);
|
||||
#endif
|
||||
|
||||
Push(frame);
|
||||
return NodeStatus::OK;
|
||||
}
|
||||
|
||||
private:
|
||||
void Push(FramePtr frame) {
|
||||
for (auto& q : output_queues_) q->Push(frame);
|
||||
}
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
|
||||
// 将RKNN输出转换为TensorView
|
||||
TensorView ConvertToTensorView(const AiScheduler::BorrowedOutput& o) {
|
||||
TensorView tv;
|
||||
tv.data = o.data;
|
||||
tv.size = o.size;
|
||||
tv.zp = o.zp;
|
||||
tv.scale = o.scale;
|
||||
tv.dims = o.dims;
|
||||
tv.type = o.type;
|
||||
return tv;
|
||||
}
|
||||
|
||||
void RunZonedDetection(FramePtr frame) {
|
||||
if (!frame->data || frame->data_size == 0) return;
|
||||
if (frame->format != PixelFormat::RGB && frame->format != PixelFormat::BGR) {
|
||||
LogWarn("[ai_face_det_zoned] input must be RGB/BGR");
|
||||
return;
|
||||
}
|
||||
|
||||
const int src_w = frame->width;
|
||||
const int src_h = frame->height;
|
||||
|
||||
// 应用ROI裁剪
|
||||
int roi_x = 0, roi_y = 0, roi_w = src_w, roi_h = src_h;
|
||||
if (roi_enabled_) {
|
||||
roi_x = ClampInt(roi_x_, 0, src_w - 1);
|
||||
roi_y = ClampInt(roi_y_, 0, src_h - 1);
|
||||
roi_w = ClampInt(roi_w_, 1, src_w - roi_x);
|
||||
roi_h = ClampInt(roi_h_, 1, src_h - roi_y);
|
||||
}
|
||||
|
||||
std::vector<FaceDetItem> all_detections;
|
||||
|
||||
if (zones_enabled_) {
|
||||
// 三分区检测
|
||||
all_detections = DetectWithZones(frame, roi_x, roi_y, roi_w, roi_h);
|
||||
} else {
|
||||
// 单区检测(全ROI区域)
|
||||
auto dets = DetectSingleZone(frame, roi_x, roi_y, roi_w, roi_h, 1.0f);
|
||||
// 坐标映射回原始图像
|
||||
for (auto& det : dets) {
|
||||
det.bbox.x += roi_x;
|
||||
det.bbox.y += roi_y;
|
||||
if (det.has_landmarks) {
|
||||
for (auto& lm : det.landmarks) {
|
||||
lm.x += roi_x;
|
||||
lm.y += roi_y;
|
||||
}
|
||||
}
|
||||
all_detections.push_back(det);
|
||||
}
|
||||
}
|
||||
|
||||
// NMS去重
|
||||
all_detections = ApplyNMS(all_detections, det_cfg_.nms_thresh);
|
||||
|
||||
// 限制最大人脸数
|
||||
if (all_detections.size() > static_cast<size_t>(det_cfg_.max_faces)) {
|
||||
all_detections.resize(det_cfg_.max_faces);
|
||||
}
|
||||
|
||||
// 构建结果
|
||||
FaceDetResult det_result;
|
||||
det_result.img_w = src_w;
|
||||
det_result.img_h = src_h;
|
||||
det_result.model_name = "retinaface_zoned";
|
||||
det_result.faces = std::move(all_detections);
|
||||
|
||||
frame->face_det = std::make_shared<FaceDetResult>(std::move(det_result));
|
||||
}
|
||||
|
||||
std::vector<FaceDetItem> DetectWithZones(FramePtr frame,
|
||||
int roi_x, int roi_y,
|
||||
int roi_w, int roi_h) {
|
||||
std::vector<FaceDetItem> all_dets;
|
||||
|
||||
// 将分界线坐标转换到ROI坐标系
|
||||
int by5 = ClampInt(boundary_y_5m_ - roi_y, 0, roi_h);
|
||||
int by7 = ClampInt(boundary_y_7m_ - roi_y, 0, roi_h);
|
||||
|
||||
// 确保顺序正确(y大=下方=近距离)
|
||||
if (by5 < by7) std::swap(by5, by7);
|
||||
|
||||
// 近区检测 (画面下方,y大,近距离3-5m)
|
||||
if (by5 < roi_h) {
|
||||
auto dets = DetectSingleZone(frame, roi_x, roi_y + by5, roi_w, roi_h - by5, scale_near_);
|
||||
for (auto& det : dets) {
|
||||
det.bbox.x += roi_x;
|
||||
det.bbox.y += roi_y + by5;
|
||||
if (det.has_landmarks) {
|
||||
for (auto& lm : det.landmarks) {
|
||||
lm.x += roi_x;
|
||||
lm.y += roi_y + by5;
|
||||
}
|
||||
}
|
||||
all_dets.push_back(det);
|
||||
}
|
||||
}
|
||||
|
||||
// 中区检测 (画面中部,中距离5-7m)
|
||||
if (by7 < by5) {
|
||||
auto dets = DetectSingleZone(frame, roi_x, roi_y + by7, roi_w, by5 - by7, scale_mid_);
|
||||
for (auto& det : dets) {
|
||||
det.bbox.x += roi_x;
|
||||
det.bbox.y += roi_y + by7;
|
||||
if (det.has_landmarks) {
|
||||
for (auto& lm : det.landmarks) {
|
||||
lm.x += roi_x;
|
||||
lm.y += roi_y + by7;
|
||||
}
|
||||
}
|
||||
all_dets.push_back(det);
|
||||
}
|
||||
}
|
||||
|
||||
// 远区检测 (画面上方,y小,远距离7-9m)
|
||||
if (by7 > 0) {
|
||||
auto dets = DetectSingleZone(frame, roi_x, roi_y, roi_w, by7, scale_far_);
|
||||
for (auto& det : dets) {
|
||||
det.bbox.x += roi_x;
|
||||
det.bbox.y += roi_y;
|
||||
if (det.has_landmarks) {
|
||||
for (auto& lm : det.landmarks) {
|
||||
lm.x += roi_x;
|
||||
lm.y += roi_y;
|
||||
}
|
||||
}
|
||||
all_dets.push_back(det);
|
||||
}
|
||||
}
|
||||
|
||||
return all_dets;
|
||||
}
|
||||
|
||||
std::vector<FaceDetItem> DetectSingleZone(FramePtr frame,
|
||||
int x, int y, int w, int h,
|
||||
float scale) {
|
||||
std::vector<FaceDetItem> dets;
|
||||
|
||||
if (w <= 0 || h <= 0) return dets;
|
||||
|
||||
const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
|
||||
const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
|
||||
: (frame->stride > 0 ? frame->stride : frame->width * 3);
|
||||
|
||||
// 裁剪区域缩放后的尺寸
|
||||
int crop_w = static_cast<int>(w * scale);
|
||||
int crop_h = static_cast<int>(h * scale);
|
||||
if (crop_w <= 0 || crop_h <= 0) return dets;
|
||||
|
||||
// 分配缓冲区
|
||||
input_buf_.resize(static_cast<size_t>(model_w_) * model_h_ * 3);
|
||||
|
||||
// 双线性缩放到模型输入尺寸
|
||||
// 注意:这里从原图裁剪(x,y,w,h),缩放到(model_w_, model_h_)
|
||||
// 优化:可以直接从src裁剪并缩放,避免中间buffer
|
||||
|
||||
// 简化的处理:先裁剪到临时buffer,再缩放
|
||||
std::vector<uint8_t> crop_buf(static_cast<size_t>(w) * h * 3);
|
||||
for (int row = 0; row < h; ++row) {
|
||||
const uint8_t* src_row = src + (y + row) * src_stride + x * 3;
|
||||
uint8_t* dst_row = crop_buf.data() + row * w * 3;
|
||||
memcpy(dst_row, src_row, static_cast<size_t>(w) * 3);
|
||||
}
|
||||
|
||||
// 缩放到模型输入尺寸
|
||||
ResizeRgbBilinear(crop_buf.data(), w, h, w * 3,
|
||||
input_buf_.data(), model_w_, model_h_,
|
||||
false); // 假设输入已经是RGB
|
||||
|
||||
// NPU推理
|
||||
InferInput input;
|
||||
input.width = model_w_;
|
||||
input.height = model_h_;
|
||||
input.is_nhwc = true;
|
||||
input.data = input_buf_.data();
|
||||
input.size = input_buf_.size();
|
||||
input.type = RKNN_TENSOR_UINT8;
|
||||
|
||||
auto r = infer_backend_->InferBorrowed(model_handle_, input);
|
||||
if (!r.success || r.outputs.empty()) {
|
||||
LogWarn("[ai_face_det_zoned] inference failed");
|
||||
return dets;
|
||||
}
|
||||
|
||||
// 解析输出
|
||||
NcTensor loc_tensor, conf_tensor, landm_tensor;
|
||||
bool has_loc = false, has_conf = false, has_landm = false;
|
||||
|
||||
for (const auto& o : r.outputs) {
|
||||
TensorView tv = ConvertToTensorView(o);
|
||||
NcTensor tmp;
|
||||
if (!has_loc && ExtractNcTensor(tv, 4, tmp)) {
|
||||
loc_tensor = std::move(tmp);
|
||||
has_loc = true;
|
||||
} else if (!has_conf && ExtractNcTensor(tv, 2, tmp)) {
|
||||
conf_tensor = std::move(tmp);
|
||||
has_conf = true;
|
||||
} else if (!has_landm && ExtractNcTensor(tv, 10, tmp)) {
|
||||
landm_tensor = std::move(tmp);
|
||||
has_landm = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_loc || !has_conf) return dets;
|
||||
|
||||
// 解码检测结果
|
||||
FaceDetResult result;
|
||||
DecodeRetinaFace(loc_tensor, conf_tensor, landm_tensor,
|
||||
priors_, w, h, model_w_, model_h_,
|
||||
det_cfg_, result);
|
||||
|
||||
if (!result.faces.empty()) {
|
||||
LogInfo("[ai_face_det_zoned] DetectSingleZone: detected " +
|
||||
std::to_string(result.faces.size()) + " faces, max_score=" +
|
||||
std::to_string(result.faces.empty() ? 0 : result.faces[0].score));
|
||||
}
|
||||
|
||||
return result.faces;
|
||||
}
|
||||
|
||||
std::vector<FaceDetItem> ApplyNMS(std::vector<FaceDetItem>& dets, float threshold) {
|
||||
if (dets.empty()) return dets;
|
||||
|
||||
// 按置信度排序
|
||||
std::sort(dets.begin(), dets.end(),
|
||||
[](const FaceDetItem& a, const FaceDetItem& b) {
|
||||
return a.score > b.score;
|
||||
});
|
||||
|
||||
std::vector<FaceDetItem> keep;
|
||||
std::vector<bool> suppressed(dets.size(), false);
|
||||
|
||||
for (size_t i = 0; i < dets.size(); ++i) {
|
||||
if (suppressed[i]) continue;
|
||||
keep.push_back(dets[i]);
|
||||
|
||||
for (size_t j = i + 1; j < dets.size(); ++j) {
|
||||
if (suppressed[j]) continue;
|
||||
if (IoU(dets[i].bbox, dets[j].bbox) > threshold) {
|
||||
suppressed[j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return keep;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
std::string id_;
|
||||
std::string model_path_;
|
||||
|
||||
DetectionConfig det_cfg_;
|
||||
int model_w_ = 320;
|
||||
int model_h_ = 320;
|
||||
|
||||
// ROI
|
||||
bool roi_enabled_ = false;
|
||||
int roi_x_ = 0, roi_y_ = 0, roi_w_ = 0, roi_h_ = 0;
|
||||
|
||||
// 三分区
|
||||
bool zones_enabled_ = false;
|
||||
int boundary_y_5m_ = 0;
|
||||
int boundary_y_7m_ = 0;
|
||||
float scale_near_ = 1.0f;
|
||||
float scale_mid_ = 1.3f;
|
||||
float scale_far_ = 1.8f;
|
||||
|
||||
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
|
||||
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
||||
std::shared_ptr<IInferBackend> infer_backend_;
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
ModelHandle model_handle_ = kInvalidModelHandle;
|
||||
std::vector<Prior> priors_;
|
||||
std::vector<uint8_t> input_buf_;
|
||||
#endif
|
||||
};
|
||||
|
||||
REGISTER_NODE(AiFaceDetZonedNode, "ai_face_det_zoned");
|
||||
|
||||
} // namespace rk3588
|
||||
311
plugins/ai_scrfd_sliding/ai_scrfd_sliding_node.cpp
Normal file
311
plugins/ai_scrfd_sliding/ai_scrfd_sliding_node.cpp
Normal file
@ -0,0 +1,311 @@
|
||||
/**
|
||||
* ai_scrfd_sliding - SCRFD with sliding window detection
|
||||
*
|
||||
* Features:
|
||||
* 1. Resize input to target height (640) keeping approximate ratio
|
||||
* 2. Split into multiple 640x640 windows
|
||||
* 3. Detect on each window and merge results
|
||||
*
|
||||
* For 1080p: resize to 1280x640, 2 windows
|
||||
* For 1440p: resize to 2560x640, 4 windows
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "face/face_detection_utils.h"
|
||||
#include "face/face_result.h"
|
||||
#include "face/scrfd_detector.h"
|
||||
#include "hw/i_infer_backend.h"
|
||||
#include "node.h"
|
||||
#include "utils/dma_alloc.h"
|
||||
#include "utils/logger.h"
|
||||
|
||||
namespace rk3588 {
|
||||
|
||||
using namespace face_detection;
|
||||
|
||||
class AiScrfdSlidingNode : public INode {
|
||||
public:
|
||||
std::string Id() const override { return id_; }
|
||||
std::string Type() const override { return "ai_scrfd_sliding"; }
|
||||
|
||||
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
|
||||
id_ = config.ValueOr<std::string>("id", "scrfd_sliding");
|
||||
model_path_ = config.ValueOr<std::string>("model_path",
|
||||
"./models/scrfd_500m_640.rknn");
|
||||
|
||||
// Detection parameters
|
||||
det_cfg_.conf_thresh = config.ValueOr<float>("conf_thresh", 0.3f);
|
||||
det_cfg_.nms_thresh = config.ValueOr<float>("nms_thresh", 0.4f);
|
||||
det_cfg_.max_faces = config.ValueOr<int>("max_faces", 50);
|
||||
det_cfg_.output_landmarks = config.ValueOr<bool>("output_landmarks", true);
|
||||
|
||||
model_w_ = 640;
|
||||
model_h_ = 640;
|
||||
|
||||
// Initialize detector
|
||||
detector_.Init(model_w_, model_h_);
|
||||
|
||||
// Parse sliding windows config
|
||||
// If not configured, auto-calculate based on input resolution
|
||||
windows_.clear();
|
||||
if (const SimpleJson* win_arr = config.Find("windows"); win_arr && win_arr->IsArray()) {
|
||||
for (const auto& w : win_arr->AsArray()) {
|
||||
if (w.IsObject()) {
|
||||
Window win;
|
||||
win.x = w.ValueOr<int>("x", 0);
|
||||
win.y = w.ValueOr<int>("y", 0);
|
||||
win.w = w.ValueOr<int>("w", 640);
|
||||
win.h = w.ValueOr<int>("h", 640);
|
||||
windows_.push_back(win);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Target resize height (default 640)
|
||||
target_height_ = config.ValueOr<int>("target_height", 640);
|
||||
|
||||
input_queue_ = ctx.input_queue;
|
||||
output_queues_ = ctx.output_queues;
|
||||
if (!input_queue_) {
|
||||
LogError("[ai_scrfd_sliding] no input queue");
|
||||
return false;
|
||||
}
|
||||
|
||||
infer_backend_ = ctx.infer_backend;
|
||||
if (!infer_backend_) {
|
||||
LogError("[ai_scrfd_sliding] no infer backend");
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
std::string err;
|
||||
model_handle_ = infer_backend_->LoadModel(model_path_, err);
|
||||
if (model_handle_ == kInvalidModelHandle) {
|
||||
LogError("[ai_scrfd_sliding] failed to load model: " + err);
|
||||
return false;
|
||||
}
|
||||
|
||||
input_buf_.resize(model_w_ * model_h_ * 3);
|
||||
|
||||
LogInfo("[ai_scrfd_sliding] model loaded: " + model_path_);
|
||||
#else
|
||||
LogWarn("[ai_scrfd_sliding] RKNN disabled");
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Start() override {
|
||||
LogInfo("[ai_scrfd_sliding] start, windows=" + std::to_string(windows_.size()));
|
||||
return true;
|
||||
}
|
||||
|
||||
void Stop() override {
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
if (model_handle_ != kInvalidModelHandle) {
|
||||
infer_backend_->UnloadModel(model_handle_);
|
||||
model_handle_ = kInvalidModelHandle;
|
||||
}
|
||||
#endif
|
||||
LogInfo("[ai_scrfd_sliding] stop");
|
||||
}
|
||||
|
||||
NodeStatus Process(FramePtr frame) override {
|
||||
if (!frame) return NodeStatus::DROP;
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
RunDetection(frame);
|
||||
#endif
|
||||
|
||||
Push(frame);
|
||||
return NodeStatus::OK;
|
||||
}
|
||||
|
||||
private:
|
||||
struct Window {
|
||||
int x, y, w, h;
|
||||
};
|
||||
|
||||
void Push(FramePtr frame) {
|
||||
for (auto& q : output_queues_) q->Push(frame);
|
||||
}
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
|
||||
void RunDetection(FramePtr frame) {
|
||||
if (!frame->data || frame->data_size == 0) return;
|
||||
|
||||
const int src_w = frame->width;
|
||||
const int src_h = frame->height;
|
||||
|
||||
if (frame->DmaFd() >= 0) frame->SyncStart();
|
||||
|
||||
// Calculate windows if not pre-configured
|
||||
std::vector<Window> windows = windows_;
|
||||
if (windows.empty()) {
|
||||
windows = CalculateWindows(src_w, src_h);
|
||||
}
|
||||
|
||||
std::vector<FaceDetItem> all_detections;
|
||||
|
||||
const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
|
||||
const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
|
||||
: (frame->stride > 0 ? frame->stride : frame->width * 3);
|
||||
|
||||
// Process each window - crop from original, then resize to 640x640
|
||||
for (size_t i = 0; i < windows.size(); ++i) {
|
||||
const auto& win = windows[i];
|
||||
auto dets = DetectWindowFromSource(src, src_w, src_h, src_stride, win);
|
||||
|
||||
// Detections are already in original coordinates
|
||||
all_detections.insert(all_detections.end(), dets.begin(), dets.end());
|
||||
}
|
||||
|
||||
// Apply NMS
|
||||
all_detections = detector_.ApplyNMS(all_detections, det_cfg_.nms_thresh);
|
||||
|
||||
if (all_detections.size() > static_cast<size_t>(det_cfg_.max_faces)) {
|
||||
all_detections.resize(det_cfg_.max_faces);
|
||||
}
|
||||
|
||||
FaceDetResult result;
|
||||
result.img_w = src_w;
|
||||
result.img_h = src_h;
|
||||
result.model_name = "scrfd_sliding";
|
||||
result.faces = std::move(all_detections);
|
||||
|
||||
frame->face_det = std::make_shared<FaceDetResult>(std::move(result));
|
||||
}
|
||||
|
||||
std::vector<Window> CalculateWindows(int src_w, int src_h) {
|
||||
std::vector<Window> windows;
|
||||
|
||||
// Strategy: Split source image into overlapping 640x640 regions
|
||||
// For 1080p: 1920x1080 -> 3x2 grid (6 windows)
|
||||
// For 1440p: 2560x1440 -> 4x2 grid (8 windows)
|
||||
|
||||
// Calculate step size (with overlap)
|
||||
int step_x = (src_w <= 640) ? src_w : (src_w - 640) / ((src_w + 639) / 640 - 1);
|
||||
int step_y = (src_h <= 640) ? src_h : (src_h - 640) / ((src_h + 639) / 640 - 1);
|
||||
|
||||
if (step_x < 640) step_x = 640;
|
||||
if (step_y < 640) step_y = 640;
|
||||
|
||||
for (int y = 0; y < src_h; y += step_y) {
|
||||
for (int x = 0; x < src_w; x += step_x) {
|
||||
Window win;
|
||||
win.x = x;
|
||||
win.y = y;
|
||||
win.w = 640;
|
||||
win.h = 640;
|
||||
windows.push_back(win);
|
||||
|
||||
// Stop if we've covered the width
|
||||
if (x + 640 >= src_w) break;
|
||||
}
|
||||
// Stop if we've covered the height
|
||||
if (y + 640 >= src_h) break;
|
||||
}
|
||||
|
||||
LogInfo("[ai_scrfd_sliding] Auto-calculated: " + std::to_string(windows.size()) + " windows for " + std::to_string(src_w) + "x" + std::to_string(src_h));
|
||||
|
||||
return windows;
|
||||
}
|
||||
|
||||
std::vector<FaceDetItem> DetectWindowFromSource(const uint8_t* src, int src_w, int src_h, int src_stride, const Window& win) {
|
||||
std::vector<FaceDetItem> dets;
|
||||
|
||||
// Clamp window to source bounds
|
||||
int win_x = std::max(0, std::min(win.x, src_w - 1));
|
||||
int win_y = std::max(0, std::min(win.y, src_h - 1));
|
||||
int win_w = std::min(win.w, src_w - win_x);
|
||||
int win_h = std::min(win.h, src_h - win_y);
|
||||
|
||||
if (win_w <= 0 || win_h <= 0) {
|
||||
LogWarn("[ai_scrfd_sliding] Invalid window");
|
||||
return dets;
|
||||
}
|
||||
|
||||
// Crop from source
|
||||
std::vector<uint8_t> crop_buf(static_cast<size_t>(win_w) * win_h * 3);
|
||||
for (int row = 0; row < win_h; ++row) {
|
||||
const uint8_t* src_row = src + (win_y + row) * src_stride + win_x * 3;
|
||||
uint8_t* dst_row = crop_buf.data() + row * win_w * 3;
|
||||
memcpy(dst_row, src_row, static_cast<size_t>(win_w) * 3);
|
||||
}
|
||||
|
||||
// Resize to 640x640
|
||||
std::vector<uint8_t> model_input(640 * 640 * 3);
|
||||
ResizeRgbBilinear(crop_buf.data(), win_w, win_h, win_w * 3,
|
||||
model_input.data(), 640, 640, false);
|
||||
|
||||
// NPU inference
|
||||
InferInput input;
|
||||
input.width = 640;
|
||||
input.height = 640;
|
||||
input.is_nhwc = true;
|
||||
input.data = model_input.data();
|
||||
input.size = model_input.size();
|
||||
input.type = RKNN_TENSOR_UINT8;
|
||||
|
||||
auto r = infer_backend_->InferBorrowed(model_handle_, input);
|
||||
if (!r.success || r.outputs.empty()) {
|
||||
LogWarn("[ai_scrfd_sliding] inference failed");
|
||||
return dets;
|
||||
}
|
||||
|
||||
// Decode (get detections in 640x640 coordinates)
|
||||
dets = detector_.Decode(r.outputs, 640, 640, det_cfg_);
|
||||
|
||||
// Map back to original coordinates
|
||||
float scale_x = static_cast<float>(win_w) / 640.0f;
|
||||
float scale_y = static_cast<float>(win_h) / 640.0f;
|
||||
|
||||
for (auto& det : dets) {
|
||||
det.bbox.x = win_x + det.bbox.x * scale_x;
|
||||
det.bbox.y = win_y + det.bbox.y * scale_y;
|
||||
det.bbox.w *= scale_x;
|
||||
det.bbox.h *= scale_y;
|
||||
if (det.has_landmarks) {
|
||||
for (auto& lm : det.landmarks) {
|
||||
lm.x = win_x + lm.x * scale_x;
|
||||
lm.y = win_y + lm.y * scale_y;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return dets;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
std::string id_;
|
||||
std::string model_path_;
|
||||
ScrfdConfig det_cfg_;
|
||||
ScrfdDetector detector_;
|
||||
int model_w_ = 640;
|
||||
int model_h_ = 640;
|
||||
int target_height_ = 640;
|
||||
|
||||
std::vector<Window> windows_;
|
||||
|
||||
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
|
||||
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
||||
std::shared_ptr<IInferBackend> infer_backend_;
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
ModelHandle model_handle_ = kInvalidModelHandle;
|
||||
std::vector<uint8_t> input_buf_;
|
||||
#endif
|
||||
};
|
||||
|
||||
REGISTER_NODE(AiScrfdSlidingNode, "ai_scrfd_sliding");
|
||||
|
||||
} // namespace rk3588
|
||||
@ -428,10 +428,7 @@ public:
|
||||
for (const auto& d : frame->det->items) {
|
||||
if (d.cls_id == 10) no_boots_count++;
|
||||
}
|
||||
if (no_boots_count > 0 || processed_frames_ % 30 == 0) {
|
||||
LogInfo("[alarm] frame received, dets=" + std::to_string(frame->det->items.size()) +
|
||||
" no_boots=" + std::to_string(no_boots_count));
|
||||
}
|
||||
// Log throttled
|
||||
}
|
||||
|
||||
if (eval_interval_ms_ > 0 && frame->pts > 0) {
|
||||
|
||||
@ -128,6 +128,30 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// 将检测坐标(相对于原始图像)映射到当前帧坐标
|
||||
// Map a detection bbox (expressed in the original source-image coordinate
// space recorded in transform_meta) into the current frame's coordinates.
// Falls back to returning the bbox unchanged when no valid transform
// metadata is available or when any dimension is non-positive.
Rect MapDetCoordToFrame(const Rect& det_bbox, FramePtr frame) {
    if (!frame->transform_meta || !frame->transform_meta->valid) {
        return det_bbox; // no transform info — use the bbox as-is
    }

    const auto& meta = *frame->transform_meta;
    if (meta.src_w <= 0 || meta.src_h <= 0 || frame->width <= 0 || frame->height <= 0) {
        return det_bbox; // degenerate dimensions — cannot scale safely
    }

    // Scale factors: detection coordinates are based on src_w x src_h.
    float scale_x = static_cast<float>(frame->width) / meta.src_w;
    float scale_y = static_cast<float>(frame->height) / meta.src_h;

    Rect mapped;
    mapped.x = det_bbox.x * scale_x;
    mapped.y = det_bbox.y * scale_y;
    mapped.w = det_bbox.w * scale_x;
    mapped.h = det_bbox.h * scale_y;

    return mapped;
}
|
||||
|
||||
void ProcessPpeBootsCheck(FramePtr frame) {
|
||||
const auto& detections = frame->det->items;
|
||||
|
||||
@ -145,7 +169,12 @@ private:
|
||||
|
||||
if (config_.debug) {
|
||||
LogInfo("[LogicGateNode] Persons=" + std::to_string(persons.size()) +
|
||||
" Boots=" + std::to_string(boots.size()));
|
||||
" Boots=" + std::to_string(boots.size()) +
|
||||
" Frame=" + std::to_string(frame->width) + "x" + std::to_string(frame->height));
|
||||
if (frame->transform_meta && frame->transform_meta->valid) {
|
||||
LogInfo("[LogicGateNode] TransformMeta: src=" + std::to_string(frame->transform_meta->src_w) +
|
||||
"x" + std::to_string(frame->transform_meta->src_h));
|
||||
}
|
||||
}
|
||||
|
||||
// 简化逻辑:必须同时检测到人和鞋,才开始判断
|
||||
@ -158,7 +187,21 @@ private:
|
||||
// 对每只鞋进行颜色检查
|
||||
for (const auto& boot : boots) {
|
||||
if (config_.enable_color_check && color_analyzer_) {
|
||||
auto color_result = color_analyzer_->Analyze(*frame, boot.bbox);
|
||||
// 将检测坐标映射到当前帧坐标
|
||||
Rect mapped_bbox = MapDetCoordToFrame(boot.bbox, frame);
|
||||
|
||||
if (config_.debug) {
|
||||
LogInfo("[LogicGateNode] Boot bbox: [" + std::to_string(static_cast<int>(boot.bbox.x)) +
|
||||
"," + std::to_string(static_cast<int>(boot.bbox.y)) +
|
||||
" " + std::to_string(static_cast<int>(boot.bbox.w)) +
|
||||
"x" + std::to_string(static_cast<int>(boot.bbox.h)) +
|
||||
"] -> Mapped: [" + std::to_string(static_cast<int>(mapped_bbox.x)) +
|
||||
"," + std::to_string(static_cast<int>(mapped_bbox.y)) +
|
||||
" " + std::to_string(static_cast<int>(mapped_bbox.w)) +
|
||||
"x" + std::to_string(static_cast<int>(mapped_bbox.h)) + "]");
|
||||
}
|
||||
|
||||
auto color_result = color_analyzer_->Analyze(*frame, mapped_bbox);
|
||||
|
||||
if (config_.debug) {
|
||||
LogInfo("[LogicGateNode] Boot brightness=" +
|
||||
|
||||
157
src/face/scrfd_detector.cpp
Normal file
157
src/face/scrfd_detector.cpp
Normal file
@ -0,0 +1,157 @@
|
||||
/**
|
||||
* SCRFD Detector Implementation
|
||||
*/
|
||||
|
||||
#include "face/scrfd_detector.h"
|
||||
#include "ai_scheduler.h" // For BorrowedOutput
|
||||
#include "face/face_detection_utils.h"
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
namespace rk3588 {
|
||||
|
||||
ScrfdDetector::ScrfdDetector() = default;
|
||||
ScrfdDetector::~ScrfdDetector() = default;
|
||||
|
||||
void ScrfdDetector::Init(int model_w, int model_h) {
|
||||
model_w_ = model_w;
|
||||
model_h_ = model_h;
|
||||
|
||||
// Generate center points
|
||||
const int strides[] = {8, 16, 32};
|
||||
|
||||
for (int stride : strides) {
|
||||
int num_grid = model_w_ / stride;
|
||||
for (int y = 0; y < num_grid; ++y) {
|
||||
for (int x = 0; x < num_grid; ++x) {
|
||||
// 2 anchors per location
|
||||
for (int a = 0; a < 2; ++a) {
|
||||
CenterPoint pt;
|
||||
pt.cx = static_cast<float>(x);
|
||||
pt.cy = static_cast<float>(y);
|
||||
pt.stride = static_cast<float>(stride);
|
||||
center_points_.push_back(pt);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<FaceDetItem> ScrfdDetector::Decode(
|
||||
const std::vector<AiScheduler::BorrowedOutput>& outputs,
|
||||
int src_w, int src_h,
|
||||
const ScrfdConfig& cfg) {
|
||||
|
||||
std::vector<FaceDetItem> detections;
|
||||
|
||||
if (outputs.size() != 9) return detections;
|
||||
|
||||
// Output order: score_8, score_16, score_32, bbox_8, bbox_16, bbox_32, kps_8, kps_16, kps_32
|
||||
const int anchor_counts[] = {12800, 3200, 800};
|
||||
const int strides[] = {8, 16, 32};
|
||||
|
||||
size_t anchor_idx = 0;
|
||||
|
||||
for (int s = 0; s < 3; ++s) {
|
||||
int stride = strides[s];
|
||||
int count = anchor_counts[s];
|
||||
|
||||
const auto& score_out = outputs[s];
|
||||
const auto& bbox_out = outputs[s + 3];
|
||||
const auto& kps_out = outputs[s + 6];
|
||||
|
||||
if (score_out.dims.size() < 3) continue;
|
||||
|
||||
const float* scores = reinterpret_cast<const float*>(score_out.data);
|
||||
const float* bboxes = reinterpret_cast<const float*>(bbox_out.data);
|
||||
const float* kps = reinterpret_cast<const float*>(kps_out.data);
|
||||
|
||||
if (!scores || !bboxes || !kps) continue;
|
||||
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (anchor_idx >= center_points_.size()) break;
|
||||
|
||||
float score = scores[i];
|
||||
if (score < cfg.conf_thresh) {
|
||||
anchor_idx++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const CenterPoint& pt = center_points_[anchor_idx];
|
||||
|
||||
// BBox: [left, top, right, bottom] - distances from center
|
||||
float left = bboxes[i * 4 + 0];
|
||||
float top = bboxes[i * 4 + 1];
|
||||
float right = bboxes[i * 4 + 2];
|
||||
float bottom = bboxes[i * 4 + 3];
|
||||
|
||||
// Decode to image coordinates (640x640)
|
||||
float x1_640 = (pt.cx - left) * stride;
|
||||
float y1_640 = (pt.cy - top) * stride;
|
||||
float x2_640 = (pt.cx + right) * stride;
|
||||
float y2_640 = (pt.cy + bottom) * stride;
|
||||
|
||||
// Scale to original image size
|
||||
float scale_x = static_cast<float>(src_w) / model_w_;
|
||||
float scale_y = static_cast<float>(src_h) / model_h_;
|
||||
|
||||
FaceDetItem det;
|
||||
det.bbox.x = x1_640 * scale_x;
|
||||
det.bbox.y = y1_640 * scale_y;
|
||||
det.bbox.w = (x2_640 - x1_640) * scale_x;
|
||||
det.bbox.h = (y2_640 - y1_640) * scale_y;
|
||||
det.score = score;
|
||||
det.has_landmarks = cfg.output_landmarks;
|
||||
|
||||
// Keypoints
|
||||
if (cfg.output_landmarks) {
|
||||
for (int p = 0; p < 5; ++p) {
|
||||
float kps_x = kps[i * 10 + p * 2 + 0];
|
||||
float kps_y = kps[i * 10 + p * 2 + 1];
|
||||
float kx_640 = (pt.cx + kps_x) * stride;
|
||||
float ky_640 = (pt.cy + kps_y) * stride;
|
||||
det.landmarks[p].x = kx_640 * scale_x;
|
||||
det.landmarks[p].y = ky_640 * scale_y;
|
||||
}
|
||||
}
|
||||
|
||||
detections.push_back(det);
|
||||
anchor_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
return detections;
|
||||
}
|
||||
|
||||
std::vector<FaceDetItem> ScrfdDetector::ApplyNMS(
|
||||
std::vector<FaceDetItem>& dets,
|
||||
float nms_thresh) {
|
||||
|
||||
if (dets.empty()) return dets;
|
||||
|
||||
// Sort by score
|
||||
std::sort(dets.begin(), dets.end(),
|
||||
[](const FaceDetItem& a, const FaceDetItem& b) {
|
||||
return a.score > b.score;
|
||||
});
|
||||
|
||||
std::vector<FaceDetItem> keep;
|
||||
std::vector<bool> suppressed(dets.size(), false);
|
||||
|
||||
for (size_t i = 0; i < dets.size(); ++i) {
|
||||
if (suppressed[i]) continue;
|
||||
|
||||
keep.push_back(dets[i]);
|
||||
|
||||
for (size_t j = i + 1; j < dets.size(); ++j) {
|
||||
if (suppressed[j]) continue;
|
||||
if (face_detection::IoU(dets[i].bbox, dets[j].bbox) > nms_thresh) {
|
||||
suppressed[j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return keep;
|
||||
}
|
||||
|
||||
} // namespace rk3588
|
||||
@ -58,7 +58,6 @@
|
||||
|
||||
<script>
|
||||
const streams = [
|
||||
{ name: 'SCRFD Face Detection', url: '/hls/scrfd/index.m3u8' },
|
||||
{ name: 'Cam 1', url: '/hls/cam1/index.m3u8' },
|
||||
{ name: 'Cam 2', url: '/hls/cam2/index.m3u8' },
|
||||
{ name: 'Cam 3', url: '/hls/cam3/index.m3u8' },
|
||||
|
||||
Loading…
Reference in New Issue
Block a user