Compare commits
1 Commits
master
...
codex/pose
| Author | SHA1 | Date | |
|---|---|---|---|
| e0827e984e |
37
Readme.md
37
Readme.md
@ -1266,3 +1266,40 @@ rsync -avz build-cross/ user@board_ip:/opt/media-server/
|
|||||||
- [docs/models.md](docs/models.md) - 模型转换与部署指南
|
- [docs/models.md](docs/models.md) - 模型转换与部署指南
|
||||||
- [docs/Agent_API_Extensions.md](docs/Agent_API_Extensions.md) - Agent API 扩展说明
|
- [docs/Agent_API_Extensions.md](docs/Agent_API_Extensions.md) - Agent API 扩展说明
|
||||||
- [docs/API_Device_RemoteMgmt_InterfaceTable.md](docs/API_Device_RemoteMgmt_InterfaceTable.md) - 远程管理接口
|
- [docs/API_Device_RemoteMgmt_InterfaceTable.md](docs/API_Device_RemoteMgmt_InterfaceTable.md) - 远程管理接口
|
||||||
|
---
|
||||||
|
|
||||||
|
## Pose Behavior Pipeline
|
||||||
|
|
||||||
|
The pose behavior pipeline is optional and does not replace the existing detection pipeline.
|
||||||
|
|
||||||
|
Baseline behavior graph:
|
||||||
|
|
||||||
|
```text
|
||||||
|
input_rtsp -> preprocess -> ai_yolo -> tracker -> region_event -> action_recog -> event_fusion
|
||||||
|
```
|
||||||
|
|
||||||
|
Pose-enabled behavior graph:
|
||||||
|
|
||||||
|
```text
|
||||||
|
input_rtsp -> preprocess -> ai_yolo -> tracker -> ai_pose -> pose_assoc -> region_event -> action_recog -> event_fusion
|
||||||
|
```
|
||||||
|
|
||||||
|
Engineering rules:
|
||||||
|
|
||||||
|
- `tracker` remains the source of stable `track_id` for the main pipeline.
|
||||||
|
- `ai_pose` only produces `Frame.pose` and does not replace `Frame.det`.
|
||||||
|
- `pose_assoc` is the canonical node for assigning `PoseItem.track_id`.
|
||||||
|
- `action_recog` may use pose-enhanced rules, but it must still run when pose nodes are absent.
|
||||||
|
|
||||||
|
Degradation semantics:
|
||||||
|
|
||||||
|
- If `ai_pose` is not present, the graph keeps the original bbox-based behavior logic.
|
||||||
|
- If `ai_pose` is present but inference fails on some frames, `Frame.pose` may be empty and downstream nodes must continue with bbox data.
|
||||||
|
- If `pose_assoc` is absent, downstream nodes should treat `pose.track_id` as optional and may fall back to bbox matching only as a compatibility path.
|
||||||
|
|
||||||
|
Sample configs:
|
||||||
|
|
||||||
|
- `configs/sample_region_behavior_intrusion.json`
|
||||||
|
- Baseline intrusion-only graph without pose nodes. This is the reference for the non-pose deployment path.
|
||||||
|
- `configs/sample_region_behavior_full.json`
|
||||||
|
- Full behavior graph with `ai_pose` and `pose_assoc`, intended for pose-aware `fall` and `fight`.
|
||||||
|
|||||||
@ -60,6 +60,24 @@
|
|||||||
"max_age_ms": 1200,
|
"max_age_ms": 1200,
|
||||||
"min_hits": 2
|
"min_hits": 2
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": "pose",
|
||||||
|
"type": "ai_pose",
|
||||||
|
"role": "filter",
|
||||||
|
"enable": true,
|
||||||
|
"model_path": "./models/yolov8n-pose.rknn",
|
||||||
|
"model_input_w": 640,
|
||||||
|
"model_input_h": 640,
|
||||||
|
"conf_thresh": 0.25,
|
||||||
|
"nms_thresh": 0.45
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "pose_assoc",
|
||||||
|
"type": "pose_assoc",
|
||||||
|
"role": "filter",
|
||||||
|
"enable": true,
|
||||||
|
"min_iou": 0.1
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": "region_evt",
|
"id": "region_evt",
|
||||||
"type": "region_event",
|
"type": "region_event",
|
||||||
@ -90,16 +108,38 @@
|
|||||||
{
|
{
|
||||||
"type": "fall",
|
"type": "fall",
|
||||||
"window_ms": 1500,
|
"window_ms": 1500,
|
||||||
|
"activate_duration_ms": 300,
|
||||||
|
"bbox": {
|
||||||
|
"enabled": true,
|
||||||
"min_drop_pixels": 120,
|
"min_drop_pixels": 120,
|
||||||
"min_aspect_ratio_delta": 0.35,
|
"min_aspect_ratio_delta": 0.35
|
||||||
"activate_duration_ms": 300
|
},
|
||||||
|
"pose": {
|
||||||
|
"enabled": true,
|
||||||
|
"min_torso_drop_pixels": 120,
|
||||||
|
"max_upright_ratio": 0.60
|
||||||
|
},
|
||||||
|
"fusion": {
|
||||||
|
"match_mode": "any"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "fight",
|
"type": "fight",
|
||||||
"window_ms": 1200,
|
"window_ms": 1200,
|
||||||
|
"activate_duration_ms": 200,
|
||||||
|
"bbox": {
|
||||||
|
"enabled": true,
|
||||||
"proximity_pixels": 220,
|
"proximity_pixels": 220,
|
||||||
"min_motion_pixels": 90,
|
"min_motion_pixels": 90
|
||||||
"activate_duration_ms": 200
|
},
|
||||||
|
"pose": {
|
||||||
|
"enabled": true,
|
||||||
|
"min_wrist_motion_pixels": 120,
|
||||||
|
"max_wrist_distance_pixels": 120
|
||||||
|
},
|
||||||
|
"fusion": {
|
||||||
|
"match_mode": "any"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -190,7 +230,9 @@
|
|||||||
["in", "pre"],
|
["in", "pre"],
|
||||||
["pre", "person_det"],
|
["pre", "person_det"],
|
||||||
["person_det", "trk"],
|
["person_det", "trk"],
|
||||||
["trk", "region_evt"],
|
["trk", "pose"],
|
||||||
|
["pose", "pose_assoc"],
|
||||||
|
["pose_assoc", "region_evt"],
|
||||||
["region_evt", "action_evt"],
|
["region_evt", "action_evt"],
|
||||||
["action_evt", "fusion"],
|
["action_evt", "fusion"],
|
||||||
["fusion", "osd"],
|
["fusion", "osd"],
|
||||||
|
|||||||
@ -554,3 +554,117 @@ input_rtsp
|
|||||||
|
|
||||||
**版本**:v2.0
|
**版本**:v2.0
|
||||||
**更新日期**:2026-03-15
|
**更新日期**:2026-03-15
|
||||||
|
## Pose-Enabled Behavior Configuration
|
||||||
|
|
||||||
|
This repository now supports two behavior deployment modes.
|
||||||
|
|
||||||
|
### Mode 1: Baseline Behavior Graph
|
||||||
|
|
||||||
|
Use this when you only need tracked detections and rule-based region behavior:
|
||||||
|
|
||||||
|
```text
|
||||||
|
input_rtsp -> preprocess -> ai_yolo -> tracker -> region_event -> action_recog -> event_fusion
|
||||||
|
```
|
||||||
|
|
||||||
|
Reference config:
|
||||||
|
|
||||||
|
- `configs/sample_region_behavior_intrusion.json`
|
||||||
|
|
||||||
|
Characteristics:
|
||||||
|
|
||||||
|
- No pose model dependency
|
||||||
|
- Lowest compute cost
|
||||||
|
- `fall` and `fight` rely on bbox-only rules
|
||||||
|
|
||||||
|
### Mode 2: Pose-Enabled Behavior Graph
|
||||||
|
|
||||||
|
Use this when you need pose-aware `fall` and `fight`:
|
||||||
|
|
||||||
|
```text
|
||||||
|
input_rtsp -> preprocess -> ai_yolo -> tracker -> ai_pose -> pose_assoc -> region_event -> action_recog -> event_fusion
|
||||||
|
```
|
||||||
|
|
||||||
|
Reference config:
|
||||||
|
|
||||||
|
- `configs/sample_region_behavior_full.json`
|
||||||
|
|
||||||
|
Characteristics:
|
||||||
|
|
||||||
|
- `ai_pose` writes `Frame.pose`
|
||||||
|
- `pose_assoc` assigns `PoseItem.track_id`
|
||||||
|
- `action_recog` can fuse bbox and pose signals per event rule
|
||||||
|
|
||||||
|
### Optional Integration Rules
|
||||||
|
|
||||||
|
- `ai_pose` is optional. Do not add it to graphs that only need intrusion or climb.
|
||||||
|
- `pose_assoc` should be placed after `ai_pose` and after tracked detections are already available on the frame.
|
||||||
|
- `action_recog` must remain runnable when `Frame.pose` is missing or empty.
|
||||||
|
|
||||||
|
### Degradation Semantics
|
||||||
|
|
||||||
|
- Without `ai_pose`, the graph must still behave correctly with bbox-only behavior logic.
|
||||||
|
- With `ai_pose` but without `pose_assoc`, `pose.track_id` is not guaranteed and downstream logic should treat it as optional compatibility mode.
|
||||||
|
- If pose inference returns no items for a frame, the rest of the graph must keep using `Frame.det` and `track_id`.
|
||||||
|
|
||||||
|
### Structured `action_recog` Rules
|
||||||
|
|
||||||
|
`action_recog` supports a structured event format with `bbox`, `pose`, and `fusion` sections.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "action_evt",
|
||||||
|
"type": "action_recog",
|
||||||
|
"events": [
|
||||||
|
{
|
||||||
|
"type": "fall",
|
||||||
|
"window_ms": 1500,
|
||||||
|
"activate_duration_ms": 300,
|
||||||
|
"bbox": {
|
||||||
|
"enabled": true,
|
||||||
|
"min_drop_pixels": 120,
|
||||||
|
"min_aspect_ratio_delta": 0.35
|
||||||
|
},
|
||||||
|
"pose": {
|
||||||
|
"enabled": true,
|
||||||
|
"min_torso_drop_pixels": 120,
|
||||||
|
"max_upright_ratio": 0.60
|
||||||
|
},
|
||||||
|
"fusion": {
|
||||||
|
"match_mode": "any"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "fight",
|
||||||
|
"window_ms": 1200,
|
||||||
|
"activate_duration_ms": 200,
|
||||||
|
"bbox": {
|
||||||
|
"enabled": true,
|
||||||
|
"proximity_pixels": 220,
|
||||||
|
"min_motion_pixels": 90
|
||||||
|
},
|
||||||
|
"pose": {
|
||||||
|
"enabled": true,
|
||||||
|
"min_wrist_motion_pixels": 120,
|
||||||
|
"max_wrist_distance_pixels": 120
|
||||||
|
},
|
||||||
|
"fusion": {
|
||||||
|
"match_mode": "any"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
|
||||||
|
- `bbox.enabled=false` disables bbox signal evaluation for that event.
|
||||||
|
- `pose.enabled=false` disables pose signal evaluation for that event.
|
||||||
|
- `fusion.match_mode="any"` means either bbox or pose may activate the event.
|
||||||
|
- `fusion.match_mode="all"` means bbox and pose must both activate the event.
|
||||||
|
|
||||||
|
Backward compatibility:
|
||||||
|
|
||||||
|
- Old flat keys such as `min_drop_pixels` and `pose_min_torso_drop_pixels` are still accepted.
|
||||||
|
- New configs should prefer the structured form above.
|
||||||
|
|||||||
@ -16,6 +16,7 @@ namespace rk3588 {
|
|||||||
|
|
||||||
struct FaceDetResult;
|
struct FaceDetResult;
|
||||||
struct FaceRecogResult;
|
struct FaceRecogResult;
|
||||||
|
struct PoseResult;
|
||||||
|
|
||||||
enum class PixelFormat {
|
enum class PixelFormat {
|
||||||
NV12,
|
NV12,
|
||||||
@ -82,6 +83,7 @@ struct Frame {
|
|||||||
|
|
||||||
std::shared_ptr<DetectionResult> det;
|
std::shared_ptr<DetectionResult> det;
|
||||||
std::shared_ptr<BehaviorEventResult> behavior_events;
|
std::shared_ptr<BehaviorEventResult> behavior_events;
|
||||||
|
std::shared_ptr<PoseResult> pose;
|
||||||
// Face recognition pipeline meta (kept separate from user_meta to avoid conflicts with publish).
|
// Face recognition pipeline meta (kept separate from user_meta to avoid conflicts with publish).
|
||||||
std::shared_ptr<FaceDetResult> face_det;
|
std::shared_ptr<FaceDetResult> face_det;
|
||||||
std::shared_ptr<FaceRecogResult> face_recog;
|
std::shared_ptr<FaceRecogResult> face_recog;
|
||||||
|
|||||||
34
include/pose/pose_result.h
Normal file
34
include/pose/pose_result.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "frame/rect.h"
|
||||||
|
|
||||||
|
namespace rk3588 {
|
||||||
|
|
||||||
|
struct PosePoint2f {
|
||||||
|
float x = 0.0f;
|
||||||
|
float y = 0.0f;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PoseKeypoint {
|
||||||
|
PosePoint2f point{};
|
||||||
|
float score = 0.0f;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PoseItem {
|
||||||
|
Rect bbox{};
|
||||||
|
float score = 0.0f;
|
||||||
|
int track_id = -1;
|
||||||
|
std::vector<PoseKeypoint> keypoints;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PoseResult {
|
||||||
|
std::vector<PoseItem> items;
|
||||||
|
int img_w = 0;
|
||||||
|
int img_h = 0;
|
||||||
|
std::string model_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rk3588
|
||||||
@ -482,6 +482,30 @@ set_target_properties(action_recog PROPERTIES
|
|||||||
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ai_pose plugin (pose inference scaffold)
|
||||||
|
add_library(ai_pose SHARED
|
||||||
|
ai_pose/ai_pose_node.cpp
|
||||||
|
)
|
||||||
|
target_include_directories(ai_pose PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
|
||||||
|
target_link_libraries(ai_pose PRIVATE project_options Threads::Threads)
|
||||||
|
set_target_properties(ai_pose PROPERTIES
|
||||||
|
OUTPUT_NAME "ai_pose"
|
||||||
|
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||||
|
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
|
# pose_assoc plugin (assign pose.track_id from tracked detections)
|
||||||
|
add_library(pose_assoc SHARED
|
||||||
|
pose_assoc/pose_assoc_node.cpp
|
||||||
|
)
|
||||||
|
target_include_directories(pose_assoc PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
|
||||||
|
target_link_libraries(pose_assoc PRIVATE project_options Threads::Threads)
|
||||||
|
set_target_properties(pose_assoc PROPERTIES
|
||||||
|
OUTPUT_NAME "pose_assoc"
|
||||||
|
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||||
|
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
# event_fusion plugin (behavior event id and lifecycle normalization)
|
# event_fusion plugin (behavior event id and lifecycle normalization)
|
||||||
add_library(event_fusion SHARED
|
add_library(event_fusion SHARED
|
||||||
event_fusion/event_fusion_node.cpp
|
event_fusion/event_fusion_node.cpp
|
||||||
@ -565,7 +589,7 @@ set_target_properties(ai_shoe_det PROPERTIES
|
|||||||
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
|
||||||
)
|
)
|
||||||
|
|
||||||
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate region_event action_recog event_fusion storage ai_scheduler ai_shoe_det
|
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate region_event action_recog ai_pose pose_assoc event_fusion storage ai_scheduler ai_shoe_det
|
||||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
|
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
|
||||||
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
|
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
|
||||||
)
|
)
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
|
#include <limits>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <set>
|
#include <set>
|
||||||
@ -12,6 +13,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "behavior/behavior_event.h"
|
#include "behavior/behavior_event.h"
|
||||||
|
#include "pose/pose_result.h"
|
||||||
#include "utils/logger.h"
|
#include "utils/logger.h"
|
||||||
|
|
||||||
namespace rk3588 {
|
namespace rk3588 {
|
||||||
@ -22,21 +24,135 @@ enum class ActionEventKind {
|
|||||||
Fight
|
Fight
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class ActionSignalMode {
|
||||||
|
Any,
|
||||||
|
All
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FallBboxRule {
|
||||||
|
bool enabled = true;
|
||||||
|
float min_drop_pixels = 0.0f;
|
||||||
|
float min_aspect_ratio_delta = 0.0f;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FallPoseRule {
|
||||||
|
bool enabled = false;
|
||||||
|
float min_torso_drop_pixels = 0.0f;
|
||||||
|
float max_upright_ratio = 0.0f;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FightBboxRule {
|
||||||
|
bool enabled = true;
|
||||||
|
float proximity_pixels = 0.0f;
|
||||||
|
float min_motion_pixels = 0.0f;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FightPoseRule {
|
||||||
|
bool enabled = false;
|
||||||
|
float min_wrist_motion_pixels = 0.0f;
|
||||||
|
float max_wrist_distance_pixels = 0.0f;
|
||||||
|
};
|
||||||
|
|
||||||
struct ActionEventRule {
|
struct ActionEventRule {
|
||||||
ActionEventKind kind = ActionEventKind::Fall;
|
ActionEventKind kind = ActionEventKind::Fall;
|
||||||
uint64_t window_ms = 1000;
|
uint64_t window_ms = 1000;
|
||||||
uint64_t activate_duration_ms = 0;
|
uint64_t activate_duration_ms = 0;
|
||||||
float min_drop_pixels = 0.0f;
|
ActionSignalMode signal_mode = ActionSignalMode::Any;
|
||||||
float min_aspect_ratio_delta = 0.0f;
|
FallBboxRule fall_bbox{};
|
||||||
float proximity_pixels = 0.0f;
|
FallPoseRule fall_pose{};
|
||||||
float min_motion_pixels = 0.0f;
|
FightBboxRule fight_bbox{};
|
||||||
|
FightPoseRule fight_pose{};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PoseTrackSample {
|
||||||
|
bool matched = false;
|
||||||
|
float torso_center_y = 0.0f;
|
||||||
|
float upright_ratio = 0.0f;
|
||||||
|
float left_wrist_x = 0.0f;
|
||||||
|
float left_wrist_y = 0.0f;
|
||||||
|
float right_wrist_x = 0.0f;
|
||||||
|
float right_wrist_y = 0.0f;
|
||||||
|
bool has_left_wrist = false;
|
||||||
|
bool has_right_wrist = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TrackSample {
|
struct TrackSample {
|
||||||
uint64_t pts = 0;
|
uint64_t pts = 0;
|
||||||
Rect bbox{};
|
Rect bbox{};
|
||||||
|
PoseTrackSample pose{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static ActionSignalMode ParseSignalMode(const std::string& value) {
|
||||||
|
if (value == "all") return ActionSignalMode::All;
|
||||||
|
return ActionSignalMode::Any;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool EvaluateSignalMode(ActionSignalMode mode, bool bbox_trigger, bool pose_trigger,
|
||||||
|
bool bbox_enabled, bool pose_enabled) {
|
||||||
|
if (bbox_enabled && pose_enabled) {
|
||||||
|
return mode == ActionSignalMode::All ? (bbox_trigger && pose_trigger) : (bbox_trigger || pose_trigger);
|
||||||
|
}
|
||||||
|
if (bbox_enabled) return bbox_trigger;
|
||||||
|
if (pose_enabled) return pose_trigger;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool ParseFallRule(const SimpleJson& ev, ActionEventRule& rule) {
|
||||||
|
rule.kind = ActionEventKind::Fall;
|
||||||
|
rule.fall_bbox.min_drop_pixels = ev.ValueOr<float>("min_drop_pixels", 0.0f);
|
||||||
|
rule.fall_bbox.min_aspect_ratio_delta = ev.ValueOr<float>("min_aspect_ratio_delta", 0.0f);
|
||||||
|
rule.fall_pose.min_torso_drop_pixels = ev.ValueOr<float>("pose_min_torso_drop_pixels", 0.0f);
|
||||||
|
rule.fall_pose.max_upright_ratio = ev.ValueOr<float>("pose_max_upright_ratio", 0.0f);
|
||||||
|
|
||||||
|
if (const SimpleJson* bbox = ev.Find("bbox"); bbox && bbox->IsObject()) {
|
||||||
|
rule.fall_bbox.enabled = bbox->ValueOr<bool>("enabled", true);
|
||||||
|
rule.fall_bbox.min_drop_pixels = bbox->ValueOr<float>("min_drop_pixels", rule.fall_bbox.min_drop_pixels);
|
||||||
|
rule.fall_bbox.min_aspect_ratio_delta =
|
||||||
|
bbox->ValueOr<float>("min_aspect_ratio_delta", rule.fall_bbox.min_aspect_ratio_delta);
|
||||||
|
} else {
|
||||||
|
rule.fall_bbox.enabled = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const SimpleJson* pose = ev.Find("pose"); pose && pose->IsObject()) {
|
||||||
|
rule.fall_pose.enabled = pose->ValueOr<bool>("enabled", true);
|
||||||
|
rule.fall_pose.min_torso_drop_pixels =
|
||||||
|
pose->ValueOr<float>("min_torso_drop_pixels", rule.fall_pose.min_torso_drop_pixels);
|
||||||
|
rule.fall_pose.max_upright_ratio =
|
||||||
|
pose->ValueOr<float>("max_upright_ratio", rule.fall_pose.max_upright_ratio);
|
||||||
|
} else {
|
||||||
|
rule.fall_pose.enabled = rule.fall_pose.min_torso_drop_pixels > 0.0f || rule.fall_pose.max_upright_ratio > 0.0f;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool ParseFightRule(const SimpleJson& ev, ActionEventRule& rule) {
|
||||||
|
rule.kind = ActionEventKind::Fight;
|
||||||
|
rule.fight_bbox.proximity_pixels = ev.ValueOr<float>("proximity_pixels", 0.0f);
|
||||||
|
rule.fight_bbox.min_motion_pixels = ev.ValueOr<float>("min_motion_pixels", 0.0f);
|
||||||
|
rule.fight_pose.min_wrist_motion_pixels = ev.ValueOr<float>("pose_min_wrist_motion_pixels", 0.0f);
|
||||||
|
rule.fight_pose.max_wrist_distance_pixels = ev.ValueOr<float>("pose_max_wrist_distance_pixels", 0.0f);
|
||||||
|
|
||||||
|
if (const SimpleJson* bbox = ev.Find("bbox"); bbox && bbox->IsObject()) {
|
||||||
|
rule.fight_bbox.enabled = bbox->ValueOr<bool>("enabled", true);
|
||||||
|
rule.fight_bbox.proximity_pixels = bbox->ValueOr<float>("proximity_pixels", rule.fight_bbox.proximity_pixels);
|
||||||
|
rule.fight_bbox.min_motion_pixels = bbox->ValueOr<float>("min_motion_pixels", rule.fight_bbox.min_motion_pixels);
|
||||||
|
} else {
|
||||||
|
rule.fight_bbox.enabled = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const SimpleJson* pose = ev.Find("pose"); pose && pose->IsObject()) {
|
||||||
|
rule.fight_pose.enabled = pose->ValueOr<bool>("enabled", true);
|
||||||
|
rule.fight_pose.min_wrist_motion_pixels =
|
||||||
|
pose->ValueOr<float>("min_wrist_motion_pixels", rule.fight_pose.min_wrist_motion_pixels);
|
||||||
|
rule.fight_pose.max_wrist_distance_pixels =
|
||||||
|
pose->ValueOr<float>("max_wrist_distance_pixels", rule.fight_pose.max_wrist_distance_pixels);
|
||||||
|
} else {
|
||||||
|
rule.fight_pose.enabled =
|
||||||
|
rule.fight_pose.min_wrist_motion_pixels > 0.0f || rule.fight_pose.max_wrist_distance_pixels > 0.0f;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& out, std::string& err) {
|
static bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& out, std::string& err) {
|
||||||
const SimpleJson* events = config.Find("events");
|
const SimpleJson* events = config.Find("events");
|
||||||
if (!events || !events->IsArray()) {
|
if (!events || !events->IsArray()) {
|
||||||
@ -55,15 +171,15 @@ static bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& o
|
|||||||
const std::string type = ev.ValueOr<std::string>("type", "");
|
const std::string type = ev.ValueOr<std::string>("type", "");
|
||||||
rule.window_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("window_ms", 1000)));
|
rule.window_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("window_ms", 1000)));
|
||||||
rule.activate_duration_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("activate_duration_ms", 0)));
|
rule.activate_duration_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("activate_duration_ms", 0)));
|
||||||
|
rule.signal_mode = ParseSignalMode(ev.ValueOr<std::string>("signal_mode", "any"));
|
||||||
|
if (const SimpleJson* fusion = ev.Find("fusion"); fusion && fusion->IsObject()) {
|
||||||
|
rule.signal_mode = ParseSignalMode(fusion->ValueOr<std::string>("match_mode", "any"));
|
||||||
|
}
|
||||||
|
|
||||||
if (type == "fall") {
|
if (type == "fall") {
|
||||||
rule.kind = ActionEventKind::Fall;
|
ParseFallRule(ev, rule);
|
||||||
rule.min_drop_pixels = ev.ValueOr<float>("min_drop_pixels", 0.0f);
|
|
||||||
rule.min_aspect_ratio_delta = ev.ValueOr<float>("min_aspect_ratio_delta", 0.0f);
|
|
||||||
} else if (type == "fight") {
|
} else if (type == "fight") {
|
||||||
rule.kind = ActionEventKind::Fight;
|
ParseFightRule(ev, rule);
|
||||||
rule.proximity_pixels = ev.ValueOr<float>("proximity_pixels", 0.0f);
|
|
||||||
rule.min_motion_pixels = ev.ValueOr<float>("min_motion_pixels", 0.0f);
|
|
||||||
} else {
|
} else {
|
||||||
err = "unsupported event type: " + type;
|
err = "unsupported event type: " + type;
|
||||||
return false;
|
return false;
|
||||||
@ -98,6 +214,129 @@ static float AspectRatio(const Rect& rect) {
|
|||||||
return rect.h > 0.0f ? (rect.w / rect.h) : 0.0f;
|
return rect.h > 0.0f ? (rect.w / rect.h) : 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static float BboxIoU(const Rect& lhs, const Rect& rhs) {
|
||||||
|
const float x1 = std::max(lhs.x, rhs.x);
|
||||||
|
const float y1 = std::max(lhs.y, rhs.y);
|
||||||
|
const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
|
||||||
|
const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
|
||||||
|
const float iw = std::max(0.0f, x2 - x1);
|
||||||
|
const float ih = std::max(0.0f, y2 - y1);
|
||||||
|
const float inter = iw * ih;
|
||||||
|
const float uni = lhs.w * lhs.h + rhs.w * rhs.h - inter;
|
||||||
|
return uni <= 0.0f ? 0.0f : (inter / uni);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool IsValidKeypoint(const PoseItem& pose, int index) {
|
||||||
|
return index >= 0 &&
|
||||||
|
static_cast<size_t>(index) < pose.keypoints.size() &&
|
||||||
|
pose.keypoints[static_cast<size_t>(index)].score > 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PoseTrackSample BuildPoseTrackSample(const PoseItem& pose) {
|
||||||
|
PoseTrackSample sample;
|
||||||
|
sample.matched = true;
|
||||||
|
|
||||||
|
const bool has_left_shoulder = IsValidKeypoint(pose, 5);
|
||||||
|
const bool has_right_shoulder = IsValidKeypoint(pose, 6);
|
||||||
|
const bool has_left_hip = IsValidKeypoint(pose, 11);
|
||||||
|
const bool has_right_hip = IsValidKeypoint(pose, 12);
|
||||||
|
if (has_left_shoulder && has_right_shoulder && has_left_hip && has_right_hip) {
|
||||||
|
const PosePoint2f& ls = pose.keypoints[5].point;
|
||||||
|
const PosePoint2f& rs = pose.keypoints[6].point;
|
||||||
|
const PosePoint2f& lh = pose.keypoints[11].point;
|
||||||
|
const PosePoint2f& rh = pose.keypoints[12].point;
|
||||||
|
const float shoulder_center_y = (ls.y + rs.y) * 0.5f;
|
||||||
|
const float hip_center_y = (lh.y + rh.y) * 0.5f;
|
||||||
|
sample.torso_center_y = (shoulder_center_y + hip_center_y) * 0.5f;
|
||||||
|
const float min_x = std::min(std::min(ls.x, rs.x), std::min(lh.x, rh.x));
|
||||||
|
const float max_x = std::max(std::max(ls.x, rs.x), std::max(lh.x, rh.x));
|
||||||
|
const float min_y = std::min(std::min(ls.y, rs.y), std::min(lh.y, rh.y));
|
||||||
|
const float max_y = std::max(std::max(ls.y, rs.y), std::max(lh.y, rh.y));
|
||||||
|
const float horizontal_span = std::fabs(max_x - min_x);
|
||||||
|
const float vertical_span = std::fabs(max_y - min_y);
|
||||||
|
sample.upright_ratio = vertical_span > 1e-3f ? (horizontal_span / vertical_span) : std::numeric_limits<float>::infinity();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IsValidKeypoint(pose, 9)) {
|
||||||
|
sample.left_wrist_x = pose.keypoints[9].point.x;
|
||||||
|
sample.left_wrist_y = pose.keypoints[9].point.y;
|
||||||
|
sample.has_left_wrist = true;
|
||||||
|
}
|
||||||
|
if (IsValidKeypoint(pose, 10)) {
|
||||||
|
sample.right_wrist_x = pose.keypoints[10].point.x;
|
||||||
|
sample.right_wrist_y = pose.keypoints[10].point.y;
|
||||||
|
sample.has_right_wrist = true;
|
||||||
|
}
|
||||||
|
return sample;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PoseTrackSample MatchPoseToDetection(const Rect& bbox, const std::shared_ptr<PoseResult>& pose_result) {
|
||||||
|
if (!pose_result) return {};
|
||||||
|
|
||||||
|
float best_iou = 0.0f;
|
||||||
|
const PoseItem* best_pose = nullptr;
|
||||||
|
for (const auto& pose : pose_result->items) {
|
||||||
|
const float iou = BboxIoU(bbox, pose.bbox);
|
||||||
|
if (iou > best_iou) {
|
||||||
|
best_iou = iou;
|
||||||
|
best_pose = &pose;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!best_pose || best_iou <= 0.0f) return {};
|
||||||
|
return BuildPoseTrackSample(*best_pose);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PoseTrackSample MatchPoseToTrackId(int track_id, const Rect& bbox, const std::shared_ptr<PoseResult>& pose_result) {
|
||||||
|
if (!pose_result) return {};
|
||||||
|
|
||||||
|
const PoseItem* best_pose = nullptr;
|
||||||
|
float best_iou = 0.0f;
|
||||||
|
for (const auto& pose : pose_result->items) {
|
||||||
|
if (pose.track_id != track_id) continue;
|
||||||
|
const float iou = BboxIoU(bbox, pose.bbox);
|
||||||
|
if (iou >= best_iou) {
|
||||||
|
best_iou = iou;
|
||||||
|
best_pose = &pose;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (best_pose) return BuildPoseTrackSample(*best_pose);
|
||||||
|
return MatchPoseToDetection(bbox, pose_result);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float WristMotion(const PoseTrackSample& newer, const PoseTrackSample& older) {
|
||||||
|
float motion = 0.0f;
|
||||||
|
if (newer.has_left_wrist && older.has_left_wrist) {
|
||||||
|
const float dx = newer.left_wrist_x - older.left_wrist_x;
|
||||||
|
const float dy = newer.left_wrist_y - older.left_wrist_y;
|
||||||
|
motion += std::sqrt(dx * dx + dy * dy);
|
||||||
|
}
|
||||||
|
if (newer.has_right_wrist && older.has_right_wrist) {
|
||||||
|
const float dx = newer.right_wrist_x - older.right_wrist_x;
|
||||||
|
const float dy = newer.right_wrist_y - older.right_wrist_y;
|
||||||
|
motion += std::sqrt(dx * dx + dy * dy);
|
||||||
|
}
|
||||||
|
return motion;
|
||||||
|
}
|
||||||
|
|
||||||
|
static float MinimumWristDistance(const PoseTrackSample& lhs, const PoseTrackSample& rhs) {
|
||||||
|
float best = std::numeric_limits<float>::infinity();
|
||||||
|
auto update = [&](bool has_a, float ax, float ay, bool has_b, float bx, float by) {
|
||||||
|
if (!has_a || !has_b) return;
|
||||||
|
const float dx = ax - bx;
|
||||||
|
const float dy = ay - by;
|
||||||
|
best = std::min(best, std::sqrt(dx * dx + dy * dy));
|
||||||
|
};
|
||||||
|
update(lhs.has_left_wrist, lhs.left_wrist_x, lhs.left_wrist_y,
|
||||||
|
rhs.has_left_wrist, rhs.left_wrist_x, rhs.left_wrist_y);
|
||||||
|
update(lhs.has_left_wrist, lhs.left_wrist_x, lhs.left_wrist_y,
|
||||||
|
rhs.has_right_wrist, rhs.right_wrist_x, rhs.right_wrist_y);
|
||||||
|
update(lhs.has_right_wrist, lhs.right_wrist_x, lhs.right_wrist_y,
|
||||||
|
rhs.has_left_wrist, rhs.left_wrist_x, rhs.left_wrist_y);
|
||||||
|
update(lhs.has_right_wrist, lhs.right_wrist_x, lhs.right_wrist_y,
|
||||||
|
rhs.has_right_wrist, rhs.right_wrist_x, rhs.right_wrist_y);
|
||||||
|
return std::isfinite(best) ? best : std::numeric_limits<float>::infinity();
|
||||||
|
}
|
||||||
|
|
||||||
static BehaviorEventType ToBehaviorEventType(ActionEventKind kind) {
|
static BehaviorEventType ToBehaviorEventType(ActionEventKind kind) {
|
||||||
return kind == ActionEventKind::Fall ? BehaviorEventType::Fall : BehaviorEventType::Fight;
|
return kind == ActionEventKind::Fall ? BehaviorEventType::Fall : BehaviorEventType::Fight;
|
||||||
}
|
}
|
||||||
@ -152,7 +391,7 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
|
|||||||
if (det.track_id < 0) continue;
|
if (det.track_id < 0) continue;
|
||||||
current_tracks[det.track_id] = det.bbox;
|
current_tracks[det.track_id] = det.bbox;
|
||||||
auto& history = impl_->history[det.track_id];
|
auto& history = impl_->history[det.track_id];
|
||||||
history.push_back(TrackSample{frame->pts, det.bbox});
|
history.push_back(TrackSample{frame->pts, det.bbox, MatchPoseToTrackId(det.track_id, det.bbox, frame->pose)});
|
||||||
while (!history.empty() && frame->pts > history.front().pts &&
|
while (!history.empty() && frame->pts > history.front().pts &&
|
||||||
(frame->pts - history.front().pts) > impl_->rules.front().window_ms) {
|
(frame->pts - history.front().pts) > impl_->rules.front().window_ms) {
|
||||||
history.pop_front();
|
history.pop_front();
|
||||||
@ -168,8 +407,18 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
|
|||||||
const float drop = CenterY(bbox) - CenterY(first.bbox);
|
const float drop = CenterY(bbox) - CenterY(first.bbox);
|
||||||
const float aspect_delta = AspectRatio(bbox) - AspectRatio(first.bbox);
|
const float aspect_delta = AspectRatio(bbox) - AspectRatio(first.bbox);
|
||||||
const uint64_t duration = frame->pts >= first.pts ? (frame->pts - first.pts) : 0;
|
const uint64_t duration = frame->pts >= first.pts ? (frame->pts - first.pts) : 0;
|
||||||
if (drop < rule.min_drop_pixels) continue;
|
const bool bbox_enabled = rule.fall_bbox.enabled;
|
||||||
if (aspect_delta < rule.min_aspect_ratio_delta) continue;
|
const bool pose_enabled = rule.fall_pose.enabled;
|
||||||
|
const bool bbox_trigger = drop >= rule.fall_bbox.min_drop_pixels &&
|
||||||
|
aspect_delta >= rule.fall_bbox.min_aspect_ratio_delta;
|
||||||
|
bool pose_trigger = false;
|
||||||
|
if (pose_enabled &&
|
||||||
|
first.pose.matched && it->second.back().pose.matched) {
|
||||||
|
const float torso_drop = it->second.back().pose.torso_center_y - first.pose.torso_center_y;
|
||||||
|
pose_trigger = torso_drop >= rule.fall_pose.min_torso_drop_pixels &&
|
||||||
|
it->second.back().pose.upright_ratio >= rule.fall_pose.max_upright_ratio;
|
||||||
|
}
|
||||||
|
if (!EvaluateSignalMode(rule.signal_mode, bbox_trigger, pose_trigger, bbox_enabled, pose_enabled)) continue;
|
||||||
if (duration < rule.activate_duration_ms) continue;
|
if (duration < rule.activate_duration_ms) continue;
|
||||||
|
|
||||||
BehaviorEventItem event;
|
BehaviorEventItem event;
|
||||||
@ -189,7 +438,7 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
|
|||||||
for (auto left = current_tracks.begin(); left != current_tracks.end(); ++left) {
|
for (auto left = current_tracks.begin(); left != current_tracks.end(); ++left) {
|
||||||
for (auto right = std::next(left); right != current_tracks.end(); ++right) {
|
for (auto right = std::next(left); right != current_tracks.end(); ++right) {
|
||||||
const float proximity = CenterDistance(left->second, right->second);
|
const float proximity = CenterDistance(left->second, right->second);
|
||||||
if (proximity > rule.proximity_pixels) continue;
|
if (proximity > rule.fight_bbox.proximity_pixels) continue;
|
||||||
|
|
||||||
const auto it_left = impl_->history.find(left->first);
|
const auto it_left = impl_->history.find(left->first);
|
||||||
const auto it_right = impl_->history.find(right->first);
|
const auto it_right = impl_->history.find(right->first);
|
||||||
@ -203,7 +452,22 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
|
|||||||
const uint64_t duration = std::min(duration_left, duration_right);
|
const uint64_t duration = std::min(duration_left, duration_right);
|
||||||
|
|
||||||
const float combined_motion = left_motion + right_motion;
|
const float combined_motion = left_motion + right_motion;
|
||||||
if (combined_motion < rule.min_motion_pixels) continue;
|
const bool bbox_enabled = rule.fight_bbox.enabled;
|
||||||
|
const bool pose_enabled = rule.fight_pose.enabled;
|
||||||
|
const bool bbox_trigger = combined_motion >= rule.fight_bbox.min_motion_pixels;
|
||||||
|
bool pose_trigger = false;
|
||||||
|
const auto& left_pose_now = it_left->second.back().pose;
|
||||||
|
const auto& right_pose_now = it_right->second.back().pose;
|
||||||
|
if (pose_enabled &&
|
||||||
|
it_left->second.front().pose.matched && it_right->second.front().pose.matched &&
|
||||||
|
left_pose_now.matched && right_pose_now.matched) {
|
||||||
|
const float pose_motion = WristMotion(left_pose_now, it_left->second.front().pose) +
|
||||||
|
WristMotion(right_pose_now, it_right->second.front().pose);
|
||||||
|
const float wrist_distance = MinimumWristDistance(left_pose_now, right_pose_now);
|
||||||
|
pose_trigger = pose_motion >= rule.fight_pose.min_wrist_motion_pixels &&
|
||||||
|
wrist_distance <= rule.fight_pose.max_wrist_distance_pixels;
|
||||||
|
}
|
||||||
|
if (!EvaluateSignalMode(rule.signal_mode, bbox_trigger, pose_trigger, bbox_enabled, pose_enabled)) continue;
|
||||||
if (duration < rule.activate_duration_ms) continue;
|
if (duration < rule.activate_duration_ms) continue;
|
||||||
|
|
||||||
const auto pair_key = std::make_pair(left->first, right->first);
|
const auto pair_key = std::make_pair(left->first, right->first);
|
||||||
|
|||||||
466
plugins/ai_pose/ai_pose_node.cpp
Normal file
466
plugins/ai_pose/ai_pose_node.cpp
Normal file
@ -0,0 +1,466 @@
|
|||||||
|
#include "ai_pose_node.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cmath>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <limits>
|
||||||
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "face/face_detection_utils.h"
|
||||||
|
#include "hw/i_infer_backend.h"
|
||||||
|
#include "pose/pose_result.h"
|
||||||
|
#include "utils/logger.h"
|
||||||
|
|
||||||
|
namespace rk3588 {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using face_detection::HalfToFloat;
|
||||||
|
using face_detection::ResizeRgbBilinear;
|
||||||
|
|
||||||
|
constexpr int kPoseOutputCount = 4;
|
||||||
|
constexpr int kPoseLocChannels = 64;
|
||||||
|
constexpr int kPoseScoreChannel = 64;
|
||||||
|
constexpr int kDflBins = 16;
|
||||||
|
constexpr int kMaxPoseDetections = 32;
|
||||||
|
constexpr uint8_t kLetterboxValue = 114;
|
||||||
|
|
||||||
|
struct PoseConfig {
|
||||||
|
bool enabled = true;
|
||||||
|
std::string model_path;
|
||||||
|
int model_input_w = 640;
|
||||||
|
int model_input_h = 640;
|
||||||
|
int expected_keypoints = 17;
|
||||||
|
float conf_thresh = 0.25f;
|
||||||
|
float nms_thresh = 0.45f;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct LetterboxInfo {
|
||||||
|
float scale = 1.0f;
|
||||||
|
float pad_x = 0.0f;
|
||||||
|
float pad_y = 0.0f;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PoseCandidate {
|
||||||
|
Rect bbox{};
|
||||||
|
float score = 0.0f;
|
||||||
|
int keypoint_index = -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool ParseConfig(const SimpleJson& config, PoseConfig& out, std::string& err) {
|
||||||
|
out.enabled = config.ValueOr<bool>("enabled", true);
|
||||||
|
out.model_path = config.ValueOr<std::string>("model_path", "");
|
||||||
|
out.model_input_w = config.ValueOr<int>("model_input_w", 640);
|
||||||
|
out.model_input_h = config.ValueOr<int>("model_input_h", 640);
|
||||||
|
out.expected_keypoints = config.ValueOr<int>("expected_keypoints", 17);
|
||||||
|
out.conf_thresh = config.ValueOr<float>("conf_thresh", 0.25f);
|
||||||
|
out.nms_thresh = config.ValueOr<float>("nms_thresh", 0.45f);
|
||||||
|
|
||||||
|
if (out.model_input_w <= 0 || out.model_input_h <= 0) {
|
||||||
|
err = "model_input_w and model_input_h must be positive";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (out.expected_keypoints <= 0) {
|
||||||
|
err = "expected_keypoints must be positive";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (out.conf_thresh <= 0.0f || out.conf_thresh >= 1.0f) {
|
||||||
|
err = "conf_thresh must be in (0, 1)";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (out.nms_thresh <= 0.0f || out.nms_thresh >= 1.0f) {
|
||||||
|
err = "nms_thresh must be in (0, 1)";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (out.enabled && out.model_path.empty()) {
|
||||||
|
err = "model_path is required when enabled=true";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PushToDownstream(const std::vector<std::shared_ptr<SpscQueue<FramePtr>>>& output_queues, const FramePtr& frame) {
|
||||||
|
for (const auto& q : output_queues) {
|
||||||
|
if (q) q->Push(frame);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline float Sigmoid(float x) {
|
||||||
|
return 1.0f / (1.0f + std::exp(-x));
|
||||||
|
}
|
||||||
|
|
||||||
|
float Unsigmoid(float y) {
|
||||||
|
return -std::log((1.0f / y) - 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
float ClampFloat(float v, float lo, float hi) {
|
||||||
|
return std::max(lo, std::min(v, hi));
|
||||||
|
}
|
||||||
|
|
||||||
|
float CalculateIoU(const Rect& lhs, const Rect& rhs) {
|
||||||
|
const float x1 = std::max(lhs.x, rhs.x);
|
||||||
|
const float y1 = std::max(lhs.y, rhs.y);
|
||||||
|
const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
|
||||||
|
const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
|
||||||
|
const float inter_w = std::max(0.0f, x2 - x1);
|
||||||
|
const float inter_h = std::max(0.0f, y2 - y1);
|
||||||
|
const float inter = inter_w * inter_h;
|
||||||
|
const float union_area = lhs.w * lhs.h + rhs.w * rhs.h - inter;
|
||||||
|
return union_area <= 0.0f ? 0.0f : (inter / union_area);
|
||||||
|
}
|
||||||
|
|
||||||
|
float ReadTensorValue(const InferOutput& output, size_t index) {
|
||||||
|
#if defined(RK3588_ENABLE_RKNN)
|
||||||
|
switch (output.type) {
|
||||||
|
case RKNN_TENSOR_FLOAT32: {
|
||||||
|
const auto* p = reinterpret_cast<const float*>(output.data.data());
|
||||||
|
return p[index];
|
||||||
|
}
|
||||||
|
case RKNN_TENSOR_FLOAT16: {
|
||||||
|
const auto* p = reinterpret_cast<const uint16_t*>(output.data.data());
|
||||||
|
return HalfToFloat(p[index]);
|
||||||
|
}
|
||||||
|
case RKNN_TENSOR_INT8: {
|
||||||
|
const auto* p = reinterpret_cast<const int8_t*>(output.data.data());
|
||||||
|
return (static_cast<float>(p[index]) - static_cast<float>(output.zp)) * output.scale;
|
||||||
|
}
|
||||||
|
case RKNN_TENSOR_UINT8: {
|
||||||
|
const auto* p = reinterpret_cast<const uint8_t*>(output.data.data());
|
||||||
|
return (static_cast<float>(p[index]) - static_cast<float>(output.zp)) * output.scale;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
const auto* p = reinterpret_cast<const float*>(output.data.data());
|
||||||
|
return p[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BuildLetterboxedRgbInput(const Frame& frame, int dst_w, int dst_h,
|
||||||
|
std::vector<uint8_t>& out, LetterboxInfo& letterbox) {
|
||||||
|
if (frame.width <= 0 || frame.height <= 0 || dst_w <= 0 || dst_h <= 0) return false;
|
||||||
|
if (frame.format != PixelFormat::RGB && frame.format != PixelFormat::BGR) return false;
|
||||||
|
|
||||||
|
const uint8_t* src = frame.planes[0].data ? frame.planes[0].data : frame.data;
|
||||||
|
if (!src) return false;
|
||||||
|
const int src_stride = frame.planes[0].stride > 0 ? frame.planes[0].stride : frame.width * 3;
|
||||||
|
|
||||||
|
out.assign(static_cast<size_t>(dst_w) * static_cast<size_t>(dst_h) * 3, kLetterboxValue);
|
||||||
|
|
||||||
|
const float scale = std::min(static_cast<float>(dst_w) / static_cast<float>(frame.width),
|
||||||
|
static_cast<float>(dst_h) / static_cast<float>(frame.height));
|
||||||
|
const int resized_w = std::max(1, static_cast<int>(std::round(frame.width * scale)));
|
||||||
|
const int resized_h = std::max(1, static_cast<int>(std::round(frame.height * scale)));
|
||||||
|
const int pad_x = (dst_w - resized_w) / 2;
|
||||||
|
const int pad_y = (dst_h - resized_h) / 2;
|
||||||
|
|
||||||
|
std::vector<uint8_t> resized(static_cast<size_t>(resized_w) * static_cast<size_t>(resized_h) * 3);
|
||||||
|
const bool swap_rb = (frame.format == PixelFormat::BGR);
|
||||||
|
ResizeRgbBilinear(src, frame.width, frame.height, src_stride, resized.data(), resized_w, resized_h, swap_rb);
|
||||||
|
|
||||||
|
for (int y = 0; y < resized_h; ++y) {
|
||||||
|
uint8_t* dst_row = out.data() + (static_cast<size_t>(y + pad_y) * static_cast<size_t>(dst_w) + pad_x) * 3;
|
||||||
|
const uint8_t* src_row = resized.data() + static_cast<size_t>(y) * static_cast<size_t>(resized_w) * 3;
|
||||||
|
std::memcpy(dst_row, src_row, static_cast<size_t>(resized_w) * 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
letterbox.scale = scale;
|
||||||
|
letterbox.pad_x = static_cast<float>(pad_x);
|
||||||
|
letterbox.pad_y = static_cast<float>(pad_y);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
float DecodeDfl(const InferOutput& output, int cell_index, int cells_per_head, int channel_group) {
|
||||||
|
float max_value = -std::numeric_limits<float>::infinity();
|
||||||
|
float logits[kDflBins]{};
|
||||||
|
for (int i = 0; i < kDflBins; ++i) {
|
||||||
|
const size_t offset = static_cast<size_t>((channel_group * kDflBins + i) * cells_per_head + cell_index);
|
||||||
|
logits[i] = ReadTensorValue(output, offset);
|
||||||
|
max_value = std::max(max_value, logits[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
float denom = 0.0f;
|
||||||
|
float numer = 0.0f;
|
||||||
|
for (int i = 0; i < kDflBins; ++i) {
|
||||||
|
const float e = std::exp(logits[i] - max_value);
|
||||||
|
denom += e;
|
||||||
|
numer += e * static_cast<float>(i);
|
||||||
|
}
|
||||||
|
return denom > 0.0f ? (numer / denom) : 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<PoseCandidate> DecodePoseCandidates(const std::vector<InferOutput>& outputs,
|
||||||
|
const PoseConfig& config) {
|
||||||
|
std::vector<PoseCandidate> candidates;
|
||||||
|
if (outputs.size() < kPoseOutputCount) return candidates;
|
||||||
|
|
||||||
|
const float score_threshold = Unsigmoid(config.conf_thresh);
|
||||||
|
int keypoint_base_index = 0;
|
||||||
|
|
||||||
|
for (int head = 0; head < 3; ++head) {
|
||||||
|
const int stride = (head == 0) ? 8 : (head == 1 ? 16 : 32);
|
||||||
|
const int grid_h = config.model_input_h / stride;
|
||||||
|
const int grid_w = config.model_input_w / stride;
|
||||||
|
const int cells = grid_h * grid_w;
|
||||||
|
if (cells <= 0) continue;
|
||||||
|
|
||||||
|
const InferOutput& output = outputs[static_cast<size_t>(head)];
|
||||||
|
const size_t elem_count = sizeof(float) > 0 ? (output.data.size() / sizeof(float)) : 0;
|
||||||
|
#if defined(RK3588_ENABLE_RKNN)
|
||||||
|
size_t bytes_per_elem = 1;
|
||||||
|
switch (output.type) {
|
||||||
|
case RKNN_TENSOR_FLOAT32: bytes_per_elem = sizeof(float); break;
|
||||||
|
case RKNN_TENSOR_FLOAT16: bytes_per_elem = sizeof(uint16_t); break;
|
||||||
|
default: bytes_per_elem = 1; break;
|
||||||
|
}
|
||||||
|
const size_t actual_elem_count = bytes_per_elem > 0 ? (output.data.size() / bytes_per_elem) : 0;
|
||||||
|
#else
|
||||||
|
const size_t actual_elem_count = elem_count;
|
||||||
|
#endif
|
||||||
|
if (actual_elem_count < static_cast<size_t>(65 * cells)) {
|
||||||
|
keypoint_base_index += cells;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int cell = 0; cell < cells; ++cell) {
|
||||||
|
const size_t score_offset = static_cast<size_t>(kPoseScoreChannel * cells + cell);
|
||||||
|
const float score_logit = ReadTensorValue(output, score_offset);
|
||||||
|
if (score_logit < score_threshold) continue;
|
||||||
|
|
||||||
|
const float left = DecodeDfl(output, cell, cells, 0);
|
||||||
|
const float top = DecodeDfl(output, cell, cells, 1);
|
||||||
|
const float right = DecodeDfl(output, cell, cells, 2);
|
||||||
|
const float bottom = DecodeDfl(output, cell, cells, 3);
|
||||||
|
|
||||||
|
const int grid_x = cell % grid_w;
|
||||||
|
const int grid_y = cell / grid_w;
|
||||||
|
const float center_x = (static_cast<float>(grid_x) + 0.5f) * stride;
|
||||||
|
const float center_y = (static_cast<float>(grid_y) + 0.5f) * stride;
|
||||||
|
const float x1 = center_x - left * stride;
|
||||||
|
const float y1 = center_y - top * stride;
|
||||||
|
const float x2 = center_x + right * stride;
|
||||||
|
const float y2 = center_y + bottom * stride;
|
||||||
|
|
||||||
|
PoseCandidate candidate;
|
||||||
|
candidate.bbox.x = x1;
|
||||||
|
candidate.bbox.y = y1;
|
||||||
|
candidate.bbox.w = std::max(0.0f, x2 - x1);
|
||||||
|
candidate.bbox.h = std::max(0.0f, y2 - y1);
|
||||||
|
candidate.score = Sigmoid(score_logit);
|
||||||
|
candidate.keypoint_index = keypoint_base_index + cell;
|
||||||
|
candidates.push_back(candidate);
|
||||||
|
}
|
||||||
|
|
||||||
|
keypoint_base_index += cells;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(candidates.begin(), candidates.end(), [](const PoseCandidate& lhs, const PoseCandidate& rhs) {
|
||||||
|
return lhs.score > rhs.score;
|
||||||
|
});
|
||||||
|
|
||||||
|
std::vector<PoseCandidate> kept;
|
||||||
|
kept.reserve(candidates.size());
|
||||||
|
for (const auto& candidate : candidates) {
|
||||||
|
bool suppressed = false;
|
||||||
|
for (const auto& accepted : kept) {
|
||||||
|
if (CalculateIoU(candidate.bbox, accepted.bbox) > config.nms_thresh) {
|
||||||
|
suppressed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!suppressed) {
|
||||||
|
kept.push_back(candidate);
|
||||||
|
if (static_cast<int>(kept.size()) >= kMaxPoseDetections) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return kept;
|
||||||
|
}
|
||||||
|
|
||||||
|
float MapToSourceCoord(float value, float pad, float scale, int limit) {
|
||||||
|
if (scale <= 0.0f) return 0.0f;
|
||||||
|
return ClampFloat((value - pad) / scale, 0.0f, static_cast<float>(std::max(0, limit)));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<PoseResult> BuildPoseResult(const std::vector<InferOutput>& outputs,
|
||||||
|
const PoseConfig& config,
|
||||||
|
int frame_w, int frame_h,
|
||||||
|
const LetterboxInfo& letterbox) {
|
||||||
|
if (outputs.size() < kPoseOutputCount) return nullptr;
|
||||||
|
|
||||||
|
const InferOutput& keypoint_output = outputs[3];
|
||||||
|
const int total_points = (config.model_input_w / 8) * (config.model_input_h / 8) +
|
||||||
|
(config.model_input_w / 16) * (config.model_input_h / 16) +
|
||||||
|
(config.model_input_w / 32) * (config.model_input_h / 32);
|
||||||
|
const int values_per_point = config.expected_keypoints * 3;
|
||||||
|
#if defined(RK3588_ENABLE_RKNN)
|
||||||
|
size_t kp_bytes_per_elem = 1;
|
||||||
|
switch (keypoint_output.type) {
|
||||||
|
case RKNN_TENSOR_FLOAT32: kp_bytes_per_elem = sizeof(float); break;
|
||||||
|
case RKNN_TENSOR_FLOAT16: kp_bytes_per_elem = sizeof(uint16_t); break;
|
||||||
|
default: kp_bytes_per_elem = 1; break;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
const size_t kp_bytes_per_elem = sizeof(float);
|
||||||
|
#endif
|
||||||
|
const size_t kp_elem_count = kp_bytes_per_elem > 0 ? (keypoint_output.data.size() / kp_bytes_per_elem) : 0;
|
||||||
|
if (kp_elem_count < static_cast<size_t>(values_per_point * total_points)) return nullptr;
|
||||||
|
|
||||||
|
auto result = std::make_shared<PoseResult>();
|
||||||
|
result->img_w = frame_w;
|
||||||
|
result->img_h = frame_h;
|
||||||
|
result->model_name = "yolov8n-pose";
|
||||||
|
|
||||||
|
const std::vector<PoseCandidate> candidates = DecodePoseCandidates(outputs, config);
|
||||||
|
result->items.reserve(candidates.size());
|
||||||
|
for (const auto& candidate : candidates) {
|
||||||
|
if (candidate.keypoint_index < 0 || candidate.keypoint_index >= total_points) continue;
|
||||||
|
|
||||||
|
PoseItem item;
|
||||||
|
item.score = candidate.score;
|
||||||
|
item.track_id = -1;
|
||||||
|
item.bbox.x = MapToSourceCoord(candidate.bbox.x, letterbox.pad_x, letterbox.scale, frame_w);
|
||||||
|
item.bbox.y = MapToSourceCoord(candidate.bbox.y, letterbox.pad_y, letterbox.scale, frame_h);
|
||||||
|
const float x2 = MapToSourceCoord(candidate.bbox.x + candidate.bbox.w, letterbox.pad_x, letterbox.scale, frame_w);
|
||||||
|
const float y2 = MapToSourceCoord(candidate.bbox.y + candidate.bbox.h, letterbox.pad_y, letterbox.scale, frame_h);
|
||||||
|
item.bbox.w = std::max(0.0f, x2 - item.bbox.x);
|
||||||
|
item.bbox.h = std::max(0.0f, y2 - item.bbox.y);
|
||||||
|
|
||||||
|
item.keypoints.reserve(static_cast<size_t>(config.expected_keypoints));
|
||||||
|
for (int k = 0; k < config.expected_keypoints; ++k) {
|
||||||
|
const size_t x_offset = static_cast<size_t>(k * 3 * total_points + candidate.keypoint_index);
|
||||||
|
const size_t y_offset = static_cast<size_t>(k * 3 * total_points + total_points + candidate.keypoint_index);
|
||||||
|
const size_t s_offset = static_cast<size_t>(k * 3 * total_points + 2 * total_points + candidate.keypoint_index);
|
||||||
|
|
||||||
|
PoseKeypoint kp;
|
||||||
|
kp.point.x = MapToSourceCoord(ReadTensorValue(keypoint_output, x_offset), letterbox.pad_x, letterbox.scale, frame_w);
|
||||||
|
kp.point.y = MapToSourceCoord(ReadTensorValue(keypoint_output, y_offset), letterbox.pad_y, letterbox.scale, frame_h);
|
||||||
|
kp.score = ReadTensorValue(keypoint_output, s_offset);
|
||||||
|
item.keypoints.push_back(kp);
|
||||||
|
}
|
||||||
|
result->items.push_back(std::move(item));
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
struct AiPoseNode::Impl {
|
||||||
|
PoseConfig config;
|
||||||
|
std::string init_err;
|
||||||
|
std::shared_ptr<IInferBackend> infer_backend;
|
||||||
|
ModelHandle model_handle = kInvalidModelHandle;
|
||||||
|
bool started = false;
|
||||||
|
std::vector<uint8_t> input_rgb;
|
||||||
|
};
|
||||||
|
|
||||||
|
AiPoseNode::AiPoseNode() : impl_(std::make_unique<Impl>()) {}
|
||||||
|
|
||||||
|
AiPoseNode::~AiPoseNode() = default;
|
||||||
|
|
||||||
|
std::string AiPoseNode::Id() const {
|
||||||
|
return id_;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string AiPoseNode::Type() const {
|
||||||
|
return "ai_pose";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AiPoseNode::Init(const SimpleJson& config, const NodeContext& ctx) {
|
||||||
|
id_ = config.ValueOr<std::string>("id", "ai_pose");
|
||||||
|
if (!ParseConfig(config, impl_->config, impl_->init_err)) {
|
||||||
|
LogError("[ai_pose] invalid config: " + impl_->init_err);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
impl_->infer_backend = ctx.infer_backend;
|
||||||
|
output_queues_ = ctx.output_queues;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AiPoseNode::Start() {
|
||||||
|
if (!impl_->config.enabled) {
|
||||||
|
impl_->started = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (!impl_->infer_backend) {
|
||||||
|
LogError("[ai_pose] infer_backend is required when enabled=true");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string err;
|
||||||
|
impl_->model_handle = impl_->infer_backend->LoadModel(impl_->config.model_path, err);
|
||||||
|
if (impl_->model_handle == kInvalidModelHandle) {
|
||||||
|
LogError("[ai_pose] failed to load model: " + impl_->config.model_path + " err=" + err);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_->started = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void AiPoseNode::Stop() {
|
||||||
|
if (impl_->infer_backend && impl_->model_handle != kInvalidModelHandle) {
|
||||||
|
impl_->infer_backend->UnloadModel(impl_->model_handle);
|
||||||
|
impl_->model_handle = kInvalidModelHandle;
|
||||||
|
}
|
||||||
|
impl_->started = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
NodeStatus AiPoseNode::Process(FramePtr frame) {
|
||||||
|
if (!frame) return NodeStatus::DROP;
|
||||||
|
|
||||||
|
if (!impl_->config.enabled) {
|
||||||
|
PushToDownstream(output_queues_, frame);
|
||||||
|
return NodeStatus::OK;
|
||||||
|
}
|
||||||
|
if (!impl_->started || !impl_->infer_backend || impl_->model_handle == kInvalidModelHandle) {
|
||||||
|
LogWarn("[ai_pose] process called before node is started");
|
||||||
|
PushToDownstream(output_queues_, frame);
|
||||||
|
return NodeStatus::ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
LetterboxInfo letterbox;
|
||||||
|
if (!BuildLetterboxedRgbInput(*frame, impl_->config.model_input_w, impl_->config.model_input_h,
|
||||||
|
impl_->input_rgb, letterbox)) {
|
||||||
|
LogWarn("[ai_pose] only RGB/BGR frames are supported currently");
|
||||||
|
PushToDownstream(output_queues_, frame);
|
||||||
|
return NodeStatus::OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
InferInput input;
|
||||||
|
input.data = impl_->input_rgb.data();
|
||||||
|
input.size = impl_->input_rgb.size();
|
||||||
|
input.width = impl_->config.model_input_w;
|
||||||
|
input.height = impl_->config.model_input_h;
|
||||||
|
input.is_nhwc = true;
|
||||||
|
input.dma_fd = -1;
|
||||||
|
input.dma_offset = 0;
|
||||||
|
|
||||||
|
InferResult infer_result = impl_->infer_backend->Infer(impl_->model_handle, input);
|
||||||
|
if (!infer_result.success) {
|
||||||
|
LogWarn("[ai_pose] inference failed: " + infer_result.error);
|
||||||
|
PushToDownstream(output_queues_, frame);
|
||||||
|
return NodeStatus::OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
frame->pose = BuildPoseResult(infer_result.outputs, impl_->config, frame->width, frame->height, letterbox);
|
||||||
|
if (!frame->pose) {
|
||||||
|
frame->pose = std::make_shared<PoseResult>();
|
||||||
|
frame->pose->img_w = frame->width;
|
||||||
|
frame->pose->img_h = frame->height;
|
||||||
|
frame->pose->model_name = "yolov8n-pose";
|
||||||
|
}
|
||||||
|
|
||||||
|
PushToDownstream(output_queues_, frame);
|
||||||
|
return NodeStatus::OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef RK3588_TEST_BUILD
|
||||||
|
REGISTER_NODE(AiPoseNode, "ai_pose");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace rk3588
|
||||||
30
plugins/ai_pose/ai_pose_node.h
Normal file
30
plugins/ai_pose/ai_pose_node.h
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "node.h"
|
||||||
|
|
||||||
|
namespace rk3588 {
|
||||||
|
|
||||||
|
class AiPoseNode final : public INode {
|
||||||
|
public:
|
||||||
|
AiPoseNode();
|
||||||
|
~AiPoseNode() override;
|
||||||
|
|
||||||
|
std::string Id() const override;
|
||||||
|
std::string Type() const override;
|
||||||
|
bool Init(const SimpleJson& config, const NodeContext& ctx) override;
|
||||||
|
bool Start() override;
|
||||||
|
void Stop() override;
|
||||||
|
NodeStatus Process(FramePtr frame) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct Impl;
|
||||||
|
std::unique_ptr<Impl> impl_;
|
||||||
|
std::string id_;
|
||||||
|
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rk3588
|
||||||
139
plugins/pose_assoc/pose_assoc_node.cpp
Normal file
139
plugins/pose_assoc/pose_assoc_node.cpp
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
#include "pose_assoc_node.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "pose/pose_result.h"
|
||||||
|
#include "utils/logger.h"
|
||||||
|
|
||||||
|
namespace rk3588 {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct PoseAssocConfig {
|
||||||
|
float min_iou = 0.1f;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MatchCandidate {
|
||||||
|
float iou = 0.0f;
|
||||||
|
int pose_index = -1;
|
||||||
|
int det_index = -1;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool ParseConfig(const SimpleJson& config, PoseAssocConfig& out, std::string& err) {
|
||||||
|
out.min_iou = config.ValueOr<float>("min_iou", 0.1f);
|
||||||
|
if (out.min_iou < 0.0f || out.min_iou > 1.0f) {
|
||||||
|
err = "min_iou must be in [0, 1]";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
float IoU(const Rect& lhs, const Rect& rhs) {
|
||||||
|
const float x1 = std::max(lhs.x, rhs.x);
|
||||||
|
const float y1 = std::max(lhs.y, rhs.y);
|
||||||
|
const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
|
||||||
|
const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
|
||||||
|
const float iw = std::max(0.0f, x2 - x1);
|
||||||
|
const float ih = std::max(0.0f, y2 - y1);
|
||||||
|
const float inter = iw * ih;
|
||||||
|
const float area_l = std::max(0.0f, lhs.w) * std::max(0.0f, lhs.h);
|
||||||
|
const float area_r = std::max(0.0f, rhs.w) * std::max(0.0f, rhs.h);
|
||||||
|
const float uni = area_l + area_r - inter;
|
||||||
|
return uni <= 0.0f ? 0.0f : (inter / uni);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PushToDownstream(const std::vector<std::shared_ptr<SpscQueue<FramePtr>>>& output_queues, const FramePtr& frame) {
|
||||||
|
for (const auto& q : output_queues) {
|
||||||
|
if (q) q->Push(frame);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
struct PoseAssocNode::Impl {
|
||||||
|
PoseAssocConfig config;
|
||||||
|
std::string init_err;
|
||||||
|
};
|
||||||
|
|
||||||
|
PoseAssocNode::PoseAssocNode() : impl_(std::make_unique<Impl>()) {}
|
||||||
|
|
||||||
|
PoseAssocNode::~PoseAssocNode() = default;
|
||||||
|
|
||||||
|
std::string PoseAssocNode::Id() const {
|
||||||
|
return id_;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string PoseAssocNode::Type() const {
|
||||||
|
return "pose_assoc";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PoseAssocNode::Init(const SimpleJson& config, const NodeContext& ctx) {
|
||||||
|
id_ = config.ValueOr<std::string>("id", "pose_assoc");
|
||||||
|
if (!ParseConfig(config, impl_->config, impl_->init_err)) {
|
||||||
|
LogError("[pose_assoc] invalid config: " + impl_->init_err);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
output_queues_ = ctx.output_queues;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PoseAssocNode::Start() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PoseAssocNode::Stop() {}
|
||||||
|
|
||||||
|
NodeStatus PoseAssocNode::Process(FramePtr frame) {
|
||||||
|
if (!frame) return NodeStatus::DROP;
|
||||||
|
|
||||||
|
if (frame->pose && frame->det) {
|
||||||
|
for (auto& pose : frame->pose->items) {
|
||||||
|
pose.track_id = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<MatchCandidate> candidates;
|
||||||
|
candidates.reserve(frame->pose->items.size() * frame->det->items.size());
|
||||||
|
for (size_t pi = 0; pi < frame->pose->items.size(); ++pi) {
|
||||||
|
const Rect& pose_bbox = frame->pose->items[pi].bbox;
|
||||||
|
for (size_t di = 0; di < frame->det->items.size(); ++di) {
|
||||||
|
const auto& det = frame->det->items[di];
|
||||||
|
if (det.track_id < 0) continue;
|
||||||
|
const float iou = IoU(pose_bbox, det.bbox);
|
||||||
|
if (iou >= impl_->config.min_iou) {
|
||||||
|
candidates.push_back(MatchCandidate{iou, static_cast<int>(pi), static_cast<int>(di)});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(candidates.begin(), candidates.end(), [](const MatchCandidate& lhs, const MatchCandidate& rhs) {
|
||||||
|
if (lhs.iou != rhs.iou) return lhs.iou > rhs.iou;
|
||||||
|
if (lhs.pose_index != rhs.pose_index) return lhs.pose_index < rhs.pose_index;
|
||||||
|
return lhs.det_index < rhs.det_index;
|
||||||
|
});
|
||||||
|
|
||||||
|
std::vector<bool> used_pose(frame->pose->items.size(), false);
|
||||||
|
std::vector<bool> used_det(frame->det->items.size(), false);
|
||||||
|
for (const auto& candidate : candidates) {
|
||||||
|
if (used_pose[static_cast<size_t>(candidate.pose_index)] ||
|
||||||
|
used_det[static_cast<size_t>(candidate.det_index)]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
frame->pose->items[static_cast<size_t>(candidate.pose_index)].track_id =
|
||||||
|
frame->det->items[static_cast<size_t>(candidate.det_index)].track_id;
|
||||||
|
used_pose[static_cast<size_t>(candidate.pose_index)] = true;
|
||||||
|
used_det[static_cast<size_t>(candidate.det_index)] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PushToDownstream(output_queues_, frame);
|
||||||
|
return NodeStatus::OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef RK3588_TEST_BUILD
|
||||||
|
REGISTER_NODE(PoseAssocNode, "pose_assoc");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} // namespace rk3588
|
||||||
30
plugins/pose_assoc/pose_assoc_node.h
Normal file
30
plugins/pose_assoc/pose_assoc_node.h
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "node.h"
|
||||||
|
|
||||||
|
namespace rk3588 {
|
||||||
|
|
||||||
|
class PoseAssocNode final : public INode {
|
||||||
|
public:
|
||||||
|
PoseAssocNode();
|
||||||
|
~PoseAssocNode() override;
|
||||||
|
|
||||||
|
std::string Id() const override;
|
||||||
|
std::string Type() const override;
|
||||||
|
bool Init(const SimpleJson& config, const NodeContext& ctx) override;
|
||||||
|
bool Start() override;
|
||||||
|
void Stop() override;
|
||||||
|
NodeStatus Process(FramePtr frame) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct Impl;
|
||||||
|
std::unique_ptr<Impl> impl_;
|
||||||
|
std::string id_;
|
||||||
|
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rk3588
|
||||||
@ -40,6 +40,8 @@ add_executable(rk3588_gtests
|
|||||||
test_behavior_event_model.cpp
|
test_behavior_event_model.cpp
|
||||||
test_region_event.cpp
|
test_region_event.cpp
|
||||||
test_action_recog.cpp
|
test_action_recog.cpp
|
||||||
|
test_ai_pose.cpp
|
||||||
|
test_pose_assoc.cpp
|
||||||
test_event_fusion.cpp
|
test_event_fusion.cpp
|
||||||
test_alarm_behavior_events.cpp
|
test_alarm_behavior_events.cpp
|
||||||
test_infer_backend.cpp
|
test_infer_backend.cpp
|
||||||
@ -53,6 +55,8 @@ add_executable(rk3588_gtests
|
|||||||
${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
|
${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
|
||||||
${CMAKE_SOURCE_DIR}/plugins/region_event/region_event_node.cpp
|
${CMAKE_SOURCE_DIR}/plugins/region_event/region_event_node.cpp
|
||||||
${CMAKE_SOURCE_DIR}/plugins/action_recog/action_recog_node.cpp
|
${CMAKE_SOURCE_DIR}/plugins/action_recog/action_recog_node.cpp
|
||||||
|
${CMAKE_SOURCE_DIR}/plugins/ai_pose/ai_pose_node.cpp
|
||||||
|
${CMAKE_SOURCE_DIR}/plugins/pose_assoc/pose_assoc_node.cpp
|
||||||
${CMAKE_SOURCE_DIR}/plugins/event_fusion/event_fusion_node.cpp
|
${CMAKE_SOURCE_DIR}/plugins/event_fusion/event_fusion_node.cpp
|
||||||
${CMAKE_SOURCE_DIR}/plugins/alarm/rule_engine.cpp
|
${CMAKE_SOURCE_DIR}/plugins/alarm/rule_engine.cpp
|
||||||
)
|
)
|
||||||
|
|||||||
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
#include "frame/frame.h"
|
#include "frame/frame.h"
|
||||||
#include "node.h"
|
#include "node.h"
|
||||||
|
#include "pose/pose_result.h"
|
||||||
#include "utils/simple_json.h"
|
#include "utils/simple_json.h"
|
||||||
#include "../plugins/action_recog/action_recog_node.h"
|
#include "../plugins/action_recog/action_recog_node.h"
|
||||||
|
|
||||||
@ -124,5 +125,385 @@ TEST(ActionRecogTest, EmitsFightWhenTwoTracksStayCloseWithRepeatedMotion) {
|
|||||||
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[1], 32);
|
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[1], 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(ActionRecogTest, EmitsFallFromPoseSequenceWhenBboxShapeChangeIsSmall) {
|
||||||
|
ActionRecogNode node;
|
||||||
|
|
||||||
|
const std::string config_text = R"({
|
||||||
|
"id": "action_evt",
|
||||||
|
"events": [
|
||||||
|
{
|
||||||
|
"type": "fall",
|
||||||
|
"window_ms": 1500,
|
||||||
|
"min_drop_pixels": 120,
|
||||||
|
"min_aspect_ratio_delta": 10.0,
|
||||||
|
"activate_duration_ms": 0,
|
||||||
|
"pose_min_torso_drop_pixels": 120,
|
||||||
|
"pose_max_upright_ratio": 0.6
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})";
|
||||||
|
|
||||||
|
SimpleJson config = ParseActionConfig(config_text);
|
||||||
|
NodeContext ctx;
|
||||||
|
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
|
||||||
|
ctx.output_queues.push_back(out);
|
||||||
|
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto frame1 = std::make_shared<Frame>();
|
||||||
|
frame1->width = 1920;
|
||||||
|
frame1->height = 1080;
|
||||||
|
frame1->pts = 1000;
|
||||||
|
frame1->det = std::make_shared<DetectionResult>();
|
||||||
|
frame1->det->img_w = 1920;
|
||||||
|
frame1->det->img_h = 1080;
|
||||||
|
frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
|
||||||
|
frame1->pose = std::make_shared<PoseResult>();
|
||||||
|
frame1->pose->img_w = 1920;
|
||||||
|
frame1->pose->img_h = 1080;
|
||||||
|
PoseItem pose1;
|
||||||
|
pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
|
||||||
|
pose1.score = 0.9f;
|
||||||
|
pose1.keypoints.resize(17);
|
||||||
|
pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
|
||||||
|
pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
|
||||||
|
pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
|
||||||
|
pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
|
||||||
|
frame1->pose->items.push_back(pose1);
|
||||||
|
|
||||||
|
auto frame2 = std::make_shared<Frame>();
|
||||||
|
frame2->width = 1920;
|
||||||
|
frame2->height = 1080;
|
||||||
|
frame2->pts = 1600;
|
||||||
|
frame2->det = std::make_shared<DetectionResult>();
|
||||||
|
frame2->det->img_w = 1920;
|
||||||
|
frame2->det->img_h = 1080;
|
||||||
|
frame2->det->items.push_back(Detection{0, 0.93f, Rect{780.0f, 420.0f, 200.0f, 280.0f}, 17});
|
||||||
|
frame2->pose = std::make_shared<PoseResult>();
|
||||||
|
frame2->pose->img_w = 1920;
|
||||||
|
frame2->pose->img_h = 1080;
|
||||||
|
PoseItem pose2;
|
||||||
|
pose2.bbox = Rect{780.0f, 420.0f, 200.0f, 280.0f};
|
||||||
|
pose2.score = 0.91f;
|
||||||
|
pose2.keypoints.resize(17);
|
||||||
|
pose2.keypoints[5] = PoseKeypoint{PosePoint2f{790.0f, 520.0f}, 0.9f};
|
||||||
|
pose2.keypoints[6] = PoseKeypoint{PosePoint2f{970.0f, 520.0f}, 0.9f};
|
||||||
|
pose2.keypoints[11] = PoseKeypoint{PosePoint2f{820.0f, 590.0f}, 0.9f};
|
||||||
|
pose2.keypoints[12] = PoseKeypoint{PosePoint2f{940.0f, 590.0f}, 0.9f};
|
||||||
|
frame2->pose->items.push_back(pose2);
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
|
||||||
|
ASSERT_NE(frame2->behavior_events, nullptr);
|
||||||
|
ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
|
||||||
|
EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fall);
|
||||||
|
ASSERT_EQ(frame2->behavior_events->items[0].track_ids.size(), 1u);
|
||||||
|
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[0], 17);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ActionRecogTest, EmitsFightFromPoseMotionWhenBboxMotionIsSmall) {
|
||||||
|
ActionRecogNode node;
|
||||||
|
|
||||||
|
const std::string config_text = R"({
|
||||||
|
"id": "action_evt",
|
||||||
|
"events": [
|
||||||
|
{
|
||||||
|
"type": "fight",
|
||||||
|
"window_ms": 1200,
|
||||||
|
"proximity_pixels": 220,
|
||||||
|
"min_motion_pixels": 1000,
|
||||||
|
"activate_duration_ms": 0,
|
||||||
|
"pose_min_wrist_motion_pixels": 120,
|
||||||
|
"pose_max_wrist_distance_pixels": 120
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})";
|
||||||
|
|
||||||
|
SimpleJson config = ParseActionConfig(config_text);
|
||||||
|
NodeContext ctx;
|
||||||
|
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
|
||||||
|
ctx.output_queues.push_back(out);
|
||||||
|
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto frame1 = std::make_shared<Frame>();
|
||||||
|
frame1->width = 1920;
|
||||||
|
frame1->height = 1080;
|
||||||
|
frame1->pts = 1000;
|
||||||
|
frame1->det = std::make_shared<DetectionResult>();
|
||||||
|
frame1->det->img_w = 1920;
|
||||||
|
frame1->det->img_h = 1080;
|
||||||
|
frame1->det->items.push_back(Detection{0, 0.90f, Rect{700.0f, 320.0f, 120.0f, 300.0f}, 31});
|
||||||
|
frame1->det->items.push_back(Detection{0, 0.89f, Rect{860.0f, 320.0f, 120.0f, 300.0f}, 32});
|
||||||
|
frame1->pose = std::make_shared<PoseResult>();
|
||||||
|
frame1->pose->img_w = 1920;
|
||||||
|
frame1->pose->img_h = 1080;
|
||||||
|
PoseItem left1;
|
||||||
|
left1.bbox = Rect{700.0f, 320.0f, 120.0f, 300.0f};
|
||||||
|
left1.keypoints.resize(17);
|
||||||
|
left1.keypoints[9] = PoseKeypoint{PosePoint2f{780.0f, 430.0f}, 0.9f};
|
||||||
|
left1.keypoints[10] = PoseKeypoint{PosePoint2f{790.0f, 450.0f}, 0.9f};
|
||||||
|
PoseItem right1;
|
||||||
|
right1.bbox = Rect{860.0f, 320.0f, 120.0f, 300.0f};
|
||||||
|
right1.keypoints.resize(17);
|
||||||
|
right1.keypoints[9] = PoseKeypoint{PosePoint2f{900.0f, 430.0f}, 0.9f};
|
||||||
|
right1.keypoints[10] = PoseKeypoint{PosePoint2f{910.0f, 450.0f}, 0.9f};
|
||||||
|
frame1->pose->items.push_back(left1);
|
||||||
|
frame1->pose->items.push_back(right1);
|
||||||
|
|
||||||
|
auto frame2 = std::make_shared<Frame>();
|
||||||
|
frame2->width = 1920;
|
||||||
|
frame2->height = 1080;
|
||||||
|
frame2->pts = 1300;
|
||||||
|
frame2->det = std::make_shared<DetectionResult>();
|
||||||
|
frame2->det->img_w = 1920;
|
||||||
|
frame2->det->img_h = 1080;
|
||||||
|
frame2->det->items.push_back(Detection{0, 0.91f, Rect{710.0f, 325.0f, 120.0f, 300.0f}, 31});
|
||||||
|
frame2->det->items.push_back(Detection{0, 0.92f, Rect{850.0f, 315.0f, 120.0f, 300.0f}, 32});
|
||||||
|
frame2->pose = std::make_shared<PoseResult>();
|
||||||
|
frame2->pose->img_w = 1920;
|
||||||
|
frame2->pose->img_h = 1080;
|
||||||
|
PoseItem left2;
|
||||||
|
left2.bbox = Rect{710.0f, 325.0f, 120.0f, 300.0f};
|
||||||
|
left2.keypoints.resize(17);
|
||||||
|
left2.keypoints[9] = PoseKeypoint{PosePoint2f{860.0f, 430.0f}, 0.9f};
|
||||||
|
left2.keypoints[10] = PoseKeypoint{PosePoint2f{870.0f, 450.0f}, 0.9f};
|
||||||
|
PoseItem right2;
|
||||||
|
right2.bbox = Rect{850.0f, 315.0f, 120.0f, 300.0f};
|
||||||
|
right2.keypoints.resize(17);
|
||||||
|
right2.keypoints[9] = PoseKeypoint{PosePoint2f{840.0f, 430.0f}, 0.9f};
|
||||||
|
right2.keypoints[10] = PoseKeypoint{PosePoint2f{830.0f, 450.0f}, 0.9f};
|
||||||
|
frame2->pose->items.push_back(left2);
|
||||||
|
frame2->pose->items.push_back(right2);
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
|
||||||
|
ASSERT_NE(frame2->behavior_events, nullptr);
|
||||||
|
ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
|
||||||
|
EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fight);
|
||||||
|
ASSERT_EQ(frame2->behavior_events->items[0].track_ids.size(), 2u);
|
||||||
|
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[0], 31);
|
||||||
|
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[1], 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ActionRecogTest, SupportsStructuredFusionConfigForFall) {
|
||||||
|
ActionRecogNode node;
|
||||||
|
|
||||||
|
const std::string config_text = R"({
|
||||||
|
"id": "action_evt",
|
||||||
|
"events": [
|
||||||
|
{
|
||||||
|
"type": "fall",
|
||||||
|
"window_ms": 1500,
|
||||||
|
"activate_duration_ms": 0,
|
||||||
|
"bbox": {
|
||||||
|
"enabled": false
|
||||||
|
},
|
||||||
|
"pose": {
|
||||||
|
"enabled": true,
|
||||||
|
"min_torso_drop_pixels": 120,
|
||||||
|
"max_upright_ratio": 0.6
|
||||||
|
},
|
||||||
|
"fusion": {
|
||||||
|
"match_mode": "any"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})";
|
||||||
|
|
||||||
|
SimpleJson config = ParseActionConfig(config_text);
|
||||||
|
NodeContext ctx;
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto frame1 = std::make_shared<Frame>();
|
||||||
|
frame1->width = 1920;
|
||||||
|
frame1->height = 1080;
|
||||||
|
frame1->pts = 1000;
|
||||||
|
frame1->det = std::make_shared<DetectionResult>();
|
||||||
|
frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
|
||||||
|
frame1->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem pose1;
|
||||||
|
pose1.track_id = 17;
|
||||||
|
pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
|
||||||
|
pose1.keypoints.resize(17);
|
||||||
|
pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
|
||||||
|
pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
|
||||||
|
pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
|
||||||
|
pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
|
||||||
|
frame1->pose->items.push_back(pose1);
|
||||||
|
|
||||||
|
auto frame2 = std::make_shared<Frame>();
|
||||||
|
frame2->width = 1920;
|
||||||
|
frame2->height = 1080;
|
||||||
|
frame2->pts = 1600;
|
||||||
|
frame2->det = std::make_shared<DetectionResult>();
|
||||||
|
frame2->det->items.push_back(Detection{0, 0.93f, Rect{790.0f, 400.0f, 180.0f, 300.0f}, 17});
|
||||||
|
frame2->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem pose2;
|
||||||
|
pose2.track_id = 17;
|
||||||
|
pose2.bbox = Rect{790.0f, 400.0f, 180.0f, 300.0f};
|
||||||
|
pose2.keypoints.resize(17);
|
||||||
|
pose2.keypoints[5] = PoseKeypoint{PosePoint2f{790.0f, 520.0f}, 0.9f};
|
||||||
|
pose2.keypoints[6] = PoseKeypoint{PosePoint2f{970.0f, 520.0f}, 0.9f};
|
||||||
|
pose2.keypoints[11] = PoseKeypoint{PosePoint2f{820.0f, 590.0f}, 0.9f};
|
||||||
|
pose2.keypoints[12] = PoseKeypoint{PosePoint2f{940.0f, 590.0f}, 0.9f};
|
||||||
|
frame2->pose->items.push_back(pose2);
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
|
||||||
|
ASSERT_NE(frame2->behavior_events, nullptr);
|
||||||
|
ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
|
||||||
|
EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fall);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ActionRecogTest, SuppressesFallFalsePositiveWhenPoseShapeConditionFails) {
|
||||||
|
ActionRecogNode node;
|
||||||
|
|
||||||
|
const std::string config_text = R"({
|
||||||
|
"id": "action_evt",
|
||||||
|
"events": [
|
||||||
|
{
|
||||||
|
"type": "fall",
|
||||||
|
"window_ms": 1500,
|
||||||
|
"activate_duration_ms": 0,
|
||||||
|
"bbox": {
|
||||||
|
"enabled": false
|
||||||
|
},
|
||||||
|
"pose": {
|
||||||
|
"enabled": true,
|
||||||
|
"min_torso_drop_pixels": 120,
|
||||||
|
"max_upright_ratio": 0.8
|
||||||
|
},
|
||||||
|
"fusion": {
|
||||||
|
"match_mode": "any"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})";
|
||||||
|
|
||||||
|
SimpleJson config = ParseActionConfig(config_text);
|
||||||
|
NodeContext ctx;
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto frame1 = std::make_shared<Frame>();
|
||||||
|
frame1->pts = 1000;
|
||||||
|
frame1->det = std::make_shared<DetectionResult>();
|
||||||
|
frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
|
||||||
|
frame1->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem pose1;
|
||||||
|
pose1.track_id = 17;
|
||||||
|
pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
|
||||||
|
pose1.keypoints.resize(17);
|
||||||
|
pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
|
||||||
|
pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
|
||||||
|
pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
|
||||||
|
pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
|
||||||
|
frame1->pose->items.push_back(pose1);
|
||||||
|
|
||||||
|
auto frame2 = std::make_shared<Frame>();
|
||||||
|
frame2->pts = 1600;
|
||||||
|
frame2->det = std::make_shared<DetectionResult>();
|
||||||
|
frame2->det->items.push_back(Detection{0, 0.93f, Rect{790.0f, 400.0f, 180.0f, 300.0f}, 17});
|
||||||
|
frame2->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem pose2;
|
||||||
|
pose2.track_id = 17;
|
||||||
|
pose2.bbox = Rect{790.0f, 400.0f, 180.0f, 300.0f};
|
||||||
|
pose2.keypoints.resize(17);
|
||||||
|
pose2.keypoints[5] = PoseKeypoint{PosePoint2f{860.0f, 520.0f}, 0.9f};
|
||||||
|
pose2.keypoints[6] = PoseKeypoint{PosePoint2f{900.0f, 520.0f}, 0.9f};
|
||||||
|
pose2.keypoints[11] = PoseKeypoint{PosePoint2f{865.0f, 700.0f}, 0.9f};
|
||||||
|
pose2.keypoints[12] = PoseKeypoint{PosePoint2f{905.0f, 700.0f}, 0.9f};
|
||||||
|
frame2->pose->items.push_back(pose2);
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
|
||||||
|
ASSERT_NE(frame2->behavior_events, nullptr);
|
||||||
|
EXPECT_TRUE(frame2->behavior_events->items.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(ActionRecogTest, SuppressesFightFalsePositiveWhenWristsStayFarApart) {
|
||||||
|
ActionRecogNode node;
|
||||||
|
|
||||||
|
const std::string config_text = R"({
|
||||||
|
"id": "action_evt",
|
||||||
|
"events": [
|
||||||
|
{
|
||||||
|
"type": "fight",
|
||||||
|
"window_ms": 1200,
|
||||||
|
"activate_duration_ms": 0,
|
||||||
|
"bbox": {
|
||||||
|
"enabled": false,
|
||||||
|
"proximity_pixels": 220,
|
||||||
|
"min_motion_pixels": 0
|
||||||
|
},
|
||||||
|
"pose": {
|
||||||
|
"enabled": true,
|
||||||
|
"min_wrist_motion_pixels": 120,
|
||||||
|
"max_wrist_distance_pixels": 60
|
||||||
|
},
|
||||||
|
"fusion": {
|
||||||
|
"match_mode": "any"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})";
|
||||||
|
|
||||||
|
SimpleJson config = ParseActionConfig(config_text);
|
||||||
|
NodeContext ctx;
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto frame1 = std::make_shared<Frame>();
|
||||||
|
frame1->pts = 1000;
|
||||||
|
frame1->det = std::make_shared<DetectionResult>();
|
||||||
|
frame1->det->items.push_back(Detection{0, 0.90f, Rect{700.0f, 320.0f, 120.0f, 300.0f}, 31});
|
||||||
|
frame1->det->items.push_back(Detection{0, 0.89f, Rect{860.0f, 320.0f, 120.0f, 300.0f}, 32});
|
||||||
|
frame1->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem left1;
|
||||||
|
left1.track_id = 31;
|
||||||
|
left1.bbox = Rect{700.0f, 320.0f, 120.0f, 300.0f};
|
||||||
|
left1.keypoints.resize(17);
|
||||||
|
left1.keypoints[9] = PoseKeypoint{PosePoint2f{760.0f, 430.0f}, 0.9f};
|
||||||
|
left1.keypoints[10] = PoseKeypoint{PosePoint2f{770.0f, 450.0f}, 0.9f};
|
||||||
|
PoseItem right1;
|
||||||
|
right1.track_id = 32;
|
||||||
|
right1.bbox = Rect{860.0f, 320.0f, 120.0f, 300.0f};
|
||||||
|
right1.keypoints.resize(17);
|
||||||
|
right1.keypoints[9] = PoseKeypoint{PosePoint2f{960.0f, 430.0f}, 0.9f};
|
||||||
|
right1.keypoints[10] = PoseKeypoint{PosePoint2f{970.0f, 450.0f}, 0.9f};
|
||||||
|
frame1->pose->items.push_back(left1);
|
||||||
|
frame1->pose->items.push_back(right1);
|
||||||
|
|
||||||
|
auto frame2 = std::make_shared<Frame>();
|
||||||
|
frame2->pts = 1300;
|
||||||
|
frame2->det = std::make_shared<DetectionResult>();
|
||||||
|
frame2->det->items.push_back(Detection{0, 0.91f, Rect{710.0f, 325.0f, 120.0f, 300.0f}, 31});
|
||||||
|
frame2->det->items.push_back(Detection{0, 0.92f, Rect{850.0f, 315.0f, 120.0f, 300.0f}, 32});
|
||||||
|
frame2->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem left2;
|
||||||
|
left2.track_id = 31;
|
||||||
|
left2.bbox = Rect{710.0f, 325.0f, 120.0f, 300.0f};
|
||||||
|
left2.keypoints.resize(17);
|
||||||
|
left2.keypoints[9] = PoseKeypoint{PosePoint2f{820.0f, 430.0f}, 0.9f};
|
||||||
|
left2.keypoints[10] = PoseKeypoint{PosePoint2f{830.0f, 450.0f}, 0.9f};
|
||||||
|
PoseItem right2;
|
||||||
|
right2.track_id = 32;
|
||||||
|
right2.bbox = Rect{850.0f, 315.0f, 120.0f, 300.0f};
|
||||||
|
right2.keypoints.resize(17);
|
||||||
|
right2.keypoints[9] = PoseKeypoint{PosePoint2f{980.0f, 430.0f}, 0.9f};
|
||||||
|
right2.keypoints[10] = PoseKeypoint{PosePoint2f{990.0f, 450.0f}, 0.9f};
|
||||||
|
frame2->pose->items.push_back(left2);
|
||||||
|
frame2->pose->items.push_back(right2);
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
|
||||||
|
ASSERT_NE(frame2->behavior_events, nullptr);
|
||||||
|
EXPECT_TRUE(frame2->behavior_events->items.empty());
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
} // namespace rk3588
|
} // namespace rk3588
|
||||||
|
|||||||
232
tests/test_ai_pose.cpp
Normal file
232
tests/test_ai_pose.cpp
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "hw/i_infer_backend.h"
|
||||||
|
#include "node.h"
|
||||||
|
#include "pose/pose_result.h"
|
||||||
|
#include "utils/simple_json.h"
|
||||||
|
#include "../plugins/ai_pose/ai_pose_node.h"
|
||||||
|
|
||||||
|
namespace rk3588 {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
SimpleJson ParsePoseConfig(const std::string& text) {
|
||||||
|
SimpleJson config;
|
||||||
|
std::string err;
|
||||||
|
const bool ok = ParseSimpleJson(text, config, err);
|
||||||
|
EXPECT_TRUE(ok);
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
|
||||||
|
class FakeInferBackend final : public IInferBackend {
|
||||||
|
public:
|
||||||
|
ModelHandle LoadModel(const std::string& /*model_path*/, std::string& /*err*/) override {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
void UnloadModel(ModelHandle /*handle*/) override {}
|
||||||
|
|
||||||
|
bool GetModelInfo(ModelHandle /*handle*/, ModelInfo& info) const override {
|
||||||
|
info.input_width = 640;
|
||||||
|
info.input_height = 640;
|
||||||
|
info.input_channels = 3;
|
||||||
|
info.n_input = 1;
|
||||||
|
info.n_output = 4;
|
||||||
|
info.name = "fake_yolov8_pose";
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
InferResult Infer(ModelHandle /*handle*/, const InferInput& /*input*/) override {
|
||||||
|
InferResult result;
|
||||||
|
result.success = true;
|
||||||
|
result.outputs = outputs_;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
AiScheduler::BorrowedInferResult InferBorrowed(ModelHandle /*handle*/, const InferInput& /*input*/) override {
|
||||||
|
AiScheduler::BorrowedInferResult result;
|
||||||
|
result.success = false;
|
||||||
|
result.error = "not used in unit test";
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<InferOutput> outputs_;
|
||||||
|
};
|
||||||
|
|
||||||
|
InferOutput MakeFloatOutput(const std::vector<float>& values) {
|
||||||
|
InferOutput out;
|
||||||
|
out.size = values.size() * sizeof(float);
|
||||||
|
out.data.resize(out.size);
|
||||||
|
std::memcpy(out.data.data(), values.data(), out.size);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(AiPoseNodeTest, RejectsEnabledConfigWithoutModelPath) {
|
||||||
|
AiPoseNode node;
|
||||||
|
SimpleJson config = ParsePoseConfig(R"({
|
||||||
|
"id": "pose0",
|
||||||
|
"enabled": true
|
||||||
|
})");
|
||||||
|
|
||||||
|
NodeContext ctx;
|
||||||
|
EXPECT_FALSE(node.Init(config, ctx));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(AiPoseNodeTest, DisabledNodeStartsAndPassesFramesThrough) {
|
||||||
|
AiPoseNode node;
|
||||||
|
SimpleJson config = ParsePoseConfig(R"({
|
||||||
|
"id": "pose0",
|
||||||
|
"enabled": false
|
||||||
|
})");
|
||||||
|
|
||||||
|
NodeContext ctx;
|
||||||
|
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
|
||||||
|
ctx.output_queues.push_back(out);
|
||||||
|
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto frame = std::make_shared<Frame>();
|
||||||
|
frame->width = 1280;
|
||||||
|
frame->height = 720;
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
|
||||||
|
|
||||||
|
FramePtr forwarded;
|
||||||
|
ASSERT_TRUE(out->TryPop(forwarded));
|
||||||
|
EXPECT_EQ(forwarded.get(), frame.get());
|
||||||
|
EXPECT_EQ(frame->pose, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(AiPoseNodeTest, EnabledNodeRequiresInferBackendAtStart) {
|
||||||
|
AiPoseNode node;
|
||||||
|
SimpleJson config = ParsePoseConfig(R"({
|
||||||
|
"id": "pose0",
|
||||||
|
"enabled": true,
|
||||||
|
"model_path": "models/yolov8n-pose.rknn"
|
||||||
|
})");
|
||||||
|
|
||||||
|
NodeContext ctx;
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
EXPECT_FALSE(node.Start());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(AiPoseNodeTest, DecodePoseOutputsToOriginalCoordinates) {
|
||||||
|
const int num_keypoints = 17;
|
||||||
|
const int total_points = 8400;
|
||||||
|
|
||||||
|
std::vector<float> head0(static_cast<size_t>(65 * 80 * 80), -10.0f);
|
||||||
|
std::vector<float> head1(static_cast<size_t>(65 * 40 * 40), -10.0f);
|
||||||
|
std::vector<float> head2(static_cast<size_t>(65 * 20 * 20), -10.0f);
|
||||||
|
std::vector<float> keypoints(static_cast<size_t>(num_keypoints * 3 * total_points), 0.0f);
|
||||||
|
|
||||||
|
const int cell_x = 10;
|
||||||
|
const int cell_y = 15;
|
||||||
|
const int grid_w = 80;
|
||||||
|
const int point_index = cell_y * grid_w + cell_x;
|
||||||
|
|
||||||
|
auto set_dfl_bin = [&](int channel_group, int bin) {
|
||||||
|
for (int i = 0; i < 16; ++i) {
|
||||||
|
head0[static_cast<size_t>((channel_group * 16 + i) * grid_w * 80 + point_index)] = (i == bin) ? 10.0f : -10.0f;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
set_dfl_bin(0, 4);
|
||||||
|
set_dfl_bin(1, 6);
|
||||||
|
set_dfl_bin(2, 8);
|
||||||
|
set_dfl_bin(3, 10);
|
||||||
|
head0[static_cast<size_t>(64 * grid_w * 80 + point_index)] = 10.0f;
|
||||||
|
|
||||||
|
for (int k = 0; k < num_keypoints; ++k) {
|
||||||
|
keypoints[static_cast<size_t>(k * 3 * total_points + point_index)] = 120.0f + static_cast<float>(k);
|
||||||
|
keypoints[static_cast<size_t>(k * 3 * total_points + total_points + point_index)] = 140.0f + static_cast<float>(k * 2);
|
||||||
|
keypoints[static_cast<size_t>(k * 3 * total_points + 2 * total_points + point_index)] = 0.9f;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto backend = std::make_shared<FakeInferBackend>();
|
||||||
|
backend->outputs_ = {
|
||||||
|
MakeFloatOutput(head0),
|
||||||
|
MakeFloatOutput(head1),
|
||||||
|
MakeFloatOutput(head2),
|
||||||
|
MakeFloatOutput(keypoints),
|
||||||
|
};
|
||||||
|
|
||||||
|
AiPoseNode node;
|
||||||
|
SimpleJson config = ParsePoseConfig(R"({
|
||||||
|
"id": "pose0",
|
||||||
|
"enabled": true,
|
||||||
|
"model_path": "models/yolov8n-pose.rknn",
|
||||||
|
"model_input_w": 640,
|
||||||
|
"model_input_h": 640,
|
||||||
|
"conf_thresh": 0.25,
|
||||||
|
"nms_thresh": 0.45
|
||||||
|
})");
|
||||||
|
|
||||||
|
NodeContext ctx;
|
||||||
|
ctx.infer_backend = backend;
|
||||||
|
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
|
||||||
|
ctx.output_queues.push_back(out);
|
||||||
|
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto rgb = std::make_shared<std::vector<uint8_t>>(static_cast<size_t>(320 * 320 * 3), 127);
|
||||||
|
auto frame = std::make_shared<Frame>();
|
||||||
|
frame->width = 320;
|
||||||
|
frame->height = 320;
|
||||||
|
frame->format = PixelFormat::RGB;
|
||||||
|
frame->data = rgb->data();
|
||||||
|
frame->data_size = rgb->size();
|
||||||
|
frame->stride = 320 * 3;
|
||||||
|
frame->plane_count = 1;
|
||||||
|
frame->planes[0] = {frame->data, frame->stride, static_cast<int>(frame->data_size), 0};
|
||||||
|
frame->data_owner = rgb;
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
|
||||||
|
|
||||||
|
ASSERT_NE(frame->pose, nullptr);
|
||||||
|
ASSERT_EQ(frame->pose->items.size(), 1u);
|
||||||
|
const PoseItem& item = frame->pose->items[0];
|
||||||
|
EXPECT_NEAR(item.bbox.x, 26.0f, 1.0f);
|
||||||
|
EXPECT_NEAR(item.bbox.y, 38.0f, 1.0f);
|
||||||
|
EXPECT_NEAR(item.bbox.w, 48.0f, 1.0f);
|
||||||
|
EXPECT_NEAR(item.bbox.h, 64.0f, 1.0f);
|
||||||
|
ASSERT_EQ(item.keypoints.size(), static_cast<size_t>(num_keypoints));
|
||||||
|
EXPECT_NEAR(item.keypoints[0].point.x, 60.0f, 1.0f);
|
||||||
|
EXPECT_NEAR(item.keypoints[0].point.y, 70.0f, 1.0f);
|
||||||
|
EXPECT_NEAR(item.keypoints[16].point.x, 68.0f, 1.0f);
|
||||||
|
EXPECT_NEAR(item.keypoints[16].point.y, 86.0f, 1.0f);
|
||||||
|
|
||||||
|
FramePtr forwarded;
|
||||||
|
ASSERT_TRUE(out->TryPop(forwarded));
|
||||||
|
EXPECT_EQ(forwarded.get(), frame.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(PoseResultTest, SupportsPerTrackKeypoints) {
|
||||||
|
PoseResult result;
|
||||||
|
result.img_w = 1920;
|
||||||
|
result.img_h = 1080;
|
||||||
|
result.model_name = "pose_model";
|
||||||
|
|
||||||
|
PoseItem item;
|
||||||
|
item.track_id = 42;
|
||||||
|
item.score = 0.93f;
|
||||||
|
item.bbox = Rect{100.0f, 200.0f, 300.0f, 400.0f};
|
||||||
|
item.keypoints.push_back(PoseKeypoint{PosePoint2f{120.0f, 220.0f}, 0.99f});
|
||||||
|
item.keypoints.push_back(PoseKeypoint{PosePoint2f{140.0f, 260.0f}, 0.95f});
|
||||||
|
result.items.push_back(item);
|
||||||
|
|
||||||
|
ASSERT_EQ(result.items.size(), 1u);
|
||||||
|
EXPECT_EQ(result.items[0].track_id, 42);
|
||||||
|
ASSERT_EQ(result.items[0].keypoints.size(), 2u);
|
||||||
|
EXPECT_FLOAT_EQ(result.items[0].keypoints[0].point.x, 120.0f);
|
||||||
|
EXPECT_FLOAT_EQ(result.items[0].keypoints[1].score, 0.95f);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace rk3588
|
||||||
117
tests/test_pose_assoc.cpp
Normal file
117
tests/test_pose_assoc.cpp
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
#include <gtest/gtest.h>
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "frame/frame.h"
|
||||||
|
#include "node.h"
|
||||||
|
#include "pose/pose_result.h"
|
||||||
|
#include "utils/simple_json.h"
|
||||||
|
#include "../plugins/pose_assoc/pose_assoc_node.h"
|
||||||
|
|
||||||
|
namespace rk3588 {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
SimpleJson ParsePoseAssocConfig(const std::string& text) {
|
||||||
|
SimpleJson config;
|
||||||
|
std::string err;
|
||||||
|
const bool ok = ParseSimpleJson(text, config, err);
|
||||||
|
EXPECT_TRUE(ok);
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(PoseAssocNodeTest, AssociatesPoseItemsToTrackedDetections) {
|
||||||
|
PoseAssocNode node;
|
||||||
|
SimpleJson config = ParsePoseAssocConfig(R"({
|
||||||
|
"id": "pose_assoc0",
|
||||||
|
"min_iou": 0.1
|
||||||
|
})");
|
||||||
|
|
||||||
|
NodeContext ctx;
|
||||||
|
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
|
||||||
|
ctx.output_queues.push_back(out);
|
||||||
|
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto frame = std::make_shared<Frame>();
|
||||||
|
frame->det = std::make_shared<DetectionResult>();
|
||||||
|
frame->det->items.push_back(Detection{0, 0.95f, Rect{100.0f, 100.0f, 120.0f, 240.0f}, 7});
|
||||||
|
frame->det->items.push_back(Detection{0, 0.94f, Rect{320.0f, 120.0f, 120.0f, 240.0f}, 8});
|
||||||
|
|
||||||
|
frame->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem pose0;
|
||||||
|
pose0.bbox = Rect{110.0f, 110.0f, 110.0f, 220.0f};
|
||||||
|
PoseItem pose1;
|
||||||
|
pose1.bbox = Rect{330.0f, 130.0f, 100.0f, 210.0f};
|
||||||
|
frame->pose->items.push_back(pose0);
|
||||||
|
frame->pose->items.push_back(pose1);
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
|
||||||
|
ASSERT_EQ(frame->pose->items.size(), 2u);
|
||||||
|
EXPECT_EQ(frame->pose->items[0].track_id, 7);
|
||||||
|
EXPECT_EQ(frame->pose->items[1].track_id, 8);
|
||||||
|
|
||||||
|
FramePtr forwarded;
|
||||||
|
ASSERT_TRUE(out->TryPop(forwarded));
|
||||||
|
EXPECT_EQ(forwarded.get(), frame.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(PoseAssocNodeTest, UsesOneToOneAssignmentForOverlappingCandidates) {
|
||||||
|
PoseAssocNode node;
|
||||||
|
SimpleJson config = ParsePoseAssocConfig(R"({
|
||||||
|
"id": "pose_assoc0",
|
||||||
|
"min_iou": 0.05
|
||||||
|
})");
|
||||||
|
|
||||||
|
NodeContext ctx;
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto frame = std::make_shared<Frame>();
|
||||||
|
frame->det = std::make_shared<DetectionResult>();
|
||||||
|
frame->det->items.push_back(Detection{0, 0.95f, Rect{100.0f, 100.0f, 150.0f, 250.0f}, 11});
|
||||||
|
frame->det->items.push_back(Detection{0, 0.94f, Rect{130.0f, 120.0f, 150.0f, 250.0f}, 12});
|
||||||
|
|
||||||
|
frame->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem pose0;
|
||||||
|
pose0.bbox = Rect{105.0f, 110.0f, 145.0f, 245.0f};
|
||||||
|
PoseItem pose1;
|
||||||
|
pose1.bbox = Rect{132.0f, 125.0f, 148.0f, 245.0f};
|
||||||
|
frame->pose->items.push_back(pose0);
|
||||||
|
frame->pose->items.push_back(pose1);
|
||||||
|
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
|
||||||
|
EXPECT_NE(frame->pose->items[0].track_id, frame->pose->items[1].track_id);
|
||||||
|
EXPECT_TRUE(frame->pose->items[0].track_id == 11 || frame->pose->items[0].track_id == 12);
|
||||||
|
EXPECT_TRUE(frame->pose->items[1].track_id == 11 || frame->pose->items[1].track_id == 12);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(PoseAssocNodeTest, PassesThroughWhenPoseOrDetectionMissing) {
|
||||||
|
PoseAssocNode node;
|
||||||
|
SimpleJson config = ParsePoseAssocConfig(R"({
|
||||||
|
"id": "pose_assoc0"
|
||||||
|
})");
|
||||||
|
|
||||||
|
NodeContext ctx;
|
||||||
|
ASSERT_TRUE(node.Init(config, ctx));
|
||||||
|
ASSERT_TRUE(node.Start());
|
||||||
|
|
||||||
|
auto no_pose = std::make_shared<Frame>();
|
||||||
|
no_pose->det = std::make_shared<DetectionResult>();
|
||||||
|
no_pose->det->items.push_back(Detection{0, 0.9f, Rect{0.0f, 0.0f, 10.0f, 10.0f}, 1});
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(no_pose)), static_cast<int>(NodeStatus::OK));
|
||||||
|
EXPECT_EQ(no_pose->pose, nullptr);
|
||||||
|
|
||||||
|
auto no_det = std::make_shared<Frame>();
|
||||||
|
no_det->pose = std::make_shared<PoseResult>();
|
||||||
|
PoseItem pose;
|
||||||
|
pose.bbox = Rect{0.0f, 0.0f, 10.0f, 10.0f};
|
||||||
|
no_det->pose->items.push_back(pose);
|
||||||
|
EXPECT_EQ(static_cast<int>(node.Process(no_det)), static_cast<int>(NodeStatus::OK));
|
||||||
|
ASSERT_EQ(no_det->pose->items.size(), 1u);
|
||||||
|
EXPECT_EQ(no_det->pose->items[0].track_id, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
} // namespace rk3588
|
||||||
Loading…
Reference in New Issue
Block a user