OrangePi3588Media/tests/test_action_recog.cpp

510 lines
20 KiB
C++

#include <gtest/gtest.h>
#include <memory>
#include <string>
#include "frame/frame.h"
#include "node.h"
#include "pose/pose_result.h"
#include "utils/simple_json.h"
#include "../plugins/action_recog/action_recog_node.h"
namespace rk3588 {
namespace {
// Parses a JSON fixture into a SimpleJson for ActionRecogNode::Init.
//
// text: JSON document handed to ParseSimpleJson.
// Returns the SimpleJson object that ParseSimpleJson filled in. A parse
// failure is recorded as a non-fatal gtest failure (EXPECT, not ASSERT, because
// this helper returns a value), and the object is returned in whatever state
// the parser left it.
SimpleJson ParseActionConfig(const std::string& text) {
    SimpleJson config;
    std::string err;
    const bool ok = ParseSimpleJson(text, config, err);
    // Stream the parser diagnostic into the failure message; otherwise a
    // malformed fixture only reports that `ok` was false.
    EXPECT_TRUE(ok) << "ParseSimpleJson failed: " << err;
    return config;
}
// Bbox-only fall scenario: track 17 is first reported with a tall 120x320 box
// and then, at a later pts, with a wide 260x140 box whose origin is 220 units
// lower. The second frame must carry exactly one Fall event for track 17.
// NOTE(review): Rect literals are read here as {x, y, w, h} and pts as
// milliseconds - confirm against the Rect / Frame declarations.
TEST(ActionRecogTest, EmitsFallAfterRapidDropAndLowPosePersistence) {
    ActionRecogNode node;

    // One "fall" rule configured through the flat bbox thresholds.
    const std::string fall_rule_json = R"({
"id": "action_evt",
"events": [
{
"type": "fall",
"window_ms": 1500,
"min_drop_pixels": 120,
"min_aspect_ratio_delta": 0.35,
"activate_duration_ms": 0
}
]
})";
    SimpleJson config = ParseActionConfig(fall_rule_json);

    // The node is given a single 4-slot, drop-oldest output queue.
    NodeContext ctx;
    const auto sink = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(sink);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());

    // Both frames share the same 1920x1080 geometry for image and detections.
    const auto blank_frame = [] {
        auto frame = std::make_shared<Frame>();
        frame->width = 1920;
        frame->height = 1080;
        frame->det = std::make_shared<DetectionResult>();
        frame->det->img_w = 1920;
        frame->det->img_h = 1080;
        return frame;
    };

    // First observation: tall, narrow box.
    auto upright = blank_frame();
    upright->pts = 1000;
    upright->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 240.0f, 120.0f, 320.0f}, 17});

    // Second observation: wide, flat box further down the image.
    auto fallen = blank_frame();
    fallen->pts = 1600;
    fallen->det->items.push_back(Detection{0, 0.94f, Rect{760.0f, 460.0f, 260.0f, 140.0f}, 17});

    const int ok_status = static_cast<int>(NodeStatus::OK);
    EXPECT_EQ(static_cast<int>(node.Process(upright)), ok_status);
    EXPECT_EQ(static_cast<int>(node.Process(fallen)), ok_status);

    // ASSERTs guard the indexing that follows.
    ASSERT_NE(fallen->behavior_events, nullptr);
    const auto& events = fallen->behavior_events->items;
    ASSERT_EQ(events.size(), 1u);
    EXPECT_EQ(events[0].type, BehaviorEventType::Fall);
    ASSERT_EQ(events[0].track_ids.size(), 1u);
    EXPECT_EQ(events[0].track_ids[0], 17);
}
// Bbox-only fight scenario: tracks 31 and 32 start 160 units apart in x and
// both shift between the two frames (31 by +80/+20, 32 by -40/-20). The second
// frame must carry exactly one Fight event listing track 31 then track 32.
// NOTE(review): Rect literals are read here as {x, y, w, h} - confirm.
TEST(ActionRecogTest, EmitsFightWhenTwoTracksStayCloseWithRepeatedMotion) {
    ActionRecogNode node;

    // One "fight" rule configured through the flat bbox thresholds.
    const std::string fight_rule_json = R"({
"id": "action_evt",
"events": [
{
"type": "fight",
"window_ms": 1200,
"proximity_pixels": 220,
"min_motion_pixels": 70,
"activate_duration_ms": 0
}
]
})";
    SimpleJson config = ParseActionConfig(fight_rule_json);

    // The node is given a single 4-slot, drop-oldest output queue.
    NodeContext ctx;
    const auto sink = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(sink);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());

    // Both frames share the same 1920x1080 geometry for image and detections.
    const auto blank_frame = [] {
        auto frame = std::make_shared<Frame>();
        frame->width = 1920;
        frame->height = 1080;
        frame->det = std::make_shared<DetectionResult>();
        frame->det->img_w = 1920;
        frame->det->img_h = 1080;
        return frame;
    };

    // Earlier frame: the two people stand side by side.
    auto before = blank_frame();
    before->pts = 1000;
    before->det->items.push_back(Detection{0, 0.90f, Rect{700.0f, 320.0f, 120.0f, 300.0f}, 31});
    before->det->items.push_back(Detection{0, 0.89f, Rect{860.0f, 320.0f, 120.0f, 300.0f}, 32});

    // Later frame: both boxes have moved towards each other.
    auto after = blank_frame();
    after->pts = 1300;
    after->det->items.push_back(Detection{0, 0.91f, Rect{780.0f, 340.0f, 120.0f, 300.0f}, 31});
    after->det->items.push_back(Detection{0, 0.92f, Rect{820.0f, 300.0f, 120.0f, 300.0f}, 32});

    const int ok_status = static_cast<int>(NodeStatus::OK);
    EXPECT_EQ(static_cast<int>(node.Process(before)), ok_status);
    EXPECT_EQ(static_cast<int>(node.Process(after)), ok_status);

    // ASSERTs guard the indexing that follows.
    ASSERT_NE(after->behavior_events, nullptr);
    const auto& events = after->behavior_events->items;
    ASSERT_EQ(events.size(), 1u);
    EXPECT_EQ(events[0].type, BehaviorEventType::Fight);
    ASSERT_EQ(events[0].track_ids.size(), 2u);
    EXPECT_EQ(events[0].track_ids[0], 31);
    EXPECT_EQ(events[0].track_ids[1], 32);
}
// Pose-driven fall with flat config keys: min_aspect_ratio_delta is set to 10.0
// while the box only changes from 180x300 to 200x280, and the pose keypoints
// 5/6 drop from y=280 to y=520. A single Fall event for track 17 is expected
// on the second frame.
// NOTE(review): keypoint indices 5/6 and 11/12 look like COCO shoulders and
// hips, and the pose items carry no track_id here (the node presumably pairs
// them with detections some other way) - confirm in action_recog_node.
TEST(ActionRecogTest, EmitsFallFromPoseSequenceWhenBboxShapeChangeIsSmall) {
    ActionRecogNode node;

    // Fall rule with both bbox thresholds and pose_* thresholds present.
    const std::string fall_rule_json = R"({
"id": "action_evt",
"events": [
{
"type": "fall",
"window_ms": 1500,
"min_drop_pixels": 120,
"min_aspect_ratio_delta": 10.0,
"activate_duration_ms": 0,
"pose_min_torso_drop_pixels": 120,
"pose_max_upright_ratio": 0.6
}
]
})";
    SimpleJson config = ParseActionConfig(fall_rule_json);

    // The node is given a single 4-slot, drop-oldest output queue.
    NodeContext ctx;
    const auto sink = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(sink);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());

    // Frame skeleton: 1920x1080 image with empty detection and pose results
    // that declare the same geometry.
    const auto blank_frame = [] {
        auto frame = std::make_shared<Frame>();
        frame->width = 1920;
        frame->height = 1080;
        frame->det = std::make_shared<DetectionResult>();
        frame->det->img_w = 1920;
        frame->det->img_h = 1080;
        frame->pose = std::make_shared<PoseResult>();
        frame->pose->img_w = 1920;
        frame->pose->img_h = 1080;
        return frame;
    };
    // Keypoint at (x, y); every keypoint in this test uses 0.9f as its last field.
    const auto kp = [](float x, float y) { return PoseKeypoint{PosePoint2f{x, y}, 0.9f}; };

    // First observation: the same box is used for the detection and the pose.
    const Rect standing_box{800.0f, 220.0f, 180.0f, 300.0f};
    PoseItem standing_pose;
    standing_pose.bbox = standing_box;
    standing_pose.score = 0.9f;
    standing_pose.keypoints.resize(17);
    standing_pose.keypoints[5] = kp(840.0f, 280.0f);
    standing_pose.keypoints[6] = kp(940.0f, 280.0f);
    standing_pose.keypoints[11] = kp(850.0f, 420.0f);
    standing_pose.keypoints[12] = kp(930.0f, 420.0f);

    auto standing = blank_frame();
    standing->pts = 1000;
    standing->det->items.push_back(Detection{0, 0.92f, standing_box, 17});
    standing->pose->items.push_back(standing_pose);

    // Second observation: keypoints 5/6 spread wider and sit only 70 units
    // above keypoints 11/12.
    const Rect fallen_box{780.0f, 420.0f, 200.0f, 280.0f};
    PoseItem fallen_pose;
    fallen_pose.bbox = fallen_box;
    fallen_pose.score = 0.91f;
    fallen_pose.keypoints.resize(17);
    fallen_pose.keypoints[5] = kp(790.0f, 520.0f);
    fallen_pose.keypoints[6] = kp(970.0f, 520.0f);
    fallen_pose.keypoints[11] = kp(820.0f, 590.0f);
    fallen_pose.keypoints[12] = kp(940.0f, 590.0f);

    auto fallen = blank_frame();
    fallen->pts = 1600;
    fallen->det->items.push_back(Detection{0, 0.93f, fallen_box, 17});
    fallen->pose->items.push_back(fallen_pose);

    const int ok_status = static_cast<int>(NodeStatus::OK);
    EXPECT_EQ(static_cast<int>(node.Process(standing)), ok_status);
    EXPECT_EQ(static_cast<int>(node.Process(fallen)), ok_status);

    // ASSERTs guard the indexing that follows.
    ASSERT_NE(fallen->behavior_events, nullptr);
    const auto& events = fallen->behavior_events->items;
    ASSERT_EQ(events.size(), 1u);
    EXPECT_EQ(events[0].type, BehaviorEventType::Fall);
    ASSERT_EQ(events[0].track_ids.size(), 1u);
    EXPECT_EQ(events[0].track_ids[0], 17);
}
// Pose-driven fight with flat config keys: min_motion_pixels is 1000 while the
// boxes move by at most 10 units, and keypoints 9/10 of both people travel 60
// to 80 units in x and end up within 40 units of each other. A single Fight
// event listing tracks 31 and 32 is expected on the second frame.
// NOTE(review): keypoint indices 9/10 look like COCO wrists, and the pose
// items carry no track_id here - confirm how the node pairs them with tracks.
TEST(ActionRecogTest, EmitsFightFromPoseMotionWhenBboxMotionIsSmall) {
    ActionRecogNode node;

    // Fight rule with both bbox thresholds and pose_* thresholds present.
    const std::string fight_rule_json = R"({
"id": "action_evt",
"events": [
{
"type": "fight",
"window_ms": 1200,
"proximity_pixels": 220,
"min_motion_pixels": 1000,
"activate_duration_ms": 0,
"pose_min_wrist_motion_pixels": 120,
"pose_max_wrist_distance_pixels": 120
}
]
})";
    SimpleJson config = ParseActionConfig(fight_rule_json);

    // The node is given a single 4-slot, drop-oldest output queue.
    NodeContext ctx;
    const auto sink = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(sink);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());

    // Frame skeleton: 1920x1080 image with empty detection and pose results
    // that declare the same geometry.
    const auto blank_frame = [] {
        auto frame = std::make_shared<Frame>();
        frame->width = 1920;
        frame->height = 1080;
        frame->det = std::make_shared<DetectionResult>();
        frame->det->img_w = 1920;
        frame->det->img_h = 1080;
        frame->pose = std::make_shared<PoseResult>();
        frame->pose->img_w = 1920;
        frame->pose->img_h = 1080;
        return frame;
    };
    // Pose with 17 keypoint slots of which only 9 and 10 are filled in.
    const auto wrists_only = [](const Rect& box, float x9, float y9, float x10, float y10) {
        PoseItem item;
        item.bbox = box;
        item.keypoints.resize(17);
        item.keypoints[9] = PoseKeypoint{PosePoint2f{x9, y9}, 0.9f};
        item.keypoints[10] = PoseKeypoint{PosePoint2f{x10, y10}, 0.9f};
        return item;
    };

    // Earlier frame: each pose reuses the box of the matching detection.
    const Rect left_box_before{700.0f, 320.0f, 120.0f, 300.0f};
    const Rect right_box_before{860.0f, 320.0f, 120.0f, 300.0f};
    auto before = blank_frame();
    before->pts = 1000;
    before->det->items.push_back(Detection{0, 0.90f, left_box_before, 31});
    before->det->items.push_back(Detection{0, 0.89f, right_box_before, 32});
    before->pose->items.push_back(wrists_only(left_box_before, 780.0f, 430.0f, 790.0f, 450.0f));
    before->pose->items.push_back(wrists_only(right_box_before, 900.0f, 430.0f, 910.0f, 450.0f));

    // Later frame: boxes barely move, keypoints 9/10 swing across.
    const Rect left_box_after{710.0f, 325.0f, 120.0f, 300.0f};
    const Rect right_box_after{850.0f, 315.0f, 120.0f, 300.0f};
    auto after = blank_frame();
    after->pts = 1300;
    after->det->items.push_back(Detection{0, 0.91f, left_box_after, 31});
    after->det->items.push_back(Detection{0, 0.92f, right_box_after, 32});
    after->pose->items.push_back(wrists_only(left_box_after, 860.0f, 430.0f, 870.0f, 450.0f));
    after->pose->items.push_back(wrists_only(right_box_after, 840.0f, 430.0f, 830.0f, 450.0f));

    const int ok_status = static_cast<int>(NodeStatus::OK);
    EXPECT_EQ(static_cast<int>(node.Process(before)), ok_status);
    EXPECT_EQ(static_cast<int>(node.Process(after)), ok_status);

    // ASSERTs guard the indexing that follows.
    ASSERT_NE(after->behavior_events, nullptr);
    const auto& events = after->behavior_events->items;
    ASSERT_EQ(events.size(), 1u);
    EXPECT_EQ(events[0].type, BehaviorEventType::Fight);
    ASSERT_EQ(events[0].track_ids.size(), 2u);
    EXPECT_EQ(events[0].track_ids[0], 31);
    EXPECT_EQ(events[0].track_ids[1], 32);
}
// Structured config form: the fall rule nests its settings under "bbox",
// "pose" and "fusion" objects, with the bbox cue disabled and the pose cue
// enabled. Using the same keypoint sequence as the flat-config pose test, the
// second frame must carry exactly one Fall event.
// No output queue is registered on the NodeContext in this test.
TEST(ActionRecogTest, SupportsStructuredFusionConfigForFall) {
    ActionRecogNode node;

    const std::string structured_fall_json = R"({
"id": "action_evt",
"events": [
{
"type": "fall",
"window_ms": 1500,
"activate_duration_ms": 0,
"bbox": {
"enabled": false
},
"pose": {
"enabled": true,
"min_torso_drop_pixels": 120,
"max_upright_ratio": 0.6
},
"fusion": {
"match_mode": "any"
}
}
]
})";
    SimpleJson config = ParseActionConfig(structured_fall_json);

    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());

    // Frame skeleton: 1920x1080 image; the detection and pose results are left
    // with their default geometry fields.
    const auto blank_frame = [] {
        auto frame = std::make_shared<Frame>();
        frame->width = 1920;
        frame->height = 1080;
        frame->det = std::make_shared<DetectionResult>();
        frame->pose = std::make_shared<PoseResult>();
        return frame;
    };
    // Keypoint at (x, y); every keypoint in this test uses 0.9f as its last field.
    const auto kp = [](float x, float y) { return PoseKeypoint{PosePoint2f{x, y}, 0.9f}; };

    // First observation: pose explicitly tagged with track 17.
    const Rect standing_box{800.0f, 220.0f, 180.0f, 300.0f};
    PoseItem standing_pose;
    standing_pose.track_id = 17;
    standing_pose.bbox = standing_box;
    standing_pose.keypoints.resize(17);
    standing_pose.keypoints[5] = kp(840.0f, 280.0f);
    standing_pose.keypoints[6] = kp(940.0f, 280.0f);
    standing_pose.keypoints[11] = kp(850.0f, 420.0f);
    standing_pose.keypoints[12] = kp(930.0f, 420.0f);

    auto standing = blank_frame();
    standing->pts = 1000;
    standing->det->items.push_back(Detection{0, 0.92f, standing_box, 17});
    standing->pose->items.push_back(standing_pose);

    // Second observation: box size unchanged (180x300), keypoints 5/6 drop to
    // y=520 and sit 70 units above keypoints 11/12.
    const Rect fallen_box{790.0f, 400.0f, 180.0f, 300.0f};
    PoseItem fallen_pose;
    fallen_pose.track_id = 17;
    fallen_pose.bbox = fallen_box;
    fallen_pose.keypoints.resize(17);
    fallen_pose.keypoints[5] = kp(790.0f, 520.0f);
    fallen_pose.keypoints[6] = kp(970.0f, 520.0f);
    fallen_pose.keypoints[11] = kp(820.0f, 590.0f);
    fallen_pose.keypoints[12] = kp(940.0f, 590.0f);

    auto fallen = blank_frame();
    fallen->pts = 1600;
    fallen->det->items.push_back(Detection{0, 0.93f, fallen_box, 17});
    fallen->pose->items.push_back(fallen_pose);

    const int ok_status = static_cast<int>(NodeStatus::OK);
    EXPECT_EQ(static_cast<int>(node.Process(standing)), ok_status);
    EXPECT_EQ(static_cast<int>(node.Process(fallen)), ok_status);

    // ASSERTs guard the indexing that follows.
    ASSERT_NE(fallen->behavior_events, nullptr);
    const auto& events = fallen->behavior_events->items;
    ASSERT_EQ(events.size(), 1u);
    EXPECT_EQ(events[0].type, BehaviorEventType::Fall);
}
// Negative fall case with the structured config: keypoints 5/6 still drop from
// y=280 to y=520, but in the second frame they are only 40 units apart in x and
// sit 180 units above keypoints 11/12. The node must still attach a
// behavior_events container to the second frame, and it must be empty.
// NOTE(review): this presumably fails the max_upright_ratio condition (torso
// remains tall and narrow) - confirm against the node's ratio definition.
// Frame width/height are left at their defaults and no output queue is set.
TEST(ActionRecogTest, SuppressesFallFalsePositiveWhenPoseShapeConditionFails) {
    ActionRecogNode node;

    const std::string structured_fall_json = R"({
"id": "action_evt",
"events": [
{
"type": "fall",
"window_ms": 1500,
"activate_duration_ms": 0,
"bbox": {
"enabled": false
},
"pose": {
"enabled": true,
"min_torso_drop_pixels": 120,
"max_upright_ratio": 0.8
},
"fusion": {
"match_mode": "any"
}
}
]
})";
    SimpleJson config = ParseActionConfig(structured_fall_json);

    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());

    // Frame skeleton: only the detection and pose containers are created.
    const auto blank_frame = [] {
        auto frame = std::make_shared<Frame>();
        frame->det = std::make_shared<DetectionResult>();
        frame->pose = std::make_shared<PoseResult>();
        return frame;
    };
    // Keypoint at (x, y); every keypoint in this test uses 0.9f as its last field.
    const auto kp = [](float x, float y) { return PoseKeypoint{PosePoint2f{x, y}, 0.9f}; };

    // First observation.
    const Rect box_before{800.0f, 220.0f, 180.0f, 300.0f};
    PoseItem pose_before;
    pose_before.track_id = 17;
    pose_before.bbox = box_before;
    pose_before.keypoints.resize(17);
    pose_before.keypoints[5] = kp(840.0f, 280.0f);
    pose_before.keypoints[6] = kp(940.0f, 280.0f);
    pose_before.keypoints[11] = kp(850.0f, 420.0f);
    pose_before.keypoints[12] = kp(930.0f, 420.0f);

    auto before = blank_frame();
    before->pts = 1000;
    before->det->items.push_back(Detection{0, 0.92f, box_before, 17});
    before->pose->items.push_back(pose_before);

    // Second observation: lower in the image but with a tall, narrow torso.
    const Rect box_after{790.0f, 400.0f, 180.0f, 300.0f};
    PoseItem pose_after;
    pose_after.track_id = 17;
    pose_after.bbox = box_after;
    pose_after.keypoints.resize(17);
    pose_after.keypoints[5] = kp(860.0f, 520.0f);
    pose_after.keypoints[6] = kp(900.0f, 520.0f);
    pose_after.keypoints[11] = kp(865.0f, 700.0f);
    pose_after.keypoints[12] = kp(905.0f, 700.0f);

    auto after = blank_frame();
    after->pts = 1600;
    after->det->items.push_back(Detection{0, 0.93f, box_after, 17});
    after->pose->items.push_back(pose_after);

    const int ok_status = static_cast<int>(NodeStatus::OK);
    EXPECT_EQ(static_cast<int>(node.Process(before)), ok_status);
    EXPECT_EQ(static_cast<int>(node.Process(after)), ok_status);

    // The container must exist even when nothing fired.
    ASSERT_NE(after->behavior_events, nullptr);
    EXPECT_TRUE(after->behavior_events->items.empty());
}
// Negative fight case with the structured config: the bbox cue is disabled and
// max_wrist_distance_pixels is 60, while keypoints 9/10 of the two people are
// still 150+ units apart in x in the second frame. The node must attach a
// behavior_events container to the second frame, and it must be empty.
// NOTE(review): keypoint indices 9/10 look like COCO wrists - confirm.
// Frame width/height are left at their defaults and no output queue is set.
TEST(ActionRecogTest, SuppressesFightFalsePositiveWhenWristsStayFarApart) {
    ActionRecogNode node;

    const std::string structured_fight_json = R"({
"id": "action_evt",
"events": [
{
"type": "fight",
"window_ms": 1200,
"activate_duration_ms": 0,
"bbox": {
"enabled": false,
"proximity_pixels": 220,
"min_motion_pixels": 0
},
"pose": {
"enabled": true,
"min_wrist_motion_pixels": 120,
"max_wrist_distance_pixels": 60
},
"fusion": {
"match_mode": "any"
}
}
]
})";
    SimpleJson config = ParseActionConfig(structured_fight_json);

    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());

    // Frame skeleton: only the detection and pose containers are created.
    const auto blank_frame = [] {
        auto frame = std::make_shared<Frame>();
        frame->det = std::make_shared<DetectionResult>();
        frame->pose = std::make_shared<PoseResult>();
        return frame;
    };
    // Pose with 17 keypoint slots of which only 9 and 10 are filled in.
    const auto wrists_only = [](const Rect& box, float x9, float y9, float x10, float y10) {
        PoseItem item;
        item.bbox = box;
        item.keypoints.resize(17);
        item.keypoints[9] = PoseKeypoint{PosePoint2f{x9, y9}, 0.9f};
        item.keypoints[10] = PoseKeypoint{PosePoint2f{x10, y10}, 0.9f};
        return item;
    };

    // Earlier frame: poses are explicitly tagged with tracks 31 and 32.
    const Rect left_box_before{700.0f, 320.0f, 120.0f, 300.0f};
    const Rect right_box_before{860.0f, 320.0f, 120.0f, 300.0f};
    PoseItem left_before = wrists_only(left_box_before, 760.0f, 430.0f, 770.0f, 450.0f);
    left_before.track_id = 31;
    PoseItem right_before = wrists_only(right_box_before, 960.0f, 430.0f, 970.0f, 450.0f);
    right_before.track_id = 32;

    auto before = blank_frame();
    before->pts = 1000;
    before->det->items.push_back(Detection{0, 0.90f, left_box_before, 31});
    before->det->items.push_back(Detection{0, 0.89f, right_box_before, 32});
    before->pose->items.push_back(left_before);
    before->pose->items.push_back(right_before);

    // Later frame: both people's keypoints 9/10 shift right (+60 and +20) and
    // never get close to each other.
    const Rect left_box_after{710.0f, 325.0f, 120.0f, 300.0f};
    const Rect right_box_after{850.0f, 315.0f, 120.0f, 300.0f};
    PoseItem left_after = wrists_only(left_box_after, 820.0f, 430.0f, 830.0f, 450.0f);
    left_after.track_id = 31;
    PoseItem right_after = wrists_only(right_box_after, 980.0f, 430.0f, 990.0f, 450.0f);
    right_after.track_id = 32;

    auto after = blank_frame();
    after->pts = 1300;
    after->det->items.push_back(Detection{0, 0.91f, left_box_after, 31});
    after->det->items.push_back(Detection{0, 0.92f, right_box_after, 32});
    after->pose->items.push_back(left_after);
    after->pose->items.push_back(right_after);

    const int ok_status = static_cast<int>(NodeStatus::OK);
    EXPECT_EQ(static_cast<int>(node.Process(before)), ok_status);
    EXPECT_EQ(static_cast<int>(node.Process(after)), ok_status);

    // The container must exist even when nothing fired.
    ASSERT_NE(after->behavior_events, nullptr);
    EXPECT_TRUE(after->behavior_events->items.empty());
}
} // namespace
} // namespace rk3588