OrangePi3588Media/plugins/action_recog/action_recog_node.cpp

255 lines
9.1 KiB
C++

#include "action_recog_node.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <deque>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "behavior/behavior_event.h"
#include "utils/logger.h"
namespace rk3588 {
namespace {
// Kinds of action a rule can detect. ParseRules maps the config "type"
// strings "fall" and "fight" onto these values.
enum class ActionEventKind {
    Fall,   // single-track rule (config type "fall")
    Fight,  // track-pair rule (config type "fight")
};
// One detection rule, populated by ParseRules from an entry of the config
// "events" array. Fields that do not apply to a rule's kind keep their defaults.
struct ActionEventRule {
    // Detector this rule drives.
    ActionEventKind kind{ActionEventKind::Fall};
    // Look-back window (config key "window_ms").
    uint64_t window_ms{1000};
    // Minimum observed history span before an event is emitted
    // (config key "activate_duration_ms").
    uint64_t activate_duration_ms{0};
    // Fall: minimum increase of the bbox centre Y versus the reference sample.
    float min_drop_pixels{0.0f};
    // Fall: minimum increase of the bbox width/height ratio versus the reference sample.
    float min_aspect_ratio_delta{0.0f};
    // Fight: maximum centre-to-centre distance between the two tracks.
    float proximity_pixels{0.0f};
    // Fight: minimum combined centre displacement of the two tracks.
    float min_motion_pixels{0.0f};
};
// One observation of a track: the frame timestamp and the detection box
// recorded for it on that frame.
struct TrackSample {
    uint64_t pts{0};  // frame->pts at the time of the observation
    Rect bbox{};      // detection bounding box on that frame
};
static bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& out, std::string& err) {
const SimpleJson* events = config.Find("events");
if (!events || !events->IsArray()) {
err = "events must be array";
return false;
}
out.clear();
for (const auto& ev : events->AsArray()) {
if (!ev.IsObject()) {
err = "event entry must be object";
return false;
}
ActionEventRule rule;
const std::string type = ev.ValueOr<std::string>("type", "");
rule.window_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("window_ms", 1000)));
rule.activate_duration_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("activate_duration_ms", 0)));
if (type == "fall") {
rule.kind = ActionEventKind::Fall;
rule.min_drop_pixels = ev.ValueOr<float>("min_drop_pixels", 0.0f);
rule.min_aspect_ratio_delta = ev.ValueOr<float>("min_aspect_ratio_delta", 0.0f);
} else if (type == "fight") {
rule.kind = ActionEventKind::Fight;
rule.proximity_pixels = ev.ValueOr<float>("proximity_pixels", 0.0f);
rule.min_motion_pixels = ev.ValueOr<float>("min_motion_pixels", 0.0f);
} else {
err = "unsupported event type: " + type;
return false;
}
out.push_back(std::move(rule));
}
return true;
}
// Horizontal centre of `rect`: its x plus half of its w.
static float CenterX(const Rect& rect) {
    const float half_width = rect.w * 0.5f;
    return rect.x + half_width;
}
// Vertical centre of `rect`: its y plus half of its h.
static float CenterY(const Rect& rect) {
    const float half_height = rect.h * 0.5f;
    return rect.y + half_height;
}
// Euclidean distance between the centres of two boxes. Process uses it to
// decide whether two tracks are close enough for the fight rule.
static float CenterDistance(const Rect& lhs, const Rect& rhs) {
    const float delta_x = CenterX(lhs) - CenterX(rhs);
    const float delta_y = CenterY(lhs) - CenterY(rhs);
    const float squared = (delta_x * delta_x) + (delta_y * delta_y);
    return std::sqrt(squared);
}
// Euclidean distance between the centres of two boxes. Process calls it with a
// track's current box and an earlier box of the same track to measure how far
// the track moved. The arithmetic is the same as CenterDistance.
static float MotionDistance(const Rect& lhs, const Rect& rhs) {
    const float shift_x = CenterX(lhs) - CenterX(rhs);
    const float shift_y = CenterY(lhs) - CenterY(rhs);
    const float shift_sq = (shift_x * shift_x) + (shift_y * shift_y);
    return std::sqrt(shift_sq);
}
// Width-to-height ratio of `rect`. Returns 0 when the height is not positive,
// which avoids dividing by zero.
static float AspectRatio(const Rect& rect) {
    if (rect.h > 0.0f) {
        return rect.w / rect.h;
    }
    return 0.0f;
}
// Maps the rule kind onto the BehaviorEventType stored in emitted events.
// Any kind other than Fall maps to Fight.
static BehaviorEventType ToBehaviorEventType(ActionEventKind kind) {
    if (kind == ActionEventKind::Fall) {
        return BehaviorEventType::Fall;
    }
    return BehaviorEventType::Fight;
}
} // namespace
// Private state of ActionRecogNode, kept out of the header.
struct ActionRecogNode::Impl {
    // Samples recorded for one track, oldest first.
    using TrackHistory = std::deque<TrackSample>;

    // Message of the most recent ParseRules failure (written during Init).
    std::string init_err;
    // Rules parsed from the config "events" array.
    std::vector<ActionEventRule> rules;
    // Per-track sample history keyed by detection track_id.
    std::map<int, TrackHistory> history;
};
// Creates the node with an empty Impl; rules are loaded later by Init.
ActionRecogNode::ActionRecogNode() : impl_(std::make_unique<Impl>()) {}
// Defined in this file, after Impl is a complete type.
// NOTE(review): presumably required because impl_ is a std::unique_ptr<Impl>
// declared in the header - confirm against action_recog_node.h.
ActionRecogNode::~ActionRecogNode() = default;
// Returns the node id. Init sets it from the config "id" key, defaulting to
// "action_recog".
std::string ActionRecogNode::Id() const {
return id_;
}
// Returns the fixed node type name, "action_recog".
std::string ActionRecogNode::Type() const {
return "action_recog";
}
// Configures the node from `config` and wires its outputs from `ctx`.
//
// Reads the optional "id" key (default "action_recog") and parses the "events"
// array into rules. The output queues are taken from `ctx` only when parsing
// succeeds.
//
// @return true on success; false after logging the parse error otherwise.
bool ActionRecogNode::Init(const SimpleJson& config, const NodeContext& ctx) {
    id_ = config.ValueOr<std::string>("id", "action_recog");

    const bool rules_ok = ParseRules(config, impl_->rules, impl_->init_err);
    if (rules_ok) {
        output_queues_ = ctx.output_queues;
        return true;
    }

    LogError("[action_recog] invalid config: " + impl_->init_err);
    return false;
}
// Nothing to start for this node; always reports success.
bool ActionRecogNode::Start() {
return true;
}
// No-op: the body is empty.
void ActionRecogNode::Stop() {}
// Runs every configured rule against the detections of `frame`, appends the
// resulting events to frame->behavior_events, and forwards the frame.
//
// Steps:
//   1. Append each tracked detection (track_id >= 0) to that track's history.
//   2. Trim history to the longest window_ms of any rule and forget tracks whose
//      newest sample is older than that, so the history map stays bounded.
//   3. For each rule, compare the current box with the oldest sample that lies
//      inside that rule's own window_ms and emit an Active event on a match:
//        - Fall:  one event per track whose centre-Y increase and aspect-ratio
//                 increase both reach the rule thresholds.
//        - Fight: one event per pair of tracks whose centres are within
//                 proximity_pixels and whose combined displacement reaches
//                 min_motion_pixels.
//      In both cases the observed span must reach activate_duration_ms.
//
// Timestamp differences are compared directly with the *_ms rule fields, so
// frame->pts is assumed to be in milliseconds - TODO confirm with the producer.
//
// @param frame Frame to analyse; may be null.
// @return NodeStatus::DROP for a null frame; otherwise NodeStatus::OK after the
//         frame has been pushed to every output queue.
NodeStatus ActionRecogNode::Process(FramePtr frame) {
    if (!frame) return NodeStatus::DROP;
    EnsureBehaviorEvents(*frame);

    // Without detections or without rules there is nothing to evaluate. The
    // empty-rules check also keeps the code below from touching an empty vector.
    if (!frame->det || impl_->rules.empty()) {
        PushToDownstream(frame);
        return NodeStatus::OK;
    }

    const uint64_t now = frame->pts;

    // History has to cover the longest window requested by any rule.
    uint64_t retain_ms = 0;
    for (const auto& rule : impl_->rules) {
        retain_ms = std::max(retain_ms, rule.window_ms);
    }

    // True when `sample` is strictly older than `window_ms` relative to this frame.
    const auto expired = [now](const TrackSample& sample, uint64_t window_ms) {
        return now > sample.pts && (now - sample.pts) > window_ms;
    };

    // Time elapsed since `sample`, clamped to 0 if timestamps went backwards.
    const auto elapsed_since = [now](const TrackSample& sample) -> uint64_t {
        return now >= sample.pts ? (now - sample.pts) : 0;
    };

    std::map<int, Rect> current_tracks;
    for (const auto& det : frame->det->items) {
        if (det.track_id < 0) continue;
        current_tracks[det.track_id] = det.bbox;

        auto& samples = impl_->history[det.track_id];
        samples.push_back(TrackSample{now, det.bbox});
        // The sample just pushed is never expired, so this cannot empty the deque.
        while (!samples.empty() && expired(samples.front(), retain_ms)) {
            samples.pop_front();
        }
    }

    // Forget tracks that have not been seen within the retention window.
    for (auto it = impl_->history.begin(); it != impl_->history.end();) {
        const auto& samples = it->second;
        if (samples.empty() || expired(samples.back(), retain_ms)) {
            it = impl_->history.erase(it);
        } else {
            ++it;
        }
    }

    // Oldest retained sample of `track_id` that still lies inside `window_ms`,
    // or nullptr when the track has no usable history.
    const auto window_start = [&](int track_id, uint64_t window_ms) -> const TrackSample* {
        const auto it = impl_->history.find(track_id);
        if (it == impl_->history.end()) return nullptr;
        for (const auto& sample : it->second) {
            if (!expired(sample, window_ms)) return &sample;
        }
        return nullptr;
    };

    // Fills the fields shared by every event this node emits.
    const auto new_event = [&](const ActionEventRule& rule, const Rect& bbox,
                               uint64_t start_pts, uint64_t duration) {
        BehaviorEventItem event;
        event.type = ToBehaviorEventType(rule.kind);
        event.status = BehaviorEventStatus::Active;
        event.score = 1.0f;
        event.bbox = bbox;
        event.start_pts = start_pts;
        event.last_pts = now;
        event.duration_ms = duration;
        event.source = "action_recog";
        return event;
    };

    for (const auto& rule : impl_->rules) {
        if (rule.kind == ActionEventKind::Fall) {
            for (const auto& [track_id, bbox] : current_tracks) {
                const TrackSample* first = window_start(track_id, rule.window_ms);
                if (!first) continue;

                const float drop = CenterY(bbox) - CenterY(first->bbox);
                const float aspect_delta = AspectRatio(bbox) - AspectRatio(first->bbox);
                const uint64_t duration = elapsed_since(*first);
                if (drop < rule.min_drop_pixels) continue;
                if (aspect_delta < rule.min_aspect_ratio_delta) continue;
                if (duration < rule.activate_duration_ms) continue;

                BehaviorEventItem event = new_event(rule, bbox, first->pts, duration);
                event.track_ids.push_back(track_id);
                frame->behavior_events->items.push_back(std::move(event));
            }
        } else {
            // Iterating the ordered map with right > left visits every unordered
            // pair exactly once, so no extra de-duplication is needed.
            for (auto left = current_tracks.begin(); left != current_tracks.end(); ++left) {
                for (auto right = std::next(left); right != current_tracks.end(); ++right) {
                    const Rect& left_box = left->second;
                    const Rect& right_box = right->second;
                    if (CenterDistance(left_box, right_box) > rule.proximity_pixels) continue;

                    const TrackSample* left_first = window_start(left->first, rule.window_ms);
                    const TrackSample* right_first = window_start(right->first, rule.window_ms);
                    if (!left_first || !right_first) continue;

                    const float left_motion = MotionDistance(left_box, left_first->bbox);
                    const float right_motion = MotionDistance(right_box, right_first->bbox);
                    if ((left_motion + right_motion) < rule.min_motion_pixels) continue;

                    // Both tracks must have been observed for the required span.
                    const uint64_t duration =
                        std::min(elapsed_since(*left_first), elapsed_since(*right_first));
                    if (duration < rule.activate_duration_ms) continue;

                    // The event box is the union of the two track boxes.
                    const auto min_x = std::min(left_box.x, right_box.x);
                    const auto min_y = std::min(left_box.y, right_box.y);
                    const auto max_x = std::max(left_box.x + left_box.w, right_box.x + right_box.w);
                    const auto max_y = std::max(left_box.y + left_box.h, right_box.y + right_box.h);
                    const Rect pair_box{min_x, min_y, max_x - min_x, max_y - min_y};

                    BehaviorEventItem event = new_event(rule, pair_box, now - duration, duration);
                    event.track_ids.push_back(left->first);
                    event.track_ids.push_back(right->first);
                    frame->behavior_events->items.push_back(std::move(event));
                }
            }
        }
    }

    PushToDownstream(frame);
    return NodeStatus::OK;
}
// Makes sure `frame` carries a BehaviorEventResult so events can be appended.
// An existing result is left untouched.
void ActionRecogNode::EnsureBehaviorEvents(Frame& frame) {
    if (frame.behavior_events) {
        return;
    }
    frame.behavior_events = std::make_shared<BehaviorEventResult>();
}
// Hands `frame` to every non-null output queue, in the order they are stored.
void ActionRecogNode::PushToDownstream(const FramePtr& frame) {
    for (auto& queue : output_queues_) {
        if (!queue) {
            continue;
        }
        queue->Push(frame);
    }
}
// Registers ActionRecogNode under the type name "action_recog". The
// registration is compiled out when RK3588_TEST_BUILD is defined.
// NOTE(review): REGISTER_NODE is defined outside this file; presumably it adds
// the class to a node factory - confirm.
#ifndef RK3588_TEST_BUILD
REGISTER_NODE(ActionRecogNode, "action_recog");
#endif
} // namespace rk3588