// Rule-based action recognition node.
//
// For every frame that carries detections, the node records the bounding box
// of each tracked detection, keeps a short per-track history, and evaluates
// the configured "fall" / "fight" rules against that history. Matching rules
// append BehaviorEventItem entries to frame->behavior_events before the frame
// is pushed to every downstream queue.
//
// Frame timestamps (pts) are compared directly against the *_ms rule fields,
// i.e. the code treats pts differences as milliseconds.

#include "action_recog_node.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <deque>
#include <iterator>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "behavior/behavior_event.h"
#include "utils/logger.h"

namespace rk3588 {

namespace {

enum class ActionEventKind { Fall, Fight };

// One entry of the "events" config array.
struct ActionEventRule {
    ActionEventKind kind = ActionEventKind::Fall;
    uint64_t window_ms = 1000;            // how far back the baseline sample may lie
    uint64_t activate_duration_ms = 0;    // minimum observed duration before firing
    float min_drop_pixels = 0.0f;         // fall: minimum downward centre movement
    float min_aspect_ratio_delta = 0.0f;  // fall: minimum increase of w/h
    float proximity_pixels = 0.0f;        // fight: maximum centre distance of a pair
    float min_motion_pixels = 0.0f;       // fight: minimum summed motion of a pair
};

// Bounding box of one track at one frame timestamp.
struct TrackSample {
    uint64_t pts = 0;
    Rect bbox{};
};

// NOTE(review): assumes det.track_id is an int (it is compared with `< 0`
// below) — confirm against the detection struct.
using TrackId = int;
using TrackHistory = std::deque<TrackSample>;
using HistoryMap = std::map<TrackId, TrackHistory>;
using TrackMap = std::map<TrackId, Rect>;

// Parses config["events"] into rules.
//
// Returns false and fills `err` when "events" is missing / not an array, an
// entry is not an object, or an entry has an unsupported "type". `out` is
// only modified on success, so a failed parse never leaves a partial rule set.
bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& out, std::string& err) {
    const SimpleJson* events = config.Find("events");
    if (!events || !events->IsArray()) {
        err = "events must be array";
        return false;
    }

    std::vector<ActionEventRule> parsed;
    for (const auto& ev : events->AsArray()) {
        if (!ev.IsObject()) {
            err = "event entry must be object";
            return false;
        }

        ActionEventRule rule;
        const std::string type = ev.ValueOr<std::string>("type", "");
        // Negative durations are clamped to zero before the unsigned conversion.
        rule.window_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("window_ms", 1000)));
        rule.activate_duration_ms =
            static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("activate_duration_ms", 0)));

        if (type == "fall") {
            rule.kind = ActionEventKind::Fall;
            rule.min_drop_pixels = ev.ValueOr<float>("min_drop_pixels", 0.0f);
            rule.min_aspect_ratio_delta = ev.ValueOr<float>("min_aspect_ratio_delta", 0.0f);
        } else if (type == "fight") {
            rule.kind = ActionEventKind::Fight;
            rule.proximity_pixels = ev.ValueOr<float>("proximity_pixels", 0.0f);
            rule.min_motion_pixels = ev.ValueOr<float>("min_motion_pixels", 0.0f);
        } else {
            err = "unsupported event type: " + type;
            return false;
        }
        parsed.push_back(rule);
    }

    out = std::move(parsed);
    return true;
}

float CenterX(const Rect& rect) { return rect.x + (rect.w * 0.5f); }

float CenterY(const Rect& rect) { return rect.y + (rect.h * 0.5f); }

// Euclidean distance between the centres of two boxes. Used both for the
// distance between two tracks and for the motion of one track over time.
float CenterDistance(const Rect& lhs, const Rect& rhs) {
    const float dx = CenterX(lhs) - CenterX(rhs);
    const float dy = CenterY(lhs) - CenterY(rhs);
    return std::sqrt((dx * dx) + (dy * dy));
}

// Width / height, or 0 for a box without positive height.
float AspectRatio(const Rect& rect) { return rect.h > 0.0f ? (rect.w / rect.h) : 0.0f; }

BehaviorEventType ToBehaviorEventType(ActionEventKind kind) {
    return kind == ActionEventKind::Fall ? BehaviorEventType::Fall : BehaviorEventType::Fight;
}

// Time elapsed from `since_pts` to `now_pts`, or 0 if `since_pts` is in the future.
uint64_t ElapsedSince(uint64_t now_pts, uint64_t since_pts) {
    return now_pts >= since_pts ? (now_pts - since_pts) : 0;
}

// Returns the oldest sample that is no older than `window_ms` relative to
// `now_pts` (samples are stored in arrival order), or nullptr if none qualifies.
const TrackSample* OldestSampleInWindow(const TrackHistory& samples, uint64_t now_pts,
                                        uint64_t window_ms) {
    for (const auto& sample : samples) {
        if (sample.pts >= now_pts || (now_pts - sample.pts) <= window_ms) return &sample;
    }
    return nullptr;
}

// Drops samples older than `retention_ms` from every track and erases tracks
// that end up with no samples, so tracks that left the scene do not accumulate.
void PruneHistory(HistoryMap& history, uint64_t now_pts, uint64_t retention_ms) {
    for (auto it = history.begin(); it != history.end();) {
        TrackHistory& samples = it->second;
        while (!samples.empty() && now_pts > samples.front().pts &&
               (now_pts - samples.front().pts) > retention_ms) {
            samples.pop_front();
        }
        if (samples.empty()) {
            it = history.erase(it);
        } else {
            ++it;
        }
    }
}

// Smallest box containing both `a` and `b`.
Rect UnionRect(const Rect& a, const Rect& b) {
    const auto left = std::min(a.x, b.x);
    const auto top = std::min(a.y, b.y);
    const auto right = std::max(a.x + a.w, b.x + b.w);
    const auto bottom = std::max(a.y + a.h, b.y + b.h);
    return Rect{left, top, right - left, bottom - top};
}

// Builds an Active event with score 1.0 and the fields shared by all rule kinds.
BehaviorEventItem MakeActiveEvent(ActionEventKind kind, const Rect& bbox, uint64_t start_pts,
                                  uint64_t last_pts, uint64_t duration_ms) {
    BehaviorEventItem event;
    event.type = ToBehaviorEventType(kind);
    event.status = BehaviorEventStatus::Active;
    event.score = 1.0f;
    event.bbox = bbox;
    event.start_pts = start_pts;
    event.last_pts = last_pts;
    event.duration_ms = duration_ms;
    event.source = "action_recog";
    return event;
}

// Fall: compares each current track with its oldest sample inside the rule
// window and fires when the centre dropped, the aspect ratio grew, and the
// observation lasted long enough.
void EmitFallEvents(const ActionEventRule& rule, const TrackMap& current_tracks,
                    const HistoryMap& history, uint64_t now_pts, Frame& frame) {
    for (const auto& [track_id, bbox] : current_tracks) {
        const auto it = history.find(track_id);
        if (it == history.end()) continue;
        const TrackSample* baseline = OldestSampleInWindow(it->second, now_pts, rule.window_ms);
        if (!baseline) continue;

        const float drop = CenterY(bbox) - CenterY(baseline->bbox);
        const float aspect_delta = AspectRatio(bbox) - AspectRatio(baseline->bbox);
        const uint64_t duration = ElapsedSince(now_pts, baseline->pts);
        if (drop < rule.min_drop_pixels) continue;
        if (aspect_delta < rule.min_aspect_ratio_delta) continue;
        if (duration < rule.activate_duration_ms) continue;

        BehaviorEventItem event =
            MakeActiveEvent(rule.kind, bbox, baseline->pts, now_pts, duration);
        event.track_ids.push_back(track_id);
        frame.behavior_events->items.push_back(std::move(event));
    }
}

// Fight: for every unordered pair of current tracks that are close enough,
// fires when their summed motion inside the rule window is large enough and
// both have been observed long enough.
void EmitFightEvents(const ActionEventRule& rule, const TrackMap& current_tracks,
                     const HistoryMap& history, uint64_t now_pts, Frame& frame) {
    // Iterating left < right over the ordered map visits each pair exactly once.
    for (auto left = current_tracks.begin(); left != current_tracks.end(); ++left) {
        for (auto right = std::next(left); right != current_tracks.end(); ++right) {
            if (CenterDistance(left->second, right->second) > rule.proximity_pixels) continue;

            const auto it_left = history.find(left->first);
            const auto it_right = history.find(right->first);
            if (it_left == history.end() || it_right == history.end()) continue;
            const TrackSample* base_left =
                OldestSampleInWindow(it_left->second, now_pts, rule.window_ms);
            const TrackSample* base_right =
                OldestSampleInWindow(it_right->second, now_pts, rule.window_ms);
            if (!base_left || !base_right) continue;

            const float combined_motion = CenterDistance(left->second, base_left->bbox) +
                                          CenterDistance(right->second, base_right->bbox);
            // The pair has only been observed together for the shorter of the two spans.
            const uint64_t duration = std::min(ElapsedSince(now_pts, base_left->pts),
                                               ElapsedSince(now_pts, base_right->pts));
            if (combined_motion < rule.min_motion_pixels) continue;
            if (duration < rule.activate_duration_ms) continue;

            BehaviorEventItem event =
                MakeActiveEvent(rule.kind, UnionRect(left->second, right->second),
                                now_pts - duration, now_pts, duration);
            event.track_ids.push_back(left->first);
            event.track_ids.push_back(right->first);
            frame.behavior_events->items.push_back(std::move(event));
        }
    }
}

}  // namespace

struct ActionRecogNode::Impl {
    std::string init_err;
    std::vector<ActionEventRule> rules;
    // Largest window_ms over all rules; history is retained for this long so
    // every rule can find a baseline inside its own window.
    uint64_t max_window_ms = 0;
    HistoryMap history;
};

ActionRecogNode::ActionRecogNode() : impl_(std::make_unique<Impl>()) {}

ActionRecogNode::~ActionRecogNode() = default;

std::string ActionRecogNode::Id() const { return id_; }

std::string ActionRecogNode::Type() const { return "action_recog"; }

// Reads "id" and "events" from the config and stores the output queues.
// Returns false (after logging) when the rule configuration is invalid.
bool ActionRecogNode::Init(const SimpleJson& config, const NodeContext& ctx) {
    id_ = config.ValueOr<std::string>("id", "action_recog");
    if (!ParseRules(config, impl_->rules, impl_->init_err)) {
        LogError("[action_recog] invalid config: " + impl_->init_err);
        return false;
    }

    impl_->max_window_ms = 0;
    for (const auto& rule : impl_->rules) {
        impl_->max_window_ms = std::max(impl_->max_window_ms, rule.window_ms);
    }

    output_queues_ = ctx.output_queues;
    return true;
}

bool ActionRecogNode::Start() { return true; }

void ActionRecogNode::Stop() {}

// Evaluates all rules on one frame and forwards it downstream.
// Returns DROP for a null frame, OK otherwise.
NodeStatus ActionRecogNode::Process(FramePtr frame) {
    if (!frame) return NodeStatus::DROP;
    EnsureBehaviorEvents(*frame);

    // Nothing to evaluate without detections or without rules (an empty
    // "events" array is accepted by the parser).
    if (!frame->det || impl_->rules.empty()) {
        PushToDownstream(frame);
        return NodeStatus::OK;
    }

    const uint64_t now_pts = frame->pts;

    TrackMap current_tracks;
    for (const auto& det : frame->det->items) {
        if (det.track_id < 0) continue;  // untracked detection
        current_tracks[det.track_id] = det.bbox;
        impl_->history[det.track_id].push_back(TrackSample{now_pts, det.bbox});
    }
    PruneHistory(impl_->history, now_pts, impl_->max_window_ms);

    for (const auto& rule : impl_->rules) {
        if (rule.kind == ActionEventKind::Fall) {
            EmitFallEvents(rule, current_tracks, impl_->history, now_pts, *frame);
        } else {
            EmitFightEvents(rule, current_tracks, impl_->history, now_pts, *frame);
        }
    }

    PushToDownstream(frame);
    return NodeStatus::OK;
}

// Makes sure frame.behavior_events points at a container events can be appended to.
void ActionRecogNode::EnsureBehaviorEvents(Frame& frame) {
    if (!frame.behavior_events) {
        // Take the pointee type from the member declaration instead of naming it here.
        using EventContainer = decltype(frame.behavior_events)::element_type;
        frame.behavior_events = std::make_shared<EventContainer>();
    }
}

// Pushes the frame to every non-null output queue.
void ActionRecogNode::PushToDownstream(const FramePtr& frame) {
    for (auto& q : output_queues_) {
        if (q) q->Push(frame);
    }
}

#ifndef RK3588_TEST_BUILD
REGISTER_NODE(ActionRecogNode, "action_recog");
#endif

}  // namespace rk3588