feat: add pose behavior pipeline

2026-04-02 17:44:36 +08:00
15 changed files with 1939 additions and 23 deletions
--- a/Readme.md
+++ b/Readme.md
@ -1266,3 +1266,40 @@ rsync -avz build-cross/ user@board_ip:/opt/media-server/
 - [docs/models.md](docs/models.md) - 模型转换与部署指南
 - [docs/Agent_API_Extensions.md](docs/Agent_API_Extensions.md) - Agent API 扩展说明
 - [docs/API_Device_RemoteMgmt_InterfaceTable.md](docs/API_Device_RemoteMgmt_InterfaceTable.md) - 远程管理接口
 ---
 ## Pose Behavior Pipeline
 The pose behavior pipeline is optional and does not replace the existing detection pipeline.
 Baseline behavior graph:
 ```text
 input_rtsp -> preprocess -> ai_yolo -> tracker -> region_event -> action_recog -> event_fusion
 ```
 Pose-enabled behavior graph:
 ```text
 input_rtsp -> preprocess -> ai_yolo -> tracker -> ai_pose -> pose_assoc -> region_event -> action_recog -> event_fusion
 ```
 Engineering rules:
 - `tracker` remains the source of stable `track_id` for the main pipeline.
 - `ai_pose` only produces `Frame.pose` and does not replace `Frame.det`.
 - `pose_assoc` is the canonical node for assigning `PoseItem.track_id`.
 - `action_recog` may use pose-enhanced rules, but it must still run when pose nodes are absent.
 Degradation semantics:
 - If `ai_pose` is not present, the graph keeps the original bbox-based behavior logic.
 - If `ai_pose` is present but inference fails on some frames, `Frame.pose` may be empty and downstream nodes must continue with bbox data.
 - If `pose_assoc` is absent, downstream nodes should treat `pose.track_id` as optional and may fall back to bbox matching only as a compatibility path.
 Sample configs:
 - `configs/sample_region_behavior_intrusion.json`
  - Baseline intrusion-only graph without pose nodes. This is the reference for the non-pose deployment path.
 - `configs/sample_region_behavior_full.json`
  - Full behavior graph with `ai_pose` and `pose_assoc`, intended for pose-aware `fall` and `fight`.
--- a/configs/sample_region_behavior_full.json
+++ b/configs/sample_region_behavior_full.json
@ -60,6 +60,24 @@
          "max_age_ms": 1200,
          "min_hits": 2
        },
        {
          "id": "pose",
          "type": "ai_pose",
          "role": "filter",
          "enable": true,
          "model_path": "./models/yolov8n-pose.rknn",
          "model_input_w": 640,
          "model_input_h": 640,
          "conf_thresh": 0.25,
          "nms_thresh": 0.45
        },
        {
          "id": "pose_assoc",
          "type": "pose_assoc",
          "role": "filter",
          "enable": true,
          "min_iou": 0.1
        },
        {
          "id": "region_evt",
          "type": "region_event",
@ -90,16 +108,38 @@
            {
              "type": "fall",
              "window_ms": 1500,
              "activate_duration_ms": 300,
              "bbox": {
                "enabled": true,
                "min_drop_pixels": 120,
-              "min_aspect_ratio_delta": 0.35,
+                "min_aspect_ratio_delta": 0.35
-              "activate_duration_ms": 300
+              },
              "pose": {
                "enabled": true,
                "min_torso_drop_pixels": 120,
                "max_upright_ratio": 0.60
              },
              "fusion": {
                "match_mode": "any"
              }
            },
            {
              "type": "fight",
              "window_ms": 1200,
              "activate_duration_ms": 200,
              "bbox": {
                "enabled": true,
                "proximity_pixels": 220,
-              "min_motion_pixels": 90,
+                "min_motion_pixels": 90
-              "activate_duration_ms": 200
+              },
              "pose": {
                "enabled": true,
                "min_wrist_motion_pixels": 120,
                "max_wrist_distance_pixels": 120
              },
              "fusion": {
                "match_mode": "any"
              }
            }
          ]
        },
@ -190,7 +230,9 @@
        ["in", "pre"],
        ["pre", "person_det"],
        ["person_det", "trk"],
-        ["trk", "region_evt"],
+        ["trk", "pose"],
        ["pose", "pose_assoc"],
        ["pose_assoc", "region_evt"],
        ["region_evt", "action_evt"],
        ["action_evt", "fusion"],
        ["fusion", "osd"],
--- a/docs/config_guide.md
+++ b/docs/config_guide.md
@ -554,3 +554,117 @@ input_rtsp
 **版本**：v2.0  
 **更新日期**：2026-03-15
 ## Pose-Enabled Behavior Configuration
 This repository now supports two behavior deployment modes.
 ### Mode 1: Baseline Behavior Graph
 Use this when you only need tracked detections and rule-based region behavior:
 ```text
 input_rtsp -> preprocess -> ai_yolo -> tracker -> region_event -> action_recog -> event_fusion
 ```
 Reference config:
 - `configs/sample_region_behavior_intrusion.json`
 Characteristics:
 - No pose model dependency
 - Lowest compute cost
 - `fall` and `fight` rely on bbox-only rules
 ### Mode 2: Pose-Enabled Behavior Graph
 Use this when you need pose-aware `fall` and `fight`:
 ```text
 input_rtsp -> preprocess -> ai_yolo -> tracker -> ai_pose -> pose_assoc -> region_event -> action_recog -> event_fusion
 ```
 Reference config:
 - `configs/sample_region_behavior_full.json`
 Characteristics:
 - `ai_pose` writes `Frame.pose`
 - `pose_assoc` assigns `PoseItem.track_id`
 - `action_recog` can fuse bbox and pose signals per event rule
 ### Optional Integration Rules
 - `ai_pose` is optional. Do not add it to graphs that only need intrusion or climb.
 - `pose_assoc` should be placed after `ai_pose` and after tracked detections are already available on the frame.
 - `action_recog` must remain runnable when `Frame.pose` is missing or empty.
 ### Degradation Semantics
 - Without `ai_pose`, the graph must still behave correctly with bbox-only behavior logic.
 - With `ai_pose` but without `pose_assoc`, `pose.track_id` is not guaranteed and downstream logic should treat it as optional compatibility mode.
 - If pose inference returns no items for a frame, the rest of the graph must keep using `Frame.det` and `track_id`.
 ### Structured `action_recog` Rules
 `action_recog` supports a structured event format with `bbox`, `pose`, and `fusion` sections.
 Example:
 ```json
 {
  "id": "action_evt",
  "type": "action_recog",
  "events": [
    {
      "type": "fall",
      "window_ms": 1500,
      "activate_duration_ms": 300,
      "bbox": {
        "enabled": true,
        "min_drop_pixels": 120,
        "min_aspect_ratio_delta": 0.35
      },
      "pose": {
        "enabled": true,
        "min_torso_drop_pixels": 120,
        "max_upright_ratio": 0.60
      },
      "fusion": {
        "match_mode": "any"
      }
    },
    {
      "type": "fight",
      "window_ms": 1200,
      "activate_duration_ms": 200,
      "bbox": {
        "enabled": true,
        "proximity_pixels": 220,
        "min_motion_pixels": 90
      },
      "pose": {
        "enabled": true,
        "min_wrist_motion_pixels": 120,
        "max_wrist_distance_pixels": 120
      },
      "fusion": {
        "match_mode": "any"
      }
    }
  ]
 }
 ```
 Rules:
 - `bbox.enabled=false` disables bbox signal evaluation for that event.
 - `pose.enabled=false` disables pose signal evaluation for that event.
 - `fusion.match_mode="any"` means either bbox or pose may activate the event.
 - `fusion.match_mode="all"` means bbox and pose must both activate the event.
 Backward compatibility:
 - Old flat keys such as `min_drop_pixels` and `pose_min_torso_drop_pixels` are still accepted.
 - New configs should prefer the structured form above.
--- a/include/frame/frame.h
+++ b/include/frame/frame.h
@ -16,6 +16,7 @@ namespace rk3588 {
 struct FaceDetResult;
 struct FaceRecogResult;
 struct PoseResult;
 enum class PixelFormat {
    NV12,
@ -82,6 +83,7 @@ struct Frame {
    std::shared_ptr<DetectionResult> det;
    std::shared_ptr<BehaviorEventResult> behavior_events;
    std::shared_ptr<PoseResult> pose;
    // Face recognition pipeline meta (kept separate from user_meta to avoid conflicts with publish).
    std::shared_ptr<FaceDetResult> face_det;
    std::shared_ptr<FaceRecogResult> face_recog;
--- a/include/pose/pose_result.h
+++ b/include/pose/pose_result.h
@ -0,0 +1,34 @@
 #pragma once
 #include <string>
 #include <vector>
 #include "frame/rect.h"
 namespace rk3588 {
 struct PosePoint2f {
    float x = 0.0f;
    float y = 0.0f;
 };
 struct PoseKeypoint {
    PosePoint2f point{};
    float score = 0.0f;
 };
 struct PoseItem {
    Rect bbox{};
    float score = 0.0f;
    int track_id = -1;
    std::vector<PoseKeypoint> keypoints;
 };
 struct PoseResult {
    std::vector<PoseItem> items;
    int img_w = 0;
    int img_h = 0;
    std::string model_name;
 };
 }  // namespace rk3588
--- a/plugins/CMakeLists.txt
+++ b/plugins/CMakeLists.txt
@ -482,6 +482,30 @@ set_target_properties(action_recog PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
 )
 # ai_pose plugin (pose inference scaffold)
 add_library(ai_pose SHARED
    ai_pose/ai_pose_node.cpp
 )
 target_include_directories(ai_pose PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
 target_link_libraries(ai_pose PRIVATE project_options Threads::Threads)
 set_target_properties(ai_pose PROPERTIES
    OUTPUT_NAME "ai_pose"
    LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
    RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
 )
 # pose_assoc plugin (assign pose.track_id from tracked detections)
 add_library(pose_assoc SHARED
    pose_assoc/pose_assoc_node.cpp
 )
 target_include_directories(pose_assoc PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
 target_link_libraries(pose_assoc PRIVATE project_options Threads::Threads)
 set_target_properties(pose_assoc PROPERTIES
    OUTPUT_NAME "pose_assoc"
    LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
    RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
 )
 # event_fusion plugin (behavior event id and lifecycle normalization)
 add_library(event_fusion SHARED
    event_fusion/event_fusion_node.cpp
@ -565,7 +589,7 @@ set_target_properties(ai_shoe_det PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
 )
-install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate region_event action_recog event_fusion storage ai_scheduler ai_shoe_det
+install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate region_event action_recog ai_pose pose_assoc event_fusion storage ai_scheduler ai_shoe_det
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
    RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
 )
--- a/plugins/action_recog/action_recog_node.cpp
+++ b/plugins/action_recog/action_recog_node.cpp
@ -4,6 +4,7 @@
 #include <cmath>
 #include <cstdint>
 #include <deque>
 #include <limits>
 #include <map>
 #include <memory>
 #include <set>
@ -12,6 +13,7 @@
 #include <vector>
 #include "behavior/behavior_event.h"
 #include "pose/pose_result.h"
 #include "utils/logger.h"
 namespace rk3588 {
@ -22,21 +24,135 @@ enum class ActionEventKind {
    Fight
 };
 enum class ActionSignalMode {
    Any,
    All
 };
 struct FallBboxRule {
    bool enabled = true;
    float min_drop_pixels = 0.0f;
    float min_aspect_ratio_delta = 0.0f;
 };
 struct FallPoseRule {
    bool enabled = false;
    float min_torso_drop_pixels = 0.0f;
    float max_upright_ratio = 0.0f;
 };
 struct FightBboxRule {
    bool enabled = true;
    float proximity_pixels = 0.0f;
    float min_motion_pixels = 0.0f;
 };
 struct FightPoseRule {
    bool enabled = false;
    float min_wrist_motion_pixels = 0.0f;
    float max_wrist_distance_pixels = 0.0f;
 };
 struct ActionEventRule {
    ActionEventKind kind = ActionEventKind::Fall;
    uint64_t window_ms = 1000;
    uint64_t activate_duration_ms = 0;
-    float min_drop_pixels = 0.0f;
+    ActionSignalMode signal_mode = ActionSignalMode::Any;
-    float min_aspect_ratio_delta = 0.0f;
+    FallBboxRule fall_bbox{};
-    float proximity_pixels = 0.0f;
+    FallPoseRule fall_pose{};
-    float min_motion_pixels = 0.0f;
+    FightBboxRule fight_bbox{};
    FightPoseRule fight_pose{};
 };
 struct PoseTrackSample {
    bool matched = false;
    float torso_center_y = 0.0f;
    float upright_ratio = 0.0f;
    float left_wrist_x = 0.0f;
    float left_wrist_y = 0.0f;
    float right_wrist_x = 0.0f;
    float right_wrist_y = 0.0f;
    bool has_left_wrist = false;
    bool has_right_wrist = false;
 };
 struct TrackSample {
    uint64_t pts = 0;
    Rect bbox{};
    PoseTrackSample pose{};
 };
 static ActionSignalMode ParseSignalMode(const std::string& value) {
    if (value == "all") return ActionSignalMode::All;
    return ActionSignalMode::Any;
 }
 static bool EvaluateSignalMode(ActionSignalMode mode, bool bbox_trigger, bool pose_trigger,
                               bool bbox_enabled, bool pose_enabled) {
    if (bbox_enabled && pose_enabled) {
        return mode == ActionSignalMode::All ? (bbox_trigger && pose_trigger) : (bbox_trigger || pose_trigger);
    }
    if (bbox_enabled) return bbox_trigger;
    if (pose_enabled) return pose_trigger;
    return false;
 }
 static bool ParseFallRule(const SimpleJson& ev, ActionEventRule& rule) {
    rule.kind = ActionEventKind::Fall;
    rule.fall_bbox.min_drop_pixels = ev.ValueOr<float>("min_drop_pixels", 0.0f);
    rule.fall_bbox.min_aspect_ratio_delta = ev.ValueOr<float>("min_aspect_ratio_delta", 0.0f);
    rule.fall_pose.min_torso_drop_pixels = ev.ValueOr<float>("pose_min_torso_drop_pixels", 0.0f);
    rule.fall_pose.max_upright_ratio = ev.ValueOr<float>("pose_max_upright_ratio", 0.0f);
    if (const SimpleJson* bbox = ev.Find("bbox"); bbox && bbox->IsObject()) {
        rule.fall_bbox.enabled = bbox->ValueOr<bool>("enabled", true);
        rule.fall_bbox.min_drop_pixels = bbox->ValueOr<float>("min_drop_pixels", rule.fall_bbox.min_drop_pixels);
        rule.fall_bbox.min_aspect_ratio_delta =
            bbox->ValueOr<float>("min_aspect_ratio_delta", rule.fall_bbox.min_aspect_ratio_delta);
    } else {
        rule.fall_bbox.enabled = true;
    }
    if (const SimpleJson* pose = ev.Find("pose"); pose && pose->IsObject()) {
        rule.fall_pose.enabled = pose->ValueOr<bool>("enabled", true);
        rule.fall_pose.min_torso_drop_pixels =
            pose->ValueOr<float>("min_torso_drop_pixels", rule.fall_pose.min_torso_drop_pixels);
        rule.fall_pose.max_upright_ratio =
            pose->ValueOr<float>("max_upright_ratio", rule.fall_pose.max_upright_ratio);
    } else {
        rule.fall_pose.enabled = rule.fall_pose.min_torso_drop_pixels > 0.0f || rule.fall_pose.max_upright_ratio > 0.0f;
    }
    return true;
 }
 static bool ParseFightRule(const SimpleJson& ev, ActionEventRule& rule) {
    rule.kind = ActionEventKind::Fight;
    rule.fight_bbox.proximity_pixels = ev.ValueOr<float>("proximity_pixels", 0.0f);
    rule.fight_bbox.min_motion_pixels = ev.ValueOr<float>("min_motion_pixels", 0.0f);
    rule.fight_pose.min_wrist_motion_pixels = ev.ValueOr<float>("pose_min_wrist_motion_pixels", 0.0f);
    rule.fight_pose.max_wrist_distance_pixels = ev.ValueOr<float>("pose_max_wrist_distance_pixels", 0.0f);
    if (const SimpleJson* bbox = ev.Find("bbox"); bbox && bbox->IsObject()) {
        rule.fight_bbox.enabled = bbox->ValueOr<bool>("enabled", true);
        rule.fight_bbox.proximity_pixels = bbox->ValueOr<float>("proximity_pixels", rule.fight_bbox.proximity_pixels);
        rule.fight_bbox.min_motion_pixels = bbox->ValueOr<float>("min_motion_pixels", rule.fight_bbox.min_motion_pixels);
    } else {
        rule.fight_bbox.enabled = true;
    }
    if (const SimpleJson* pose = ev.Find("pose"); pose && pose->IsObject()) {
        rule.fight_pose.enabled = pose->ValueOr<bool>("enabled", true);
        rule.fight_pose.min_wrist_motion_pixels =
            pose->ValueOr<float>("min_wrist_motion_pixels", rule.fight_pose.min_wrist_motion_pixels);
        rule.fight_pose.max_wrist_distance_pixels =
            pose->ValueOr<float>("max_wrist_distance_pixels", rule.fight_pose.max_wrist_distance_pixels);
    } else {
        rule.fight_pose.enabled =
            rule.fight_pose.min_wrist_motion_pixels > 0.0f || rule.fight_pose.max_wrist_distance_pixels > 0.0f;
    }
    return true;
 }
 static bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& out, std::string& err) {
    const SimpleJson* events = config.Find("events");
    if (!events || !events->IsArray()) {
@ -55,15 +171,15 @@ static bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& o
        const std::string type = ev.ValueOr<std::string>("type", "");
        rule.window_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("window_ms", 1000)));
        rule.activate_duration_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("activate_duration_ms", 0)));
        rule.signal_mode = ParseSignalMode(ev.ValueOr<std::string>("signal_mode", "any"));
        if (const SimpleJson* fusion = ev.Find("fusion"); fusion && fusion->IsObject()) {
            rule.signal_mode = ParseSignalMode(fusion->ValueOr<std::string>("match_mode", "any"));
        }
        if (type == "fall") {
-            rule.kind = ActionEventKind::Fall;
+            ParseFallRule(ev, rule);
            rule.min_drop_pixels = ev.ValueOr<float>("min_drop_pixels", 0.0f);
            rule.min_aspect_ratio_delta = ev.ValueOr<float>("min_aspect_ratio_delta", 0.0f);
        } else if (type == "fight") {
-            rule.kind = ActionEventKind::Fight;
+            ParseFightRule(ev, rule);
            rule.proximity_pixels = ev.ValueOr<float>("proximity_pixels", 0.0f);
            rule.min_motion_pixels = ev.ValueOr<float>("min_motion_pixels", 0.0f);
        } else {
            err = "unsupported event type: " + type;
            return false;
@ -98,6 +214,129 @@ static float AspectRatio(const Rect& rect) {
    return rect.h > 0.0f ? (rect.w / rect.h) : 0.0f;
 }
 static float BboxIoU(const Rect& lhs, const Rect& rhs) {
    const float x1 = std::max(lhs.x, rhs.x);
    const float y1 = std::max(lhs.y, rhs.y);
    const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
    const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
    const float iw = std::max(0.0f, x2 - x1);
    const float ih = std::max(0.0f, y2 - y1);
    const float inter = iw * ih;
    const float uni = lhs.w * lhs.h + rhs.w * rhs.h - inter;
    return uni <= 0.0f ? 0.0f : (inter / uni);
 }
 static bool IsValidKeypoint(const PoseItem& pose, int index) {
    return index >= 0 &&
           static_cast<size_t>(index) < pose.keypoints.size() &&
           pose.keypoints[static_cast<size_t>(index)].score > 0.0f;
 }
 static PoseTrackSample BuildPoseTrackSample(const PoseItem& pose) {
    PoseTrackSample sample;
    sample.matched = true;
    const bool has_left_shoulder = IsValidKeypoint(pose, 5);
    const bool has_right_shoulder = IsValidKeypoint(pose, 6);
    const bool has_left_hip = IsValidKeypoint(pose, 11);
    const bool has_right_hip = IsValidKeypoint(pose, 12);
    if (has_left_shoulder && has_right_shoulder && has_left_hip && has_right_hip) {
        const PosePoint2f& ls = pose.keypoints[5].point;
        const PosePoint2f& rs = pose.keypoints[6].point;
        const PosePoint2f& lh = pose.keypoints[11].point;
        const PosePoint2f& rh = pose.keypoints[12].point;
        const float shoulder_center_y = (ls.y + rs.y) * 0.5f;
        const float hip_center_y = (lh.y + rh.y) * 0.5f;
        sample.torso_center_y = (shoulder_center_y + hip_center_y) * 0.5f;
        const float min_x = std::min(std::min(ls.x, rs.x), std::min(lh.x, rh.x));
        const float max_x = std::max(std::max(ls.x, rs.x), std::max(lh.x, rh.x));
        const float min_y = std::min(std::min(ls.y, rs.y), std::min(lh.y, rh.y));
        const float max_y = std::max(std::max(ls.y, rs.y), std::max(lh.y, rh.y));
        const float horizontal_span = std::fabs(max_x - min_x);
        const float vertical_span = std::fabs(max_y - min_y);
        sample.upright_ratio = vertical_span > 1e-3f ? (horizontal_span / vertical_span) : std::numeric_limits<float>::infinity();
    }
    if (IsValidKeypoint(pose, 9)) {
        sample.left_wrist_x = pose.keypoints[9].point.x;
        sample.left_wrist_y = pose.keypoints[9].point.y;
        sample.has_left_wrist = true;
    }
    if (IsValidKeypoint(pose, 10)) {
        sample.right_wrist_x = pose.keypoints[10].point.x;
        sample.right_wrist_y = pose.keypoints[10].point.y;
        sample.has_right_wrist = true;
    }
    return sample;
 }
 static PoseTrackSample MatchPoseToDetection(const Rect& bbox, const std::shared_ptr<PoseResult>& pose_result) {
    if (!pose_result) return {};
    float best_iou = 0.0f;
    const PoseItem* best_pose = nullptr;
    for (const auto& pose : pose_result->items) {
        const float iou = BboxIoU(bbox, pose.bbox);
        if (iou > best_iou) {
            best_iou = iou;
            best_pose = &pose;
        }
    }
    if (!best_pose || best_iou <= 0.0f) return {};
    return BuildPoseTrackSample(*best_pose);
 }
 static PoseTrackSample MatchPoseToTrackId(int track_id, const Rect& bbox, const std::shared_ptr<PoseResult>& pose_result) {
    if (!pose_result) return {};
    const PoseItem* best_pose = nullptr;
    float best_iou = 0.0f;
    for (const auto& pose : pose_result->items) {
        if (pose.track_id != track_id) continue;
        const float iou = BboxIoU(bbox, pose.bbox);
        if (iou >= best_iou) {
            best_iou = iou;
            best_pose = &pose;
        }
    }
    if (best_pose) return BuildPoseTrackSample(*best_pose);
    return MatchPoseToDetection(bbox, pose_result);
 }
 static float WristMotion(const PoseTrackSample& newer, const PoseTrackSample& older) {
    float motion = 0.0f;
    if (newer.has_left_wrist && older.has_left_wrist) {
        const float dx = newer.left_wrist_x - older.left_wrist_x;
        const float dy = newer.left_wrist_y - older.left_wrist_y;
        motion += std::sqrt(dx * dx + dy * dy);
    }
    if (newer.has_right_wrist && older.has_right_wrist) {
        const float dx = newer.right_wrist_x - older.right_wrist_x;
        const float dy = newer.right_wrist_y - older.right_wrist_y;
        motion += std::sqrt(dx * dx + dy * dy);
    }
    return motion;
 }
 static float MinimumWristDistance(const PoseTrackSample& lhs, const PoseTrackSample& rhs) {
    float best = std::numeric_limits<float>::infinity();
    auto update = [&](bool has_a, float ax, float ay, bool has_b, float bx, float by) {
        if (!has_a || !has_b) return;
        const float dx = ax - bx;
        const float dy = ay - by;
        best = std::min(best, std::sqrt(dx * dx + dy * dy));
    };
    update(lhs.has_left_wrist, lhs.left_wrist_x, lhs.left_wrist_y,
           rhs.has_left_wrist, rhs.left_wrist_x, rhs.left_wrist_y);
    update(lhs.has_left_wrist, lhs.left_wrist_x, lhs.left_wrist_y,
           rhs.has_right_wrist, rhs.right_wrist_x, rhs.right_wrist_y);
    update(lhs.has_right_wrist, lhs.right_wrist_x, lhs.right_wrist_y,
           rhs.has_left_wrist, rhs.left_wrist_x, rhs.left_wrist_y);
    update(lhs.has_right_wrist, lhs.right_wrist_x, lhs.right_wrist_y,
           rhs.has_right_wrist, rhs.right_wrist_x, rhs.right_wrist_y);
    return std::isfinite(best) ? best : std::numeric_limits<float>::infinity();
 }
 static BehaviorEventType ToBehaviorEventType(ActionEventKind kind) {
    return kind == ActionEventKind::Fall ? BehaviorEventType::Fall : BehaviorEventType::Fight;
 }
@ -152,7 +391,7 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
        if (det.track_id < 0) continue;
        current_tracks[det.track_id] = det.bbox;
        auto& history = impl_->history[det.track_id];
-        history.push_back(TrackSample{frame->pts, det.bbox});
+        history.push_back(TrackSample{frame->pts, det.bbox, MatchPoseToTrackId(det.track_id, det.bbox, frame->pose)});
        while (!history.empty() && frame->pts > history.front().pts &&
               (frame->pts - history.front().pts) > impl_->rules.front().window_ms) {
            history.pop_front();
@ -168,8 +407,18 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
                const float drop = CenterY(bbox) - CenterY(first.bbox);
                const float aspect_delta = AspectRatio(bbox) - AspectRatio(first.bbox);
                const uint64_t duration = frame->pts >= first.pts ? (frame->pts - first.pts) : 0;
-                if (drop < rule.min_drop_pixels) continue;
+                const bool bbox_enabled = rule.fall_bbox.enabled;
-                if (aspect_delta < rule.min_aspect_ratio_delta) continue;
+                const bool pose_enabled = rule.fall_pose.enabled;
                const bool bbox_trigger = drop >= rule.fall_bbox.min_drop_pixels &&
                                          aspect_delta >= rule.fall_bbox.min_aspect_ratio_delta;
                bool pose_trigger = false;
                if (pose_enabled &&
                    first.pose.matched && it->second.back().pose.matched) {
                    const float torso_drop = it->second.back().pose.torso_center_y - first.pose.torso_center_y;
                    pose_trigger = torso_drop >= rule.fall_pose.min_torso_drop_pixels &&
                                   it->second.back().pose.upright_ratio >= rule.fall_pose.max_upright_ratio;
                }
                if (!EvaluateSignalMode(rule.signal_mode, bbox_trigger, pose_trigger, bbox_enabled, pose_enabled)) continue;
                if (duration < rule.activate_duration_ms) continue;
                BehaviorEventItem event;
@ -189,7 +438,7 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
            for (auto left = current_tracks.begin(); left != current_tracks.end(); ++left) {
                for (auto right = std::next(left); right != current_tracks.end(); ++right) {
                    const float proximity = CenterDistance(left->second, right->second);
-                    if (proximity > rule.proximity_pixels) continue;
+                    if (proximity > rule.fight_bbox.proximity_pixels) continue;
                    const auto it_left = impl_->history.find(left->first);
                    const auto it_right = impl_->history.find(right->first);
@ -203,7 +452,22 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
                    const uint64_t duration = std::min(duration_left, duration_right);
                    const float combined_motion = left_motion + right_motion;
-                    if (combined_motion < rule.min_motion_pixels) continue;
+                    const bool bbox_enabled = rule.fight_bbox.enabled;
                    const bool pose_enabled = rule.fight_pose.enabled;
                    const bool bbox_trigger = combined_motion >= rule.fight_bbox.min_motion_pixels;
                    bool pose_trigger = false;
                    const auto& left_pose_now = it_left->second.back().pose;
                    const auto& right_pose_now = it_right->second.back().pose;
                    if (pose_enabled &&
                        it_left->second.front().pose.matched && it_right->second.front().pose.matched &&
                        left_pose_now.matched && right_pose_now.matched) {
                        const float pose_motion = WristMotion(left_pose_now, it_left->second.front().pose) +
                                                  WristMotion(right_pose_now, it_right->second.front().pose);
                        const float wrist_distance = MinimumWristDistance(left_pose_now, right_pose_now);
                        pose_trigger = pose_motion >= rule.fight_pose.min_wrist_motion_pixels &&
                                       wrist_distance <= rule.fight_pose.max_wrist_distance_pixels;
                    }
                    if (!EvaluateSignalMode(rule.signal_mode, bbox_trigger, pose_trigger, bbox_enabled, pose_enabled)) continue;
                    if (duration < rule.activate_duration_ms) continue;
                    const auto pair_key = std::make_pair(left->first, right->first);
--- a/plugins/ai_pose/ai_pose_node.cpp
+++ b/plugins/ai_pose/ai_pose_node.cpp
@ -0,0 +1,466 @@
 #include "ai_pose_node.h"
 #include <algorithm>
 #include <cmath>
 #include <cstdint>
 #include <cstring>
 #include <limits>
 #include <memory>
 #include <set>
 #include <string>
 #include <utility>
 #include <vector>
 #include "face/face_detection_utils.h"
 #include "hw/i_infer_backend.h"
 #include "pose/pose_result.h"
 #include "utils/logger.h"
 namespace rk3588 {
 namespace {
 using face_detection::HalfToFloat;
 using face_detection::ResizeRgbBilinear;
 constexpr int kPoseOutputCount = 4;
 constexpr int kPoseLocChannels = 64;
 constexpr int kPoseScoreChannel = 64;
 constexpr int kDflBins = 16;
 constexpr int kMaxPoseDetections = 32;
 constexpr uint8_t kLetterboxValue = 114;
 struct PoseConfig {
    bool enabled = true;
    std::string model_path;
    int model_input_w = 640;
    int model_input_h = 640;
    int expected_keypoints = 17;
    float conf_thresh = 0.25f;
    float nms_thresh = 0.45f;
 };
 struct LetterboxInfo {
    float scale = 1.0f;
    float pad_x = 0.0f;
    float pad_y = 0.0f;
 };
 struct PoseCandidate {
    Rect bbox{};
    float score = 0.0f;
    int keypoint_index = -1;
 };
 bool ParseConfig(const SimpleJson& config, PoseConfig& out, std::string& err) {
    out.enabled = config.ValueOr<bool>("enabled", true);
    out.model_path = config.ValueOr<std::string>("model_path", "");
    out.model_input_w = config.ValueOr<int>("model_input_w", 640);
    out.model_input_h = config.ValueOr<int>("model_input_h", 640);
    out.expected_keypoints = config.ValueOr<int>("expected_keypoints", 17);
    out.conf_thresh = config.ValueOr<float>("conf_thresh", 0.25f);
    out.nms_thresh = config.ValueOr<float>("nms_thresh", 0.45f);
    if (out.model_input_w <= 0 || out.model_input_h <= 0) {
        err = "model_input_w and model_input_h must be positive";
        return false;
    }
    if (out.expected_keypoints <= 0) {
        err = "expected_keypoints must be positive";
        return false;
    }
    if (out.conf_thresh <= 0.0f || out.conf_thresh >= 1.0f) {
        err = "conf_thresh must be in (0, 1)";
        return false;
    }
    if (out.nms_thresh <= 0.0f || out.nms_thresh >= 1.0f) {
        err = "nms_thresh must be in (0, 1)";
        return false;
    }
    if (out.enabled && out.model_path.empty()) {
        err = "model_path is required when enabled=true";
        return false;
    }
    return true;
 }
 void PushToDownstream(const std::vector<std::shared_ptr<SpscQueue<FramePtr>>>& output_queues, const FramePtr& frame) {
    for (const auto& q : output_queues) {
        if (q) q->Push(frame);
    }
 }
 inline float Sigmoid(float x) {
    return 1.0f / (1.0f + std::exp(-x));
 }
 float Unsigmoid(float y) {
    return -std::log((1.0f / y) - 1.0f);
 }
 float ClampFloat(float v, float lo, float hi) {
    return std::max(lo, std::min(v, hi));
 }
 float CalculateIoU(const Rect& lhs, const Rect& rhs) {
    const float x1 = std::max(lhs.x, rhs.x);
    const float y1 = std::max(lhs.y, rhs.y);
    const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
    const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
    const float inter_w = std::max(0.0f, x2 - x1);
    const float inter_h = std::max(0.0f, y2 - y1);
    const float inter = inter_w * inter_h;
    const float union_area = lhs.w * lhs.h + rhs.w * rhs.h - inter;
    return union_area <= 0.0f ? 0.0f : (inter / union_area);
 }
 float ReadTensorValue(const InferOutput& output, size_t index) {
 #if defined(RK3588_ENABLE_RKNN)
    switch (output.type) {
        case RKNN_TENSOR_FLOAT32: {
            const auto* p = reinterpret_cast<const float*>(output.data.data());
            return p[index];
        }
        case RKNN_TENSOR_FLOAT16: {
            const auto* p = reinterpret_cast<const uint16_t*>(output.data.data());
            return HalfToFloat(p[index]);
        }
        case RKNN_TENSOR_INT8: {
            const auto* p = reinterpret_cast<const int8_t*>(output.data.data());
            return (static_cast<float>(p[index]) - static_cast<float>(output.zp)) * output.scale;
        }
        case RKNN_TENSOR_UINT8: {
            const auto* p = reinterpret_cast<const uint8_t*>(output.data.data());
            return (static_cast<float>(p[index]) - static_cast<float>(output.zp)) * output.scale;
        }
        default:
            break;
    }
 #endif
    const auto* p = reinterpret_cast<const float*>(output.data.data());
    return p[index];
 }
 bool BuildLetterboxedRgbInput(const Frame& frame, int dst_w, int dst_h,
                              std::vector<uint8_t>& out, LetterboxInfo& letterbox) {
    if (frame.width <= 0 || frame.height <= 0 || dst_w <= 0 || dst_h <= 0) return false;
    if (frame.format != PixelFormat::RGB && frame.format != PixelFormat::BGR) return false;
    const uint8_t* src = frame.planes[0].data ? frame.planes[0].data : frame.data;
    if (!src) return false;
    const int src_stride = frame.planes[0].stride > 0 ? frame.planes[0].stride : frame.width * 3;
    out.assign(static_cast<size_t>(dst_w) * static_cast<size_t>(dst_h) * 3, kLetterboxValue);
    const float scale = std::min(static_cast<float>(dst_w) / static_cast<float>(frame.width),
                                 static_cast<float>(dst_h) / static_cast<float>(frame.height));
    const int resized_w = std::max(1, static_cast<int>(std::round(frame.width * scale)));
    const int resized_h = std::max(1, static_cast<int>(std::round(frame.height * scale)));
    const int pad_x = (dst_w - resized_w) / 2;
    const int pad_y = (dst_h - resized_h) / 2;
    std::vector<uint8_t> resized(static_cast<size_t>(resized_w) * static_cast<size_t>(resized_h) * 3);
    const bool swap_rb = (frame.format == PixelFormat::BGR);
    ResizeRgbBilinear(src, frame.width, frame.height, src_stride, resized.data(), resized_w, resized_h, swap_rb);
    for (int y = 0; y < resized_h; ++y) {
        uint8_t* dst_row = out.data() + (static_cast<size_t>(y + pad_y) * static_cast<size_t>(dst_w) + pad_x) * 3;
        const uint8_t* src_row = resized.data() + static_cast<size_t>(y) * static_cast<size_t>(resized_w) * 3;
        std::memcpy(dst_row, src_row, static_cast<size_t>(resized_w) * 3);
    }
    letterbox.scale = scale;
    letterbox.pad_x = static_cast<float>(pad_x);
    letterbox.pad_y = static_cast<float>(pad_y);
    return true;
 }
 float DecodeDfl(const InferOutput& output, int cell_index, int cells_per_head, int channel_group) {
    float max_value = -std::numeric_limits<float>::infinity();
    float logits[kDflBins]{};
    for (int i = 0; i < kDflBins; ++i) {
        const size_t offset = static_cast<size_t>((channel_group * kDflBins + i) * cells_per_head + cell_index);
        logits[i] = ReadTensorValue(output, offset);
        max_value = std::max(max_value, logits[i]);
    }
    float denom = 0.0f;
    float numer = 0.0f;
    for (int i = 0; i < kDflBins; ++i) {
        const float e = std::exp(logits[i] - max_value);
        denom += e;
        numer += e * static_cast<float>(i);
    }
    return denom > 0.0f ? (numer / denom) : 0.0f;
 }
 std::vector<PoseCandidate> DecodePoseCandidates(const std::vector<InferOutput>& outputs,
                                                const PoseConfig& config) {
    std::vector<PoseCandidate> candidates;
    if (outputs.size() < kPoseOutputCount) return candidates;
    const float score_threshold = Unsigmoid(config.conf_thresh);
    int keypoint_base_index = 0;
    for (int head = 0; head < 3; ++head) {
        const int stride = (head == 0) ? 8 : (head == 1 ? 16 : 32);
        const int grid_h = config.model_input_h / stride;
        const int grid_w = config.model_input_w / stride;
        const int cells = grid_h * grid_w;
        if (cells <= 0) continue;
        const InferOutput& output = outputs[static_cast<size_t>(head)];
        const size_t elem_count = sizeof(float) > 0 ? (output.data.size() / sizeof(float)) : 0;
 #if defined(RK3588_ENABLE_RKNN)
        size_t bytes_per_elem = 1;
        switch (output.type) {
            case RKNN_TENSOR_FLOAT32: bytes_per_elem = sizeof(float); break;
            case RKNN_TENSOR_FLOAT16: bytes_per_elem = sizeof(uint16_t); break;
            default: bytes_per_elem = 1; break;
        }
        const size_t actual_elem_count = bytes_per_elem > 0 ? (output.data.size() / bytes_per_elem) : 0;
 #else
        const size_t actual_elem_count = elem_count;
 #endif
        if (actual_elem_count < static_cast<size_t>(65 * cells)) {
            keypoint_base_index += cells;
            continue;
        }
        for (int cell = 0; cell < cells; ++cell) {
            const size_t score_offset = static_cast<size_t>(kPoseScoreChannel * cells + cell);
            const float score_logit = ReadTensorValue(output, score_offset);
            if (score_logit < score_threshold) continue;
            const float left = DecodeDfl(output, cell, cells, 0);
            const float top = DecodeDfl(output, cell, cells, 1);
            const float right = DecodeDfl(output, cell, cells, 2);
            const float bottom = DecodeDfl(output, cell, cells, 3);
            const int grid_x = cell % grid_w;
            const int grid_y = cell / grid_w;
            const float center_x = (static_cast<float>(grid_x) + 0.5f) * stride;
            const float center_y = (static_cast<float>(grid_y) + 0.5f) * stride;
            const float x1 = center_x - left * stride;
            const float y1 = center_y - top * stride;
            const float x2 = center_x + right * stride;
            const float y2 = center_y + bottom * stride;
            PoseCandidate candidate;
            candidate.bbox.x = x1;
            candidate.bbox.y = y1;
            candidate.bbox.w = std::max(0.0f, x2 - x1);
            candidate.bbox.h = std::max(0.0f, y2 - y1);
            candidate.score = Sigmoid(score_logit);
            candidate.keypoint_index = keypoint_base_index + cell;
            candidates.push_back(candidate);
        }
        keypoint_base_index += cells;
    }
    std::sort(candidates.begin(), candidates.end(), [](const PoseCandidate& lhs, const PoseCandidate& rhs) {
        return lhs.score > rhs.score;
    });
    std::vector<PoseCandidate> kept;
    kept.reserve(candidates.size());
    for (const auto& candidate : candidates) {
        bool suppressed = false;
        for (const auto& accepted : kept) {
            if (CalculateIoU(candidate.bbox, accepted.bbox) > config.nms_thresh) {
                suppressed = true;
                break;
            }
        }
        if (!suppressed) {
            kept.push_back(candidate);
            if (static_cast<int>(kept.size()) >= kMaxPoseDetections) break;
        }
    }
    return kept;
 }
 float MapToSourceCoord(float value, float pad, float scale, int limit) {
    if (scale <= 0.0f) return 0.0f;
    return ClampFloat((value - pad) / scale, 0.0f, static_cast<float>(std::max(0, limit)));
 }
 std::shared_ptr<PoseResult> BuildPoseResult(const std::vector<InferOutput>& outputs,
                                            const PoseConfig& config,
                                            int frame_w, int frame_h,
                                            const LetterboxInfo& letterbox) {
    if (outputs.size() < kPoseOutputCount) return nullptr;
    const InferOutput& keypoint_output = outputs[3];
    const int total_points = (config.model_input_w / 8) * (config.model_input_h / 8) +
                             (config.model_input_w / 16) * (config.model_input_h / 16) +
                             (config.model_input_w / 32) * (config.model_input_h / 32);
    const int values_per_point = config.expected_keypoints * 3;
 #if defined(RK3588_ENABLE_RKNN)
    size_t kp_bytes_per_elem = 1;
    switch (keypoint_output.type) {
        case RKNN_TENSOR_FLOAT32: kp_bytes_per_elem = sizeof(float); break;
        case RKNN_TENSOR_FLOAT16: kp_bytes_per_elem = sizeof(uint16_t); break;
        default: kp_bytes_per_elem = 1; break;
    }
 #else
    const size_t kp_bytes_per_elem = sizeof(float);
 #endif
    const size_t kp_elem_count = kp_bytes_per_elem > 0 ? (keypoint_output.data.size() / kp_bytes_per_elem) : 0;
    if (kp_elem_count < static_cast<size_t>(values_per_point * total_points)) return nullptr;
    auto result = std::make_shared<PoseResult>();
    result->img_w = frame_w;
    result->img_h = frame_h;
    result->model_name = "yolov8n-pose";
    const std::vector<PoseCandidate> candidates = DecodePoseCandidates(outputs, config);
    result->items.reserve(candidates.size());
    for (const auto& candidate : candidates) {
        if (candidate.keypoint_index < 0 || candidate.keypoint_index >= total_points) continue;
        PoseItem item;
        item.score = candidate.score;
        item.track_id = -1;
        item.bbox.x = MapToSourceCoord(candidate.bbox.x, letterbox.pad_x, letterbox.scale, frame_w);
        item.bbox.y = MapToSourceCoord(candidate.bbox.y, letterbox.pad_y, letterbox.scale, frame_h);
        const float x2 = MapToSourceCoord(candidate.bbox.x + candidate.bbox.w, letterbox.pad_x, letterbox.scale, frame_w);
        const float y2 = MapToSourceCoord(candidate.bbox.y + candidate.bbox.h, letterbox.pad_y, letterbox.scale, frame_h);
        item.bbox.w = std::max(0.0f, x2 - item.bbox.x);
        item.bbox.h = std::max(0.0f, y2 - item.bbox.y);
        item.keypoints.reserve(static_cast<size_t>(config.expected_keypoints));
        for (int k = 0; k < config.expected_keypoints; ++k) {
            const size_t x_offset = static_cast<size_t>(k * 3 * total_points + candidate.keypoint_index);
            const size_t y_offset = static_cast<size_t>(k * 3 * total_points + total_points + candidate.keypoint_index);
            const size_t s_offset = static_cast<size_t>(k * 3 * total_points + 2 * total_points + candidate.keypoint_index);
            PoseKeypoint kp;
            kp.point.x = MapToSourceCoord(ReadTensorValue(keypoint_output, x_offset), letterbox.pad_x, letterbox.scale, frame_w);
            kp.point.y = MapToSourceCoord(ReadTensorValue(keypoint_output, y_offset), letterbox.pad_y, letterbox.scale, frame_h);
            kp.score = ReadTensorValue(keypoint_output, s_offset);
            item.keypoints.push_back(kp);
        }
        result->items.push_back(std::move(item));
    }
    return result;
 }
 }  // namespace
 struct AiPoseNode::Impl {
    PoseConfig config;
    std::string init_err;
    std::shared_ptr<IInferBackend> infer_backend;
    ModelHandle model_handle = kInvalidModelHandle;
    bool started = false;
    std::vector<uint8_t> input_rgb;
 };
 AiPoseNode::AiPoseNode() : impl_(std::make_unique<Impl>()) {}
 AiPoseNode::~AiPoseNode() = default;
 std::string AiPoseNode::Id() const {
    return id_;
 }
 std::string AiPoseNode::Type() const {
    return "ai_pose";
 }
 bool AiPoseNode::Init(const SimpleJson& config, const NodeContext& ctx) {
    id_ = config.ValueOr<std::string>("id", "ai_pose");
    if (!ParseConfig(config, impl_->config, impl_->init_err)) {
        LogError("[ai_pose] invalid config: " + impl_->init_err);
        return false;
    }
    impl_->infer_backend = ctx.infer_backend;
    output_queues_ = ctx.output_queues;
    return true;
 }
 bool AiPoseNode::Start() {
    if (!impl_->config.enabled) {
        impl_->started = true;
        return true;
    }
    if (!impl_->infer_backend) {
        LogError("[ai_pose] infer_backend is required when enabled=true");
        return false;
    }
    std::string err;
    impl_->model_handle = impl_->infer_backend->LoadModel(impl_->config.model_path, err);
    if (impl_->model_handle == kInvalidModelHandle) {
        LogError("[ai_pose] failed to load model: " + impl_->config.model_path + " err=" + err);
        return false;
    }
    impl_->started = true;
    return true;
 }
 void AiPoseNode::Stop() {
    if (impl_->infer_backend && impl_->model_handle != kInvalidModelHandle) {
        impl_->infer_backend->UnloadModel(impl_->model_handle);
        impl_->model_handle = kInvalidModelHandle;
    }
    impl_->started = false;
 }
 NodeStatus AiPoseNode::Process(FramePtr frame) {
    if (!frame) return NodeStatus::DROP;
    if (!impl_->config.enabled) {
        PushToDownstream(output_queues_, frame);
        return NodeStatus::OK;
    }
    if (!impl_->started || !impl_->infer_backend || impl_->model_handle == kInvalidModelHandle) {
        LogWarn("[ai_pose] process called before node is started");
        PushToDownstream(output_queues_, frame);
        return NodeStatus::ERROR;
    }
    LetterboxInfo letterbox;
    if (!BuildLetterboxedRgbInput(*frame, impl_->config.model_input_w, impl_->config.model_input_h,
                                  impl_->input_rgb, letterbox)) {
        LogWarn("[ai_pose] only RGB/BGR frames are supported currently");
        PushToDownstream(output_queues_, frame);
        return NodeStatus::OK;
    }
    InferInput input;
    input.data = impl_->input_rgb.data();
    input.size = impl_->input_rgb.size();
    input.width = impl_->config.model_input_w;
    input.height = impl_->config.model_input_h;
    input.is_nhwc = true;
    input.dma_fd = -1;
    input.dma_offset = 0;
    InferResult infer_result = impl_->infer_backend->Infer(impl_->model_handle, input);
    if (!infer_result.success) {
        LogWarn("[ai_pose] inference failed: " + infer_result.error);
        PushToDownstream(output_queues_, frame);
        return NodeStatus::OK;
    }
    frame->pose = BuildPoseResult(infer_result.outputs, impl_->config, frame->width, frame->height, letterbox);
    if (!frame->pose) {
        frame->pose = std::make_shared<PoseResult>();
        frame->pose->img_w = frame->width;
        frame->pose->img_h = frame->height;
        frame->pose->model_name = "yolov8n-pose";
    }
    PushToDownstream(output_queues_, frame);
    return NodeStatus::OK;
 }
 #ifndef RK3588_TEST_BUILD
 REGISTER_NODE(AiPoseNode, "ai_pose");
 #endif
 }  // namespace rk3588
--- a/plugins/ai_pose/ai_pose_node.h
+++ b/plugins/ai_pose/ai_pose_node.h
@ -0,0 +1,30 @@
 #pragma once
 #include <memory>
 #include <string>
 #include <vector>
 #include "node.h"
 namespace rk3588 {
 class AiPoseNode final : public INode {
 public:
    AiPoseNode();
    ~AiPoseNode() override;
    std::string Id() const override;
    std::string Type() const override;
    bool Init(const SimpleJson& config, const NodeContext& ctx) override;
    bool Start() override;
    void Stop() override;
    NodeStatus Process(FramePtr frame) override;
 private:
    struct Impl;
    std::unique_ptr<Impl> impl_;
    std::string id_;
    std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
 };
 }  // namespace rk3588
--- a/plugins/pose_assoc/pose_assoc_node.cpp
+++ b/plugins/pose_assoc/pose_assoc_node.cpp
@ -0,0 +1,139 @@
 #include "pose_assoc_node.h"
 #include <algorithm>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 #include "pose/pose_result.h"
 #include "utils/logger.h"
 namespace rk3588 {
 namespace {
 struct PoseAssocConfig {
    float min_iou = 0.1f;
 };
 struct MatchCandidate {
    float iou = 0.0f;
    int pose_index = -1;
    int det_index = -1;
 };
 bool ParseConfig(const SimpleJson& config, PoseAssocConfig& out, std::string& err) {
    out.min_iou = config.ValueOr<float>("min_iou", 0.1f);
    if (out.min_iou < 0.0f || out.min_iou > 1.0f) {
        err = "min_iou must be in [0, 1]";
        return false;
    }
    return true;
 }
 float IoU(const Rect& lhs, const Rect& rhs) {
    const float x1 = std::max(lhs.x, rhs.x);
    const float y1 = std::max(lhs.y, rhs.y);
    const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
    const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
    const float iw = std::max(0.0f, x2 - x1);
    const float ih = std::max(0.0f, y2 - y1);
    const float inter = iw * ih;
    const float area_l = std::max(0.0f, lhs.w) * std::max(0.0f, lhs.h);
    const float area_r = std::max(0.0f, rhs.w) * std::max(0.0f, rhs.h);
    const float uni = area_l + area_r - inter;
    return uni <= 0.0f ? 0.0f : (inter / uni);
 }
 void PushToDownstream(const std::vector<std::shared_ptr<SpscQueue<FramePtr>>>& output_queues, const FramePtr& frame) {
    for (const auto& q : output_queues) {
        if (q) q->Push(frame);
    }
 }
 }  // namespace
 struct PoseAssocNode::Impl {
    PoseAssocConfig config;
    std::string init_err;
 };
 PoseAssocNode::PoseAssocNode() : impl_(std::make_unique<Impl>()) {}
 PoseAssocNode::~PoseAssocNode() = default;
 std::string PoseAssocNode::Id() const {
    return id_;
 }
 std::string PoseAssocNode::Type() const {
    return "pose_assoc";
 }
 bool PoseAssocNode::Init(const SimpleJson& config, const NodeContext& ctx) {
    id_ = config.ValueOr<std::string>("id", "pose_assoc");
    if (!ParseConfig(config, impl_->config, impl_->init_err)) {
        LogError("[pose_assoc] invalid config: " + impl_->init_err);
        return false;
    }
    output_queues_ = ctx.output_queues;
    return true;
 }
 bool PoseAssocNode::Start() {
    return true;
 }
 void PoseAssocNode::Stop() {}
 NodeStatus PoseAssocNode::Process(FramePtr frame) {
    if (!frame) return NodeStatus::DROP;
    if (frame->pose && frame->det) {
        for (auto& pose : frame->pose->items) {
            pose.track_id = -1;
        }
        std::vector<MatchCandidate> candidates;
        candidates.reserve(frame->pose->items.size() * frame->det->items.size());
        for (size_t pi = 0; pi < frame->pose->items.size(); ++pi) {
            const Rect& pose_bbox = frame->pose->items[pi].bbox;
            for (size_t di = 0; di < frame->det->items.size(); ++di) {
                const auto& det = frame->det->items[di];
                if (det.track_id < 0) continue;
                const float iou = IoU(pose_bbox, det.bbox);
                if (iou >= impl_->config.min_iou) {
                    candidates.push_back(MatchCandidate{iou, static_cast<int>(pi), static_cast<int>(di)});
                }
            }
        }
        std::sort(candidates.begin(), candidates.end(), [](const MatchCandidate& lhs, const MatchCandidate& rhs) {
            if (lhs.iou != rhs.iou) return lhs.iou > rhs.iou;
            if (lhs.pose_index != rhs.pose_index) return lhs.pose_index < rhs.pose_index;
            return lhs.det_index < rhs.det_index;
        });
        std::vector<bool> used_pose(frame->pose->items.size(), false);
        std::vector<bool> used_det(frame->det->items.size(), false);
        for (const auto& candidate : candidates) {
            if (used_pose[static_cast<size_t>(candidate.pose_index)] ||
                used_det[static_cast<size_t>(candidate.det_index)]) {
                continue;
            }
            frame->pose->items[static_cast<size_t>(candidate.pose_index)].track_id =
                frame->det->items[static_cast<size_t>(candidate.det_index)].track_id;
            used_pose[static_cast<size_t>(candidate.pose_index)] = true;
            used_det[static_cast<size_t>(candidate.det_index)] = true;
        }
    }
    PushToDownstream(output_queues_, frame);
    return NodeStatus::OK;
 }
 #ifndef RK3588_TEST_BUILD
 REGISTER_NODE(PoseAssocNode, "pose_assoc");
 #endif
 }  // namespace rk3588
--- a/plugins/pose_assoc/pose_assoc_node.h
+++ b/plugins/pose_assoc/pose_assoc_node.h
@ -0,0 +1,30 @@
 #pragma once
 #include <memory>
 #include <string>
 #include <vector>
 #include "node.h"
 namespace rk3588 {
 class PoseAssocNode final : public INode {
 public:
    PoseAssocNode();
    ~PoseAssocNode() override;
    std::string Id() const override;
    std::string Type() const override;
    bool Init(const SimpleJson& config, const NodeContext& ctx) override;
    bool Start() override;
    void Stop() override;
    NodeStatus Process(FramePtr frame) override;
 private:
    struct Impl;
    std::unique_ptr<Impl> impl_;
    std::string id_;
    std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
 };
 }  // namespace rk3588
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -40,6 +40,8 @@ add_executable(rk3588_gtests
    test_behavior_event_model.cpp
    test_region_event.cpp
    test_action_recog.cpp
    test_ai_pose.cpp
    test_pose_assoc.cpp
    test_event_fusion.cpp
    test_alarm_behavior_events.cpp
    test_infer_backend.cpp
@ -53,6 +55,8 @@ add_executable(rk3588_gtests
    ${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
    ${CMAKE_SOURCE_DIR}/plugins/region_event/region_event_node.cpp
    ${CMAKE_SOURCE_DIR}/plugins/action_recog/action_recog_node.cpp
    ${CMAKE_SOURCE_DIR}/plugins/ai_pose/ai_pose_node.cpp
    ${CMAKE_SOURCE_DIR}/plugins/pose_assoc/pose_assoc_node.cpp
    ${CMAKE_SOURCE_DIR}/plugins/event_fusion/event_fusion_node.cpp
    ${CMAKE_SOURCE_DIR}/plugins/alarm/rule_engine.cpp
 )
--- a/tests/test_action_recog.cpp
+++ b/tests/test_action_recog.cpp
@ -5,6 +5,7 @@
 #include "frame/frame.h"
 #include "node.h"
 #include "pose/pose_result.h"
 #include "utils/simple_json.h"
 #include "../plugins/action_recog/action_recog_node.h"
@ -124,5 +125,385 @@ TEST(ActionRecogTest, EmitsFightWhenTwoTracksStayCloseWithRepeatedMotion) {
    EXPECT_EQ(frame2->behavior_events->items[0].track_ids[1], 32);
 }
 TEST(ActionRecogTest, EmitsFallFromPoseSequenceWhenBboxShapeChangeIsSmall) {
    ActionRecogNode node;
    const std::string config_text = R"({
        "id": "action_evt",
        "events": [
            {
                "type": "fall",
                "window_ms": 1500,
                "min_drop_pixels": 120,
                "min_aspect_ratio_delta": 10.0,
                "activate_duration_ms": 0,
                "pose_min_torso_drop_pixels": 120,
                "pose_max_upright_ratio": 0.6
            }
        ]
    })";
    SimpleJson config = ParseActionConfig(config_text);
    NodeContext ctx;
    auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(out);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto frame1 = std::make_shared<Frame>();
    frame1->width = 1920;
    frame1->height = 1080;
    frame1->pts = 1000;
    frame1->det = std::make_shared<DetectionResult>();
    frame1->det->img_w = 1920;
    frame1->det->img_h = 1080;
    frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
    frame1->pose = std::make_shared<PoseResult>();
    frame1->pose->img_w = 1920;
    frame1->pose->img_h = 1080;
    PoseItem pose1;
    pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
    pose1.score = 0.9f;
    pose1.keypoints.resize(17);
    pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
    pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
    pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
    pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
    frame1->pose->items.push_back(pose1);
    auto frame2 = std::make_shared<Frame>();
    frame2->width = 1920;
    frame2->height = 1080;
    frame2->pts = 1600;
    frame2->det = std::make_shared<DetectionResult>();
    frame2->det->img_w = 1920;
    frame2->det->img_h = 1080;
    frame2->det->items.push_back(Detection{0, 0.93f, Rect{780.0f, 420.0f, 200.0f, 280.0f}, 17});
    frame2->pose = std::make_shared<PoseResult>();
    frame2->pose->img_w = 1920;
    frame2->pose->img_h = 1080;
    PoseItem pose2;
    pose2.bbox = Rect{780.0f, 420.0f, 200.0f, 280.0f};
    pose2.score = 0.91f;
    pose2.keypoints.resize(17);
    pose2.keypoints[5] = PoseKeypoint{PosePoint2f{790.0f, 520.0f}, 0.9f};
    pose2.keypoints[6] = PoseKeypoint{PosePoint2f{970.0f, 520.0f}, 0.9f};
    pose2.keypoints[11] = PoseKeypoint{PosePoint2f{820.0f, 590.0f}, 0.9f};
    pose2.keypoints[12] = PoseKeypoint{PosePoint2f{940.0f, 590.0f}, 0.9f};
    frame2->pose->items.push_back(pose2);
    EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
    EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
    ASSERT_NE(frame2->behavior_events, nullptr);
    ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
    EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fall);
    ASSERT_EQ(frame2->behavior_events->items[0].track_ids.size(), 1u);
    EXPECT_EQ(frame2->behavior_events->items[0].track_ids[0], 17);
 }
 TEST(ActionRecogTest, EmitsFightFromPoseMotionWhenBboxMotionIsSmall) {
    ActionRecogNode node;
    const std::string config_text = R"({
        "id": "action_evt",
        "events": [
            {
                "type": "fight",
                "window_ms": 1200,
                "proximity_pixels": 220,
                "min_motion_pixels": 1000,
                "activate_duration_ms": 0,
                "pose_min_wrist_motion_pixels": 120,
                "pose_max_wrist_distance_pixels": 120
            }
        ]
    })";
    SimpleJson config = ParseActionConfig(config_text);
    NodeContext ctx;
    auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(out);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto frame1 = std::make_shared<Frame>();
    frame1->width = 1920;
    frame1->height = 1080;
    frame1->pts = 1000;
    frame1->det = std::make_shared<DetectionResult>();
    frame1->det->img_w = 1920;
    frame1->det->img_h = 1080;
    frame1->det->items.push_back(Detection{0, 0.90f, Rect{700.0f, 320.0f, 120.0f, 300.0f}, 31});
    frame1->det->items.push_back(Detection{0, 0.89f, Rect{860.0f, 320.0f, 120.0f, 300.0f}, 32});
    frame1->pose = std::make_shared<PoseResult>();
    frame1->pose->img_w = 1920;
    frame1->pose->img_h = 1080;
    PoseItem left1;
    left1.bbox = Rect{700.0f, 320.0f, 120.0f, 300.0f};
    left1.keypoints.resize(17);
    left1.keypoints[9] = PoseKeypoint{PosePoint2f{780.0f, 430.0f}, 0.9f};
    left1.keypoints[10] = PoseKeypoint{PosePoint2f{790.0f, 450.0f}, 0.9f};
    PoseItem right1;
    right1.bbox = Rect{860.0f, 320.0f, 120.0f, 300.0f};
    right1.keypoints.resize(17);
    right1.keypoints[9] = PoseKeypoint{PosePoint2f{900.0f, 430.0f}, 0.9f};
    right1.keypoints[10] = PoseKeypoint{PosePoint2f{910.0f, 450.0f}, 0.9f};
    frame1->pose->items.push_back(left1);
    frame1->pose->items.push_back(right1);
    auto frame2 = std::make_shared<Frame>();
    frame2->width = 1920;
    frame2->height = 1080;
    frame2->pts = 1300;
    frame2->det = std::make_shared<DetectionResult>();
    frame2->det->img_w = 1920;
    frame2->det->img_h = 1080;
    frame2->det->items.push_back(Detection{0, 0.91f, Rect{710.0f, 325.0f, 120.0f, 300.0f}, 31});
    frame2->det->items.push_back(Detection{0, 0.92f, Rect{850.0f, 315.0f, 120.0f, 300.0f}, 32});
    frame2->pose = std::make_shared<PoseResult>();
    frame2->pose->img_w = 1920;
    frame2->pose->img_h = 1080;
    PoseItem left2;
    left2.bbox = Rect{710.0f, 325.0f, 120.0f, 300.0f};
    left2.keypoints.resize(17);
    left2.keypoints[9] = PoseKeypoint{PosePoint2f{860.0f, 430.0f}, 0.9f};
    left2.keypoints[10] = PoseKeypoint{PosePoint2f{870.0f, 450.0f}, 0.9f};
    PoseItem right2;
    right2.bbox = Rect{850.0f, 315.0f, 120.0f, 300.0f};
    right2.keypoints.resize(17);
    right2.keypoints[9] = PoseKeypoint{PosePoint2f{840.0f, 430.0f}, 0.9f};
    right2.keypoints[10] = PoseKeypoint{PosePoint2f{830.0f, 450.0f}, 0.9f};
    frame2->pose->items.push_back(left2);
    frame2->pose->items.push_back(right2);
    EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
    EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
    ASSERT_NE(frame2->behavior_events, nullptr);
    ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
    EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fight);
    ASSERT_EQ(frame2->behavior_events->items[0].track_ids.size(), 2u);
    EXPECT_EQ(frame2->behavior_events->items[0].track_ids[0], 31);
    EXPECT_EQ(frame2->behavior_events->items[0].track_ids[1], 32);
 }
 TEST(ActionRecogTest, SupportsStructuredFusionConfigForFall) {
    ActionRecogNode node;
    const std::string config_text = R"({
        "id": "action_evt",
        "events": [
            {
                "type": "fall",
                "window_ms": 1500,
                "activate_duration_ms": 0,
                "bbox": {
                    "enabled": false
                },
                "pose": {
                    "enabled": true,
                    "min_torso_drop_pixels": 120,
                    "max_upright_ratio": 0.6
                },
                "fusion": {
                    "match_mode": "any"
                }
            }
        ]
    })";
    SimpleJson config = ParseActionConfig(config_text);
    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto frame1 = std::make_shared<Frame>();
    frame1->width = 1920;
    frame1->height = 1080;
    frame1->pts = 1000;
    frame1->det = std::make_shared<DetectionResult>();
    frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
    frame1->pose = std::make_shared<PoseResult>();
    PoseItem pose1;
    pose1.track_id = 17;
    pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
    pose1.keypoints.resize(17);
    pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
    pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
    pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
    pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
    frame1->pose->items.push_back(pose1);
    auto frame2 = std::make_shared<Frame>();
    frame2->width = 1920;
    frame2->height = 1080;
    frame2->pts = 1600;
    frame2->det = std::make_shared<DetectionResult>();
    frame2->det->items.push_back(Detection{0, 0.93f, Rect{790.0f, 400.0f, 180.0f, 300.0f}, 17});
    frame2->pose = std::make_shared<PoseResult>();
    PoseItem pose2;
    pose2.track_id = 17;
    pose2.bbox = Rect{790.0f, 400.0f, 180.0f, 300.0f};
    pose2.keypoints.resize(17);
    pose2.keypoints[5] = PoseKeypoint{PosePoint2f{790.0f, 520.0f}, 0.9f};
    pose2.keypoints[6] = PoseKeypoint{PosePoint2f{970.0f, 520.0f}, 0.9f};
    pose2.keypoints[11] = PoseKeypoint{PosePoint2f{820.0f, 590.0f}, 0.9f};
    pose2.keypoints[12] = PoseKeypoint{PosePoint2f{940.0f, 590.0f}, 0.9f};
    frame2->pose->items.push_back(pose2);
    EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
    EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
    ASSERT_NE(frame2->behavior_events, nullptr);
    ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
    EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fall);
 }
 TEST(ActionRecogTest, SuppressesFallFalsePositiveWhenPoseShapeConditionFails) {
    ActionRecogNode node;
    const std::string config_text = R"({
        "id": "action_evt",
        "events": [
            {
                "type": "fall",
                "window_ms": 1500,
                "activate_duration_ms": 0,
                "bbox": {
                    "enabled": false
                },
                "pose": {
                    "enabled": true,
                    "min_torso_drop_pixels": 120,
                    "max_upright_ratio": 0.8
                },
                "fusion": {
                    "match_mode": "any"
                }
            }
        ]
    })";
    SimpleJson config = ParseActionConfig(config_text);
    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto frame1 = std::make_shared<Frame>();
    frame1->pts = 1000;
    frame1->det = std::make_shared<DetectionResult>();
    frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
    frame1->pose = std::make_shared<PoseResult>();
    PoseItem pose1;
    pose1.track_id = 17;
    pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
    pose1.keypoints.resize(17);
    pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
    pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
    pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
    pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
    frame1->pose->items.push_back(pose1);
    auto frame2 = std::make_shared<Frame>();
    frame2->pts = 1600;
    frame2->det = std::make_shared<DetectionResult>();
    frame2->det->items.push_back(Detection{0, 0.93f, Rect{790.0f, 400.0f, 180.0f, 300.0f}, 17});
    frame2->pose = std::make_shared<PoseResult>();
    PoseItem pose2;
    pose2.track_id = 17;
    pose2.bbox = Rect{790.0f, 400.0f, 180.0f, 300.0f};
    pose2.keypoints.resize(17);
    pose2.keypoints[5] = PoseKeypoint{PosePoint2f{860.0f, 520.0f}, 0.9f};
    pose2.keypoints[6] = PoseKeypoint{PosePoint2f{900.0f, 520.0f}, 0.9f};
    pose2.keypoints[11] = PoseKeypoint{PosePoint2f{865.0f, 700.0f}, 0.9f};
    pose2.keypoints[12] = PoseKeypoint{PosePoint2f{905.0f, 700.0f}, 0.9f};
    frame2->pose->items.push_back(pose2);
    EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
    EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
    ASSERT_NE(frame2->behavior_events, nullptr);
    EXPECT_TRUE(frame2->behavior_events->items.empty());
 }
 TEST(ActionRecogTest, SuppressesFightFalsePositiveWhenWristsStayFarApart) {
    ActionRecogNode node;
    const std::string config_text = R"({
        "id": "action_evt",
        "events": [
            {
                "type": "fight",
                "window_ms": 1200,
                "activate_duration_ms": 0,
                "bbox": {
                    "enabled": false,
                    "proximity_pixels": 220,
                    "min_motion_pixels": 0
                },
                "pose": {
                    "enabled": true,
                    "min_wrist_motion_pixels": 120,
                    "max_wrist_distance_pixels": 60
                },
                "fusion": {
                    "match_mode": "any"
                }
            }
        ]
    })";
    SimpleJson config = ParseActionConfig(config_text);
    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto frame1 = std::make_shared<Frame>();
    frame1->pts = 1000;
    frame1->det = std::make_shared<DetectionResult>();
    frame1->det->items.push_back(Detection{0, 0.90f, Rect{700.0f, 320.0f, 120.0f, 300.0f}, 31});
    frame1->det->items.push_back(Detection{0, 0.89f, Rect{860.0f, 320.0f, 120.0f, 300.0f}, 32});
    frame1->pose = std::make_shared<PoseResult>();
    PoseItem left1;
    left1.track_id = 31;
    left1.bbox = Rect{700.0f, 320.0f, 120.0f, 300.0f};
    left1.keypoints.resize(17);
    left1.keypoints[9] = PoseKeypoint{PosePoint2f{760.0f, 430.0f}, 0.9f};
    left1.keypoints[10] = PoseKeypoint{PosePoint2f{770.0f, 450.0f}, 0.9f};
    PoseItem right1;
    right1.track_id = 32;
    right1.bbox = Rect{860.0f, 320.0f, 120.0f, 300.0f};
    right1.keypoints.resize(17);
    right1.keypoints[9] = PoseKeypoint{PosePoint2f{960.0f, 430.0f}, 0.9f};
    right1.keypoints[10] = PoseKeypoint{PosePoint2f{970.0f, 450.0f}, 0.9f};
    frame1->pose->items.push_back(left1);
    frame1->pose->items.push_back(right1);
    auto frame2 = std::make_shared<Frame>();
    frame2->pts = 1300;
    frame2->det = std::make_shared<DetectionResult>();
    frame2->det->items.push_back(Detection{0, 0.91f, Rect{710.0f, 325.0f, 120.0f, 300.0f}, 31});
    frame2->det->items.push_back(Detection{0, 0.92f, Rect{850.0f, 315.0f, 120.0f, 300.0f}, 32});
    frame2->pose = std::make_shared<PoseResult>();
    PoseItem left2;
    left2.track_id = 31;
    left2.bbox = Rect{710.0f, 325.0f, 120.0f, 300.0f};
    left2.keypoints.resize(17);
    left2.keypoints[9] = PoseKeypoint{PosePoint2f{820.0f, 430.0f}, 0.9f};
    left2.keypoints[10] = PoseKeypoint{PosePoint2f{830.0f, 450.0f}, 0.9f};
    PoseItem right2;
    right2.track_id = 32;
    right2.bbox = Rect{850.0f, 315.0f, 120.0f, 300.0f};
    right2.keypoints.resize(17);
    right2.keypoints[9] = PoseKeypoint{PosePoint2f{980.0f, 430.0f}, 0.9f};
    right2.keypoints[10] = PoseKeypoint{PosePoint2f{990.0f, 450.0f}, 0.9f};
    frame2->pose->items.push_back(left2);
    frame2->pose->items.push_back(right2);
    EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
    EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
    ASSERT_NE(frame2->behavior_events, nullptr);
    EXPECT_TRUE(frame2->behavior_events->items.empty());
 }
 }  // namespace
 }  // namespace rk3588
--- a/tests/test_ai_pose.cpp
+++ b/tests/test_ai_pose.cpp
@ -0,0 +1,232 @@
 #include <gtest/gtest.h>
 #include <cstdint>
 #include <cstring>
 #include <memory>
 #include <string>
 #include <vector>
 #include "hw/i_infer_backend.h"
 #include "node.h"
 #include "pose/pose_result.h"
 #include "utils/simple_json.h"
 #include "../plugins/ai_pose/ai_pose_node.h"
 namespace rk3588 {
 namespace {
 SimpleJson ParsePoseConfig(const std::string& text) {
    SimpleJson config;
    std::string err;
    const bool ok = ParseSimpleJson(text, config, err);
    EXPECT_TRUE(ok);
    return config;
 }
 class FakeInferBackend final : public IInferBackend {
 public:
    ModelHandle LoadModel(const std::string& /*model_path*/, std::string& /*err*/) override {
        return 1;
    }
    void UnloadModel(ModelHandle /*handle*/) override {}
    bool GetModelInfo(ModelHandle /*handle*/, ModelInfo& info) const override {
        info.input_width = 640;
        info.input_height = 640;
        info.input_channels = 3;
        info.n_input = 1;
        info.n_output = 4;
        info.name = "fake_yolov8_pose";
        return true;
    }
    InferResult Infer(ModelHandle /*handle*/, const InferInput& /*input*/) override {
        InferResult result;
        result.success = true;
        result.outputs = outputs_;
        return result;
    }
    AiScheduler::BorrowedInferResult InferBorrowed(ModelHandle /*handle*/, const InferInput& /*input*/) override {
        AiScheduler::BorrowedInferResult result;
        result.success = false;
        result.error = "not used in unit test";
        return result;
    }
    std::vector<InferOutput> outputs_;
 };
 InferOutput MakeFloatOutput(const std::vector<float>& values) {
    InferOutput out;
    out.size = values.size() * sizeof(float);
    out.data.resize(out.size);
    std::memcpy(out.data.data(), values.data(), out.size);
    return out;
 }
 TEST(AiPoseNodeTest, RejectsEnabledConfigWithoutModelPath) {
    AiPoseNode node;
    SimpleJson config = ParsePoseConfig(R"({
        "id": "pose0",
        "enabled": true
    })");
    NodeContext ctx;
    EXPECT_FALSE(node.Init(config, ctx));
 }
 TEST(AiPoseNodeTest, DisabledNodeStartsAndPassesFramesThrough) {
    AiPoseNode node;
    SimpleJson config = ParsePoseConfig(R"({
        "id": "pose0",
        "enabled": false
    })");
    NodeContext ctx;
    auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(out);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto frame = std::make_shared<Frame>();
    frame->width = 1280;
    frame->height = 720;
    EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
    FramePtr forwarded;
    ASSERT_TRUE(out->TryPop(forwarded));
    EXPECT_EQ(forwarded.get(), frame.get());
    EXPECT_EQ(frame->pose, nullptr);
 }
 TEST(AiPoseNodeTest, EnabledNodeRequiresInferBackendAtStart) {
    AiPoseNode node;
    SimpleJson config = ParsePoseConfig(R"({
        "id": "pose0",
        "enabled": true,
        "model_path": "models/yolov8n-pose.rknn"
    })");
    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    EXPECT_FALSE(node.Start());
 }
 TEST(AiPoseNodeTest, DecodePoseOutputsToOriginalCoordinates) {
    const int num_keypoints = 17;
    const int total_points = 8400;
    std::vector<float> head0(static_cast<size_t>(65 * 80 * 80), -10.0f);
    std::vector<float> head1(static_cast<size_t>(65 * 40 * 40), -10.0f);
    std::vector<float> head2(static_cast<size_t>(65 * 20 * 20), -10.0f);
    std::vector<float> keypoints(static_cast<size_t>(num_keypoints * 3 * total_points), 0.0f);
    const int cell_x = 10;
    const int cell_y = 15;
    const int grid_w = 80;
    const int point_index = cell_y * grid_w + cell_x;
    auto set_dfl_bin = [&](int channel_group, int bin) {
        for (int i = 0; i < 16; ++i) {
            head0[static_cast<size_t>((channel_group * 16 + i) * grid_w * 80 + point_index)] = (i == bin) ? 10.0f : -10.0f;
        }
    };
    set_dfl_bin(0, 4);
    set_dfl_bin(1, 6);
    set_dfl_bin(2, 8);
    set_dfl_bin(3, 10);
    head0[static_cast<size_t>(64 * grid_w * 80 + point_index)] = 10.0f;
    for (int k = 0; k < num_keypoints; ++k) {
        keypoints[static_cast<size_t>(k * 3 * total_points + point_index)] = 120.0f + static_cast<float>(k);
        keypoints[static_cast<size_t>(k * 3 * total_points + total_points + point_index)] = 140.0f + static_cast<float>(k * 2);
        keypoints[static_cast<size_t>(k * 3 * total_points + 2 * total_points + point_index)] = 0.9f;
    }
    auto backend = std::make_shared<FakeInferBackend>();
    backend->outputs_ = {
        MakeFloatOutput(head0),
        MakeFloatOutput(head1),
        MakeFloatOutput(head2),
        MakeFloatOutput(keypoints),
    };
    AiPoseNode node;
    SimpleJson config = ParsePoseConfig(R"({
        "id": "pose0",
        "enabled": true,
        "model_path": "models/yolov8n-pose.rknn",
        "model_input_w": 640,
        "model_input_h": 640,
        "conf_thresh": 0.25,
        "nms_thresh": 0.45
    })");
    NodeContext ctx;
    ctx.infer_backend = backend;
    auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(out);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto rgb = std::make_shared<std::vector<uint8_t>>(static_cast<size_t>(320 * 320 * 3), 127);
    auto frame = std::make_shared<Frame>();
    frame->width = 320;
    frame->height = 320;
    frame->format = PixelFormat::RGB;
    frame->data = rgb->data();
    frame->data_size = rgb->size();
    frame->stride = 320 * 3;
    frame->plane_count = 1;
    frame->planes[0] = {frame->data, frame->stride, static_cast<int>(frame->data_size), 0};
    frame->data_owner = rgb;
    EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
    ASSERT_NE(frame->pose, nullptr);
    ASSERT_EQ(frame->pose->items.size(), 1u);
    const PoseItem& item = frame->pose->items[0];
    EXPECT_NEAR(item.bbox.x, 26.0f, 1.0f);
    EXPECT_NEAR(item.bbox.y, 38.0f, 1.0f);
    EXPECT_NEAR(item.bbox.w, 48.0f, 1.0f);
    EXPECT_NEAR(item.bbox.h, 64.0f, 1.0f);
    ASSERT_EQ(item.keypoints.size(), static_cast<size_t>(num_keypoints));
    EXPECT_NEAR(item.keypoints[0].point.x, 60.0f, 1.0f);
    EXPECT_NEAR(item.keypoints[0].point.y, 70.0f, 1.0f);
    EXPECT_NEAR(item.keypoints[16].point.x, 68.0f, 1.0f);
    EXPECT_NEAR(item.keypoints[16].point.y, 86.0f, 1.0f);
    FramePtr forwarded;
    ASSERT_TRUE(out->TryPop(forwarded));
    EXPECT_EQ(forwarded.get(), frame.get());
 }
 TEST(PoseResultTest, SupportsPerTrackKeypoints) {
    PoseResult result;
    result.img_w = 1920;
    result.img_h = 1080;
    result.model_name = "pose_model";
    PoseItem item;
    item.track_id = 42;
    item.score = 0.93f;
    item.bbox = Rect{100.0f, 200.0f, 300.0f, 400.0f};
    item.keypoints.push_back(PoseKeypoint{PosePoint2f{120.0f, 220.0f}, 0.99f});
    item.keypoints.push_back(PoseKeypoint{PosePoint2f{140.0f, 260.0f}, 0.95f});
    result.items.push_back(item);
    ASSERT_EQ(result.items.size(), 1u);
    EXPECT_EQ(result.items[0].track_id, 42);
    ASSERT_EQ(result.items[0].keypoints.size(), 2u);
    EXPECT_FLOAT_EQ(result.items[0].keypoints[0].point.x, 120.0f);
    EXPECT_FLOAT_EQ(result.items[0].keypoints[1].score, 0.95f);
 }
 }  // namespace
 }  // namespace rk3588
--- a/tests/test_pose_assoc.cpp
+++ b/tests/test_pose_assoc.cpp
@ -0,0 +1,117 @@
 #include <gtest/gtest.h>
 #include <memory>
 #include <string>
 #include "frame/frame.h"
 #include "node.h"
 #include "pose/pose_result.h"
 #include "utils/simple_json.h"
 #include "../plugins/pose_assoc/pose_assoc_node.h"
 namespace rk3588 {
 namespace {
 SimpleJson ParsePoseAssocConfig(const std::string& text) {
    SimpleJson config;
    std::string err;
    const bool ok = ParseSimpleJson(text, config, err);
    EXPECT_TRUE(ok);
    return config;
 }
 TEST(PoseAssocNodeTest, AssociatesPoseItemsToTrackedDetections) {
    PoseAssocNode node;
    SimpleJson config = ParsePoseAssocConfig(R"({
        "id": "pose_assoc0",
        "min_iou": 0.1
    })");
    NodeContext ctx;
    auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
    ctx.output_queues.push_back(out);
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto frame = std::make_shared<Frame>();
    frame->det = std::make_shared<DetectionResult>();
    frame->det->items.push_back(Detection{0, 0.95f, Rect{100.0f, 100.0f, 120.0f, 240.0f}, 7});
    frame->det->items.push_back(Detection{0, 0.94f, Rect{320.0f, 120.0f, 120.0f, 240.0f}, 8});
    frame->pose = std::make_shared<PoseResult>();
    PoseItem pose0;
    pose0.bbox = Rect{110.0f, 110.0f, 110.0f, 220.0f};
    PoseItem pose1;
    pose1.bbox = Rect{330.0f, 130.0f, 100.0f, 210.0f};
    frame->pose->items.push_back(pose0);
    frame->pose->items.push_back(pose1);
    EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
    ASSERT_EQ(frame->pose->items.size(), 2u);
    EXPECT_EQ(frame->pose->items[0].track_id, 7);
    EXPECT_EQ(frame->pose->items[1].track_id, 8);
    FramePtr forwarded;
    ASSERT_TRUE(out->TryPop(forwarded));
    EXPECT_EQ(forwarded.get(), frame.get());
 }
 TEST(PoseAssocNodeTest, UsesOneToOneAssignmentForOverlappingCandidates) {
    PoseAssocNode node;
    SimpleJson config = ParsePoseAssocConfig(R"({
        "id": "pose_assoc0",
        "min_iou": 0.05
    })");
    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto frame = std::make_shared<Frame>();
    frame->det = std::make_shared<DetectionResult>();
    frame->det->items.push_back(Detection{0, 0.95f, Rect{100.0f, 100.0f, 150.0f, 250.0f}, 11});
    frame->det->items.push_back(Detection{0, 0.94f, Rect{130.0f, 120.0f, 150.0f, 250.0f}, 12});
    frame->pose = std::make_shared<PoseResult>();
    PoseItem pose0;
    pose0.bbox = Rect{105.0f, 110.0f, 145.0f, 245.0f};
    PoseItem pose1;
    pose1.bbox = Rect{132.0f, 125.0f, 148.0f, 245.0f};
    frame->pose->items.push_back(pose0);
    frame->pose->items.push_back(pose1);
    EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
    EXPECT_NE(frame->pose->items[0].track_id, frame->pose->items[1].track_id);
    EXPECT_TRUE(frame->pose->items[0].track_id == 11 || frame->pose->items[0].track_id == 12);
    EXPECT_TRUE(frame->pose->items[1].track_id == 11 || frame->pose->items[1].track_id == 12);
 }
 TEST(PoseAssocNodeTest, PassesThroughWhenPoseOrDetectionMissing) {
    PoseAssocNode node;
    SimpleJson config = ParsePoseAssocConfig(R"({
        "id": "pose_assoc0"
    })");
    NodeContext ctx;
    ASSERT_TRUE(node.Init(config, ctx));
    ASSERT_TRUE(node.Start());
    auto no_pose = std::make_shared<Frame>();
    no_pose->det = std::make_shared<DetectionResult>();
    no_pose->det->items.push_back(Detection{0, 0.9f, Rect{0.0f, 0.0f, 10.0f, 10.0f}, 1});
    EXPECT_EQ(static_cast<int>(node.Process(no_pose)), static_cast<int>(NodeStatus::OK));
    EXPECT_EQ(no_pose->pose, nullptr);
    auto no_det = std::make_shared<Frame>();
    no_det->pose = std::make_shared<PoseResult>();
    PoseItem pose;
    pose.bbox = Rect{0.0f, 0.0f, 10.0f, 10.0f};
    no_det->pose->items.push_back(pose);
    EXPECT_EQ(static_cast<int>(node.Process(no_det)), static_cast<int>(NodeStatus::OK));
    ASSERT_EQ(no_det->pose->items.size(), 1u);
    EXPECT_EQ(no_det->pose->items[0].track_id, -1);
 }
 }  // namespace
 }  // namespace rk3588