Compare commits

...

1 Commit

Author SHA1 Message Date
e0827e984e feat: add pose behavior pipeline 2026-04-02 17:44:36 +08:00
15 changed files with 1939 additions and 23 deletions

View File

@ -1266,3 +1266,40 @@ rsync -avz build-cross/ user@board_ip:/opt/media-server/
- [docs/models.md](docs/models.md) - Model conversion and deployment guide
- [docs/Agent_API_Extensions.md](docs/Agent_API_Extensions.md) - Agent API extension notes
- [docs/API_Device_RemoteMgmt_InterfaceTable.md](docs/API_Device_RemoteMgmt_InterfaceTable.md) - Remote management interface table
---
## Pose Behavior Pipeline
The pose behavior pipeline is optional and does not replace the existing detection pipeline.
Baseline behavior graph:
```text
input_rtsp -> preprocess -> ai_yolo -> tracker -> region_event -> action_recog -> event_fusion
```
Pose-enabled behavior graph:
```text
input_rtsp -> preprocess -> ai_yolo -> tracker -> ai_pose -> pose_assoc -> region_event -> action_recog -> event_fusion
```
Engineering rules:
- `tracker` remains the source of stable `track_id` for the main pipeline.
- `ai_pose` only produces `Frame.pose` and does not replace `Frame.det`.
- `pose_assoc` is the canonical node for assigning `PoseItem.track_id`.
- `action_recog` may use pose-enhanced rules, but it must still run when pose nodes are absent.
Degradation semantics:
- If `ai_pose` is not present, the graph keeps the original bbox-based behavior logic.
- If `ai_pose` is present but inference fails on some frames, `Frame.pose` may be empty and downstream nodes must continue with bbox data.
- If `pose_assoc` is absent, downstream nodes should treat `pose.track_id` as optional and may fall back to bbox matching only as a compatibility path.
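The association that `pose_assoc` performs can be sketched as a greedy best-IoU match from each pose to the tracked detections. This is a minimal sketch with simplified stand-in types (`Box`, `Det`, `Pose` are illustrative, not the project's real `Frame` structures; the real node is configured via `min_iou`):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Simplified stand-ins for tracked detections and pose items.
struct Box { float x, y, w, h; };
struct Det { int track_id; Box bbox; };
struct Pose { int track_id = -1; Box bbox; };

static float IoU(const Box& a, const Box& b) {
    const float x1 = std::max(a.x, b.x), y1 = std::max(a.y, b.y);
    const float x2 = std::min(a.x + a.w, b.x + b.w);
    const float y2 = std::min(a.y + a.h, b.y + b.h);
    const float inter = std::max(0.0f, x2 - x1) * std::max(0.0f, y2 - y1);
    const float uni = a.w * a.h + b.w * b.h - inter;
    return uni > 0.0f ? inter / uni : 0.0f;
}

// For each pose, copy the track_id of the best-overlapping tracked
// detection; below min_iou the pose keeps track_id = -1.
void AssociatePoses(std::vector<Pose>& poses, const std::vector<Det>& dets,
                    float min_iou) {
    for (auto& pose : poses) {
        float best = min_iou;
        for (const auto& det : dets) {
            const float iou = IoU(pose.bbox, det.bbox);
            if (iou >= best) {
                best = iou;
                pose.track_id = det.track_id;
            }
        }
    }
}
```

Poses that match nothing keep `track_id = -1`, which is exactly the "treat `pose.track_id` as optional" case downstream nodes must handle.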
Sample configs:
- `configs/sample_region_behavior_intrusion.json`
- Baseline intrusion-only graph without pose nodes. This is the reference for the non-pose deployment path.
- `configs/sample_region_behavior_full.json`
- Full behavior graph with `ai_pose` and `pose_assoc`, intended for pose-aware `fall` and `fight`.

View File

@ -60,6 +60,24 @@
"max_age_ms": 1200,
"min_hits": 2
},
{
"id": "pose",
"type": "ai_pose",
"role": "filter",
"enable": true,
"model_path": "./models/yolov8n-pose.rknn",
"model_input_w": 640,
"model_input_h": 640,
"conf_thresh": 0.25,
"nms_thresh": 0.45
},
{
"id": "pose_assoc",
"type": "pose_assoc",
"role": "filter",
"enable": true,
"min_iou": 0.1
},
{
"id": "region_evt",
"type": "region_event",
@ -90,16 +108,38 @@
{
"type": "fall",
"window_ms": 1500,
"min_drop_pixels": 120,
"min_aspect_ratio_delta": 0.35,
"activate_duration_ms": 300
"activate_duration_ms": 300,
"bbox": {
"enabled": true,
"min_drop_pixels": 120,
"min_aspect_ratio_delta": 0.35
},
"pose": {
"enabled": true,
"min_torso_drop_pixels": 120,
"max_upright_ratio": 0.60
},
"fusion": {
"match_mode": "any"
}
},
{
"type": "fight",
"window_ms": 1200,
"proximity_pixels": 220,
"min_motion_pixels": 90,
"activate_duration_ms": 200
"activate_duration_ms": 200,
"bbox": {
"enabled": true,
"proximity_pixels": 220,
"min_motion_pixels": 90
},
"pose": {
"enabled": true,
"min_wrist_motion_pixels": 120,
"max_wrist_distance_pixels": 120
},
"fusion": {
"match_mode": "any"
}
}
]
},
@ -190,7 +230,9 @@
["in", "pre"],
["pre", "person_det"],
["person_det", "trk"],
["trk", "region_evt"],
["trk", "pose"],
["pose", "pose_assoc"],
["pose_assoc", "region_evt"],
["region_evt", "action_evt"],
["action_evt", "fusion"],
["fusion", "osd"],

View File

@ -554,3 +554,117 @@ input_rtsp
**Version**: v2.0
**Last updated**: 2026-03-15
## Pose-Enabled Behavior Configuration
This repository now supports two behavior deployment modes.
### Mode 1: Baseline Behavior Graph
Use this when you only need tracked detections and rule-based region behavior:
```text
input_rtsp -> preprocess -> ai_yolo -> tracker -> region_event -> action_recog -> event_fusion
```
Reference config:
- `configs/sample_region_behavior_intrusion.json`
Characteristics:
- No pose model dependency
- Lowest compute cost
- `fall` and `fight` rely on bbox-only rules
### Mode 2: Pose-Enabled Behavior Graph
Use this when you need pose-aware `fall` and `fight`:
```text
input_rtsp -> preprocess -> ai_yolo -> tracker -> ai_pose -> pose_assoc -> region_event -> action_recog -> event_fusion
```
Reference config:
- `configs/sample_region_behavior_full.json`
Characteristics:
- `ai_pose` writes `Frame.pose`
- `pose_assoc` assigns `PoseItem.track_id`
- `action_recog` can fuse bbox and pose signals per event rule
### Optional Integration Rules
- `ai_pose` is optional. Do not add it to graphs that only need intrusion or climb.
- `pose_assoc` should be placed after `ai_pose` and after tracked detections are already available on the frame.
- `action_recog` must remain runnable when `Frame.pose` is missing or empty.
### Degradation Semantics
- Without `ai_pose`, the graph must still behave correctly with bbox-only behavior logic.
- With `ai_pose` but without `pose_assoc`, `pose.track_id` is not guaranteed, and downstream logic should treat it as an optional compatibility path.
- If pose inference returns no items for a frame, the rest of the graph must keep using `Frame.det` and `track_id`.
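In node code, the degradation rules above reduce to a guard before any pose-dependent logic. This is a minimal sketch with stand-in types (the real `Frame` carries `std::shared_ptr<PoseResult> pose` alongside `det`):

```cpp
#include <cassert>
#include <memory>
#include <vector>

// Simplified stand-ins for the frame-level pose payload.
struct PoseItem { int track_id = -1; };
struct PoseResult { std::vector<PoseItem> items; };
struct Frame {
    std::shared_ptr<PoseResult> pose;  // null when ai_pose is absent
    // det, track state, ... omitted
};

// True only when pose data is present and non-empty; callers fall
// back to bbox/track_id logic otherwise.
bool HasUsablePose(const Frame& frame) {
    return frame.pose && !frame.pose->items.empty();
}
```

Both the "node absent" case (null pointer) and the "inference failed on this frame" case (empty item list) take the same bbox-only fallback branch.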
### Structured `action_recog` Rules
`action_recog` supports a structured event format with `bbox`, `pose`, and `fusion` sections.
Example:
```json
{
"id": "action_evt",
"type": "action_recog",
"events": [
{
"type": "fall",
"window_ms": 1500,
"activate_duration_ms": 300,
"bbox": {
"enabled": true,
"min_drop_pixels": 120,
"min_aspect_ratio_delta": 0.35
},
"pose": {
"enabled": true,
"min_torso_drop_pixels": 120,
"max_upright_ratio": 0.60
},
"fusion": {
"match_mode": "any"
}
},
{
"type": "fight",
"window_ms": 1200,
"activate_duration_ms": 200,
"bbox": {
"enabled": true,
"proximity_pixels": 220,
"min_motion_pixels": 90
},
"pose": {
"enabled": true,
"min_wrist_motion_pixels": 120,
"max_wrist_distance_pixels": 120
},
"fusion": {
"match_mode": "any"
}
}
]
}
```
Rules:
- `bbox.enabled=false` disables bbox signal evaluation for that event.
- `pose.enabled=false` disables pose signal evaluation for that event.
- `fusion.match_mode="any"` means either bbox or pose may activate the event.
- `fusion.match_mode="all"` means bbox and pose must both activate the event.
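The four rules reduce to a small boolean function; the sketch below mirrors the `EvaluateSignalMode` helper in the `action_recog` plugin, with renamed types for standalone use:

```cpp
#include <cassert>

enum class MatchMode { Any, All };

// A signal only participates when its section is enabled; with both
// sections disabled the event can never activate.
bool EventActivates(MatchMode mode, bool bbox_enabled, bool bbox_hit,
                    bool pose_enabled, bool pose_hit) {
    if (bbox_enabled && pose_enabled) {
        return mode == MatchMode::All ? (bbox_hit && pose_hit)
                                      : (bbox_hit || pose_hit);
    }
    if (bbox_enabled) return bbox_hit;
    if (pose_enabled) return pose_hit;
    return false;
}
```

Note that when only one section is enabled, `match_mode` is irrelevant: the single enabled signal decides alone.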
Backward compatibility:
- Old flat keys such as `min_drop_pixels` and `pose_min_torso_drop_pixels` are still accepted.
- New configs should prefer the structured form above.
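For comparison, a legacy flat-key form of the `fall` rule above, using the flat keys the parser still accepts (`min_drop_pixels`, `min_aspect_ratio_delta`, `pose_min_torso_drop_pixels`, `pose_max_upright_ratio`, `signal_mode`):

```json
{
  "type": "fall",
  "window_ms": 1500,
  "activate_duration_ms": 300,
  "min_drop_pixels": 120,
  "min_aspect_ratio_delta": 0.35,
  "pose_min_torso_drop_pixels": 120,
  "pose_max_upright_ratio": 0.60,
  "signal_mode": "any"
}
```

With flat keys, the pose signal is implicitly enabled whenever either `pose_*` threshold is positive; the structured form makes that enablement explicit.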

View File

@ -16,6 +16,7 @@ namespace rk3588 {
struct FaceDetResult;
struct FaceRecogResult;
struct PoseResult;
enum class PixelFormat {
NV12,
@ -82,6 +83,7 @@ struct Frame {
std::shared_ptr<DetectionResult> det;
std::shared_ptr<BehaviorEventResult> behavior_events;
std::shared_ptr<PoseResult> pose;
// Face recognition pipeline meta (kept separate from user_meta to avoid conflicts with publish).
std::shared_ptr<FaceDetResult> face_det;
std::shared_ptr<FaceRecogResult> face_recog;

View File

@ -0,0 +1,34 @@
#pragma once
#include <string>
#include <vector>
#include "frame/rect.h"
namespace rk3588 {
struct PosePoint2f {
float x = 0.0f;
float y = 0.0f;
};
struct PoseKeypoint {
PosePoint2f point{};
float score = 0.0f;
};
struct PoseItem {
Rect bbox{};
float score = 0.0f;
int track_id = -1;
std::vector<PoseKeypoint> keypoints;
};
struct PoseResult {
std::vector<PoseItem> items;
int img_w = 0;
int img_h = 0;
std::string model_name;
};
} // namespace rk3588
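A standalone usage sketch of these structures (structs copied locally with a simplified `Rect`; the keypoint layout is assumed to be COCO-17, matching the shoulder/hip/wrist indices used by `action_recog`):

```cpp
#include <cassert>
#include <vector>

// Local copies of the pose types with a simplified Rect.
struct Rect { float x = 0, y = 0, w = 0, h = 0; };
struct PosePoint2f { float x = 0.0f, y = 0.0f; };
struct PoseKeypoint { PosePoint2f point{}; float score = 0.0f; };
struct PoseItem {
    Rect bbox{};
    float score = 0.0f;
    int track_id = -1;  // -1 until pose_assoc assigns one
    std::vector<PoseKeypoint> keypoints;
};

// A keypoint is usable when its score is positive, following the
// IsValidKeypoint convention in action_recog.
int CountUsableKeypoints(const PoseItem& item) {
    int n = 0;
    for (const auto& kp : item.keypoints) {
        if (kp.score > 0.0f) ++n;
    }
    return n;
}
```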

View File

@ -482,6 +482,30 @@ set_target_properties(action_recog PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
# ai_pose plugin (pose inference scaffold)
add_library(ai_pose SHARED
ai_pose/ai_pose_node.cpp
)
target_include_directories(ai_pose PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
target_link_libraries(ai_pose PRIVATE project_options Threads::Threads)
set_target_properties(ai_pose PROPERTIES
OUTPUT_NAME "ai_pose"
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
# pose_assoc plugin (assign pose.track_id from tracked detections)
add_library(pose_assoc SHARED
pose_assoc/pose_assoc_node.cpp
)
target_include_directories(pose_assoc PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
target_link_libraries(pose_assoc PRIVATE project_options Threads::Threads)
set_target_properties(pose_assoc PROPERTIES
OUTPUT_NAME "pose_assoc"
LIBRARY_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
# event_fusion plugin (behavior event id and lifecycle normalization)
add_library(event_fusion SHARED
event_fusion/event_fusion_node.cpp
@ -565,7 +589,7 @@ set_target_properties(ai_shoe_det PROPERTIES
RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
)
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate region_event action_recog event_fusion storage ai_scheduler ai_shoe_det
install(TARGETS input_rtsp input_file publish preprocess ai_yolo ai_face_det ai_scrfd ai_scrfd_sliding ai_face_recog tracker gate osd alarm logic_gate region_event action_recog ai_pose pose_assoc event_fusion storage ai_scheduler ai_shoe_det
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/rk3588-media-server/plugins
)

View File

@ -4,6 +4,7 @@
#include <cmath>
#include <cstdint>
#include <deque>
#include <limits>
#include <map>
#include <memory>
#include <set>
@ -12,6 +13,7 @@
#include <vector>
#include "behavior/behavior_event.h"
#include "pose/pose_result.h"
#include "utils/logger.h"
namespace rk3588 {
@ -22,21 +24,135 @@ enum class ActionEventKind {
Fight
};
enum class ActionSignalMode {
Any,
All
};
struct FallBboxRule {
bool enabled = true;
float min_drop_pixels = 0.0f;
float min_aspect_ratio_delta = 0.0f;
};
struct FallPoseRule {
bool enabled = false;
float min_torso_drop_pixels = 0.0f;
float max_upright_ratio = 0.0f;
};
struct FightBboxRule {
bool enabled = true;
float proximity_pixels = 0.0f;
float min_motion_pixels = 0.0f;
};
struct FightPoseRule {
bool enabled = false;
float min_wrist_motion_pixels = 0.0f;
float max_wrist_distance_pixels = 0.0f;
};
struct ActionEventRule {
ActionEventKind kind = ActionEventKind::Fall;
uint64_t window_ms = 1000;
uint64_t activate_duration_ms = 0;
float min_drop_pixels = 0.0f;
float min_aspect_ratio_delta = 0.0f;
float proximity_pixels = 0.0f;
float min_motion_pixels = 0.0f;
ActionSignalMode signal_mode = ActionSignalMode::Any;
FallBboxRule fall_bbox{};
FallPoseRule fall_pose{};
FightBboxRule fight_bbox{};
FightPoseRule fight_pose{};
};
struct PoseTrackSample {
bool matched = false;
float torso_center_y = 0.0f;
float upright_ratio = 0.0f;
float left_wrist_x = 0.0f;
float left_wrist_y = 0.0f;
float right_wrist_x = 0.0f;
float right_wrist_y = 0.0f;
bool has_left_wrist = false;
bool has_right_wrist = false;
};
struct TrackSample {
uint64_t pts = 0;
Rect bbox{};
PoseTrackSample pose{};
};
static ActionSignalMode ParseSignalMode(const std::string& value) {
if (value == "all") return ActionSignalMode::All;
return ActionSignalMode::Any;
}
static bool EvaluateSignalMode(ActionSignalMode mode, bool bbox_trigger, bool pose_trigger,
bool bbox_enabled, bool pose_enabled) {
if (bbox_enabled && pose_enabled) {
return mode == ActionSignalMode::All ? (bbox_trigger && pose_trigger) : (bbox_trigger || pose_trigger);
}
if (bbox_enabled) return bbox_trigger;
if (pose_enabled) return pose_trigger;
return false;
}
static bool ParseFallRule(const SimpleJson& ev, ActionEventRule& rule) {
rule.kind = ActionEventKind::Fall;
rule.fall_bbox.min_drop_pixels = ev.ValueOr<float>("min_drop_pixels", 0.0f);
rule.fall_bbox.min_aspect_ratio_delta = ev.ValueOr<float>("min_aspect_ratio_delta", 0.0f);
rule.fall_pose.min_torso_drop_pixels = ev.ValueOr<float>("pose_min_torso_drop_pixels", 0.0f);
rule.fall_pose.max_upright_ratio = ev.ValueOr<float>("pose_max_upright_ratio", 0.0f);
if (const SimpleJson* bbox = ev.Find("bbox"); bbox && bbox->IsObject()) {
rule.fall_bbox.enabled = bbox->ValueOr<bool>("enabled", true);
rule.fall_bbox.min_drop_pixels = bbox->ValueOr<float>("min_drop_pixels", rule.fall_bbox.min_drop_pixels);
rule.fall_bbox.min_aspect_ratio_delta =
bbox->ValueOr<float>("min_aspect_ratio_delta", rule.fall_bbox.min_aspect_ratio_delta);
} else {
rule.fall_bbox.enabled = true;
}
if (const SimpleJson* pose = ev.Find("pose"); pose && pose->IsObject()) {
rule.fall_pose.enabled = pose->ValueOr<bool>("enabled", true);
rule.fall_pose.min_torso_drop_pixels =
pose->ValueOr<float>("min_torso_drop_pixels", rule.fall_pose.min_torso_drop_pixels);
rule.fall_pose.max_upright_ratio =
pose->ValueOr<float>("max_upright_ratio", rule.fall_pose.max_upright_ratio);
} else {
rule.fall_pose.enabled = rule.fall_pose.min_torso_drop_pixels > 0.0f || rule.fall_pose.max_upright_ratio > 0.0f;
}
return true;
}
static bool ParseFightRule(const SimpleJson& ev, ActionEventRule& rule) {
rule.kind = ActionEventKind::Fight;
rule.fight_bbox.proximity_pixels = ev.ValueOr<float>("proximity_pixels", 0.0f);
rule.fight_bbox.min_motion_pixels = ev.ValueOr<float>("min_motion_pixels", 0.0f);
rule.fight_pose.min_wrist_motion_pixels = ev.ValueOr<float>("pose_min_wrist_motion_pixels", 0.0f);
rule.fight_pose.max_wrist_distance_pixels = ev.ValueOr<float>("pose_max_wrist_distance_pixels", 0.0f);
if (const SimpleJson* bbox = ev.Find("bbox"); bbox && bbox->IsObject()) {
rule.fight_bbox.enabled = bbox->ValueOr<bool>("enabled", true);
rule.fight_bbox.proximity_pixels = bbox->ValueOr<float>("proximity_pixels", rule.fight_bbox.proximity_pixels);
rule.fight_bbox.min_motion_pixels = bbox->ValueOr<float>("min_motion_pixels", rule.fight_bbox.min_motion_pixels);
} else {
rule.fight_bbox.enabled = true;
}
if (const SimpleJson* pose = ev.Find("pose"); pose && pose->IsObject()) {
rule.fight_pose.enabled = pose->ValueOr<bool>("enabled", true);
rule.fight_pose.min_wrist_motion_pixels =
pose->ValueOr<float>("min_wrist_motion_pixels", rule.fight_pose.min_wrist_motion_pixels);
rule.fight_pose.max_wrist_distance_pixels =
pose->ValueOr<float>("max_wrist_distance_pixels", rule.fight_pose.max_wrist_distance_pixels);
} else {
rule.fight_pose.enabled =
rule.fight_pose.min_wrist_motion_pixels > 0.0f || rule.fight_pose.max_wrist_distance_pixels > 0.0f;
}
return true;
}
static bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& out, std::string& err) {
const SimpleJson* events = config.Find("events");
if (!events || !events->IsArray()) {
@ -55,15 +171,15 @@ static bool ParseRules(const SimpleJson& config, std::vector<ActionEventRule>& o
const std::string type = ev.ValueOr<std::string>("type", "");
rule.window_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("window_ms", 1000)));
rule.activate_duration_ms = static_cast<uint64_t>(std::max(0, ev.ValueOr<int>("activate_duration_ms", 0)));
rule.signal_mode = ParseSignalMode(ev.ValueOr<std::string>("signal_mode", "any"));
if (const SimpleJson* fusion = ev.Find("fusion"); fusion && fusion->IsObject()) {
rule.signal_mode = ParseSignalMode(fusion->ValueOr<std::string>("match_mode", "any"));
}
if (type == "fall") {
rule.kind = ActionEventKind::Fall;
rule.min_drop_pixels = ev.ValueOr<float>("min_drop_pixels", 0.0f);
rule.min_aspect_ratio_delta = ev.ValueOr<float>("min_aspect_ratio_delta", 0.0f);
ParseFallRule(ev, rule);
} else if (type == "fight") {
rule.kind = ActionEventKind::Fight;
rule.proximity_pixels = ev.ValueOr<float>("proximity_pixels", 0.0f);
rule.min_motion_pixels = ev.ValueOr<float>("min_motion_pixels", 0.0f);
ParseFightRule(ev, rule);
} else {
err = "unsupported event type: " + type;
return false;
@ -98,6 +214,129 @@ static float AspectRatio(const Rect& rect) {
return rect.h > 0.0f ? (rect.w / rect.h) : 0.0f;
}
static float BboxIoU(const Rect& lhs, const Rect& rhs) {
const float x1 = std::max(lhs.x, rhs.x);
const float y1 = std::max(lhs.y, rhs.y);
const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
const float iw = std::max(0.0f, x2 - x1);
const float ih = std::max(0.0f, y2 - y1);
const float inter = iw * ih;
const float uni = lhs.w * lhs.h + rhs.w * rhs.h - inter;
return uni <= 0.0f ? 0.0f : (inter / uni);
}
static bool IsValidKeypoint(const PoseItem& pose, int index) {
return index >= 0 &&
static_cast<size_t>(index) < pose.keypoints.size() &&
pose.keypoints[static_cast<size_t>(index)].score > 0.0f;
}
static PoseTrackSample BuildPoseTrackSample(const PoseItem& pose) {
PoseTrackSample sample;
sample.matched = true;
const bool has_left_shoulder = IsValidKeypoint(pose, 5);
const bool has_right_shoulder = IsValidKeypoint(pose, 6);
const bool has_left_hip = IsValidKeypoint(pose, 11);
const bool has_right_hip = IsValidKeypoint(pose, 12);
if (has_left_shoulder && has_right_shoulder && has_left_hip && has_right_hip) {
const PosePoint2f& ls = pose.keypoints[5].point;
const PosePoint2f& rs = pose.keypoints[6].point;
const PosePoint2f& lh = pose.keypoints[11].point;
const PosePoint2f& rh = pose.keypoints[12].point;
const float shoulder_center_y = (ls.y + rs.y) * 0.5f;
const float hip_center_y = (lh.y + rh.y) * 0.5f;
sample.torso_center_y = (shoulder_center_y + hip_center_y) * 0.5f;
const float min_x = std::min(std::min(ls.x, rs.x), std::min(lh.x, rh.x));
const float max_x = std::max(std::max(ls.x, rs.x), std::max(lh.x, rh.x));
const float min_y = std::min(std::min(ls.y, rs.y), std::min(lh.y, rh.y));
const float max_y = std::max(std::max(ls.y, rs.y), std::max(lh.y, rh.y));
const float horizontal_span = std::fabs(max_x - min_x);
const float vertical_span = std::fabs(max_y - min_y);
sample.upright_ratio = vertical_span > 1e-3f ? (horizontal_span / vertical_span) : std::numeric_limits<float>::infinity();
}
if (IsValidKeypoint(pose, 9)) {
sample.left_wrist_x = pose.keypoints[9].point.x;
sample.left_wrist_y = pose.keypoints[9].point.y;
sample.has_left_wrist = true;
}
if (IsValidKeypoint(pose, 10)) {
sample.right_wrist_x = pose.keypoints[10].point.x;
sample.right_wrist_y = pose.keypoints[10].point.y;
sample.has_right_wrist = true;
}
return sample;
}
static PoseTrackSample MatchPoseToDetection(const Rect& bbox, const std::shared_ptr<PoseResult>& pose_result) {
if (!pose_result) return {};
float best_iou = 0.0f;
const PoseItem* best_pose = nullptr;
for (const auto& pose : pose_result->items) {
const float iou = BboxIoU(bbox, pose.bbox);
if (iou > best_iou) {
best_iou = iou;
best_pose = &pose;
}
}
if (!best_pose || best_iou <= 0.0f) return {};
return BuildPoseTrackSample(*best_pose);
}
static PoseTrackSample MatchPoseToTrackId(int track_id, const Rect& bbox, const std::shared_ptr<PoseResult>& pose_result) {
if (!pose_result) return {};
const PoseItem* best_pose = nullptr;
float best_iou = 0.0f;
for (const auto& pose : pose_result->items) {
if (pose.track_id != track_id) continue;
const float iou = BboxIoU(bbox, pose.bbox);
if (iou >= best_iou) {
best_iou = iou;
best_pose = &pose;
}
}
if (best_pose) return BuildPoseTrackSample(*best_pose);
return MatchPoseToDetection(bbox, pose_result);
}
static float WristMotion(const PoseTrackSample& newer, const PoseTrackSample& older) {
float motion = 0.0f;
if (newer.has_left_wrist && older.has_left_wrist) {
const float dx = newer.left_wrist_x - older.left_wrist_x;
const float dy = newer.left_wrist_y - older.left_wrist_y;
motion += std::sqrt(dx * dx + dy * dy);
}
if (newer.has_right_wrist && older.has_right_wrist) {
const float dx = newer.right_wrist_x - older.right_wrist_x;
const float dy = newer.right_wrist_y - older.right_wrist_y;
motion += std::sqrt(dx * dx + dy * dy);
}
return motion;
}
static float MinimumWristDistance(const PoseTrackSample& lhs, const PoseTrackSample& rhs) {
float best = std::numeric_limits<float>::infinity();
auto update = [&](bool has_a, float ax, float ay, bool has_b, float bx, float by) {
if (!has_a || !has_b) return;
const float dx = ax - bx;
const float dy = ay - by;
best = std::min(best, std::sqrt(dx * dx + dy * dy));
};
update(lhs.has_left_wrist, lhs.left_wrist_x, lhs.left_wrist_y,
rhs.has_left_wrist, rhs.left_wrist_x, rhs.left_wrist_y);
update(lhs.has_left_wrist, lhs.left_wrist_x, lhs.left_wrist_y,
rhs.has_right_wrist, rhs.right_wrist_x, rhs.right_wrist_y);
update(lhs.has_right_wrist, lhs.right_wrist_x, lhs.right_wrist_y,
rhs.has_left_wrist, rhs.left_wrist_x, rhs.left_wrist_y);
update(lhs.has_right_wrist, lhs.right_wrist_x, lhs.right_wrist_y,
rhs.has_right_wrist, rhs.right_wrist_x, rhs.right_wrist_y);
return best;  // stays +infinity when no wrist pair was comparable
}
static BehaviorEventType ToBehaviorEventType(ActionEventKind kind) {
return kind == ActionEventKind::Fall ? BehaviorEventType::Fall : BehaviorEventType::Fight;
}
@ -152,7 +391,7 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
if (det.track_id < 0) continue;
current_tracks[det.track_id] = det.bbox;
auto& history = impl_->history[det.track_id];
history.push_back(TrackSample{frame->pts, det.bbox});
history.push_back(TrackSample{frame->pts, det.bbox, MatchPoseToTrackId(det.track_id, det.bbox, frame->pose)});
while (!history.empty() && frame->pts > history.front().pts &&
(frame->pts - history.front().pts) > impl_->rules.front().window_ms) {
history.pop_front();
@ -168,8 +407,18 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
const float drop = CenterY(bbox) - CenterY(first.bbox);
const float aspect_delta = AspectRatio(bbox) - AspectRatio(first.bbox);
const uint64_t duration = frame->pts >= first.pts ? (frame->pts - first.pts) : 0;
if (drop < rule.min_drop_pixels) continue;
if (aspect_delta < rule.min_aspect_ratio_delta) continue;
const bool bbox_enabled = rule.fall_bbox.enabled;
const bool pose_enabled = rule.fall_pose.enabled;
const bool bbox_trigger = drop >= rule.fall_bbox.min_drop_pixels &&
aspect_delta >= rule.fall_bbox.min_aspect_ratio_delta;
bool pose_trigger = false;
if (pose_enabled &&
first.pose.matched && it->second.back().pose.matched) {
const float torso_drop = it->second.back().pose.torso_center_y - first.pose.torso_center_y;
pose_trigger = torso_drop >= rule.fall_pose.min_torso_drop_pixels &&
it->second.back().pose.upright_ratio >= rule.fall_pose.max_upright_ratio;
}
if (!EvaluateSignalMode(rule.signal_mode, bbox_trigger, pose_trigger, bbox_enabled, pose_enabled)) continue;
if (duration < rule.activate_duration_ms) continue;
BehaviorEventItem event;
@ -189,7 +438,7 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
for (auto left = current_tracks.begin(); left != current_tracks.end(); ++left) {
for (auto right = std::next(left); right != current_tracks.end(); ++right) {
const float proximity = CenterDistance(left->second, right->second);
if (proximity > rule.proximity_pixels) continue;
if (proximity > rule.fight_bbox.proximity_pixels) continue;
const auto it_left = impl_->history.find(left->first);
const auto it_right = impl_->history.find(right->first);
@ -203,7 +452,22 @@ NodeStatus ActionRecogNode::Process(FramePtr frame) {
const uint64_t duration = std::min(duration_left, duration_right);
const float combined_motion = left_motion + right_motion;
if (combined_motion < rule.min_motion_pixels) continue;
const bool bbox_enabled = rule.fight_bbox.enabled;
const bool pose_enabled = rule.fight_pose.enabled;
const bool bbox_trigger = combined_motion >= rule.fight_bbox.min_motion_pixels;
bool pose_trigger = false;
const auto& left_pose_now = it_left->second.back().pose;
const auto& right_pose_now = it_right->second.back().pose;
if (pose_enabled &&
it_left->second.front().pose.matched && it_right->second.front().pose.matched &&
left_pose_now.matched && right_pose_now.matched) {
const float pose_motion = WristMotion(left_pose_now, it_left->second.front().pose) +
WristMotion(right_pose_now, it_right->second.front().pose);
const float wrist_distance = MinimumWristDistance(left_pose_now, right_pose_now);
pose_trigger = pose_motion >= rule.fight_pose.min_wrist_motion_pixels &&
wrist_distance <= rule.fight_pose.max_wrist_distance_pixels;
}
if (!EvaluateSignalMode(rule.signal_mode, bbox_trigger, pose_trigger, bbox_enabled, pose_enabled)) continue;
if (duration < rule.activate_duration_ms) continue;
const auto pair_key = std::make_pair(left->first, right->first);

View File

@ -0,0 +1,466 @@
#include "ai_pose_node.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "face/face_detection_utils.h"
#include "hw/i_infer_backend.h"
#include "pose/pose_result.h"
#include "utils/logger.h"
namespace rk3588 {
namespace {
using face_detection::HalfToFloat;
using face_detection::ResizeRgbBilinear;
constexpr int kPoseOutputCount = 4;
constexpr int kPoseLocChannels = 64;
constexpr int kPoseScoreChannel = 64;
constexpr int kDflBins = 16;
constexpr int kMaxPoseDetections = 32;
constexpr uint8_t kLetterboxValue = 114;
struct PoseConfig {
bool enabled = true;
std::string model_path;
int model_input_w = 640;
int model_input_h = 640;
int expected_keypoints = 17;
float conf_thresh = 0.25f;
float nms_thresh = 0.45f;
};
struct LetterboxInfo {
float scale = 1.0f;
float pad_x = 0.0f;
float pad_y = 0.0f;
};
struct PoseCandidate {
Rect bbox{};
float score = 0.0f;
int keypoint_index = -1;
};
bool ParseConfig(const SimpleJson& config, PoseConfig& out, std::string& err) {
out.enabled = config.ValueOr<bool>("enabled", true);
out.model_path = config.ValueOr<std::string>("model_path", "");
out.model_input_w = config.ValueOr<int>("model_input_w", 640);
out.model_input_h = config.ValueOr<int>("model_input_h", 640);
out.expected_keypoints = config.ValueOr<int>("expected_keypoints", 17);
out.conf_thresh = config.ValueOr<float>("conf_thresh", 0.25f);
out.nms_thresh = config.ValueOr<float>("nms_thresh", 0.45f);
if (out.model_input_w <= 0 || out.model_input_h <= 0) {
err = "model_input_w and model_input_h must be positive";
return false;
}
if (out.expected_keypoints <= 0) {
err = "expected_keypoints must be positive";
return false;
}
if (out.conf_thresh <= 0.0f || out.conf_thresh >= 1.0f) {
err = "conf_thresh must be in (0, 1)";
return false;
}
if (out.nms_thresh <= 0.0f || out.nms_thresh >= 1.0f) {
err = "nms_thresh must be in (0, 1)";
return false;
}
if (out.enabled && out.model_path.empty()) {
err = "model_path is required when enabled=true";
return false;
}
return true;
}
void PushToDownstream(const std::vector<std::shared_ptr<SpscQueue<FramePtr>>>& output_queues, const FramePtr& frame) {
for (const auto& q : output_queues) {
if (q) q->Push(frame);
}
}
inline float Sigmoid(float x) {
return 1.0f / (1.0f + std::exp(-x));
}
float Unsigmoid(float y) {
return -std::log((1.0f / y) - 1.0f);
}
float ClampFloat(float v, float lo, float hi) {
return std::max(lo, std::min(v, hi));
}
float CalculateIoU(const Rect& lhs, const Rect& rhs) {
const float x1 = std::max(lhs.x, rhs.x);
const float y1 = std::max(lhs.y, rhs.y);
const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
const float inter_w = std::max(0.0f, x2 - x1);
const float inter_h = std::max(0.0f, y2 - y1);
const float inter = inter_w * inter_h;
const float union_area = lhs.w * lhs.h + rhs.w * rhs.h - inter;
return union_area <= 0.0f ? 0.0f : (inter / union_area);
}
float ReadTensorValue(const InferOutput& output, size_t index) {
#if defined(RK3588_ENABLE_RKNN)
switch (output.type) {
case RKNN_TENSOR_FLOAT32: {
const auto* p = reinterpret_cast<const float*>(output.data.data());
return p[index];
}
case RKNN_TENSOR_FLOAT16: {
const auto* p = reinterpret_cast<const uint16_t*>(output.data.data());
return HalfToFloat(p[index]);
}
case RKNN_TENSOR_INT8: {
const auto* p = reinterpret_cast<const int8_t*>(output.data.data());
return (static_cast<float>(p[index]) - static_cast<float>(output.zp)) * output.scale;
}
case RKNN_TENSOR_UINT8: {
const auto* p = reinterpret_cast<const uint8_t*>(output.data.data());
return (static_cast<float>(p[index]) - static_cast<float>(output.zp)) * output.scale;
}
default:
break;
}
#endif
const auto* p = reinterpret_cast<const float*>(output.data.data());
return p[index];
}
bool BuildLetterboxedRgbInput(const Frame& frame, int dst_w, int dst_h,
std::vector<uint8_t>& out, LetterboxInfo& letterbox) {
if (frame.width <= 0 || frame.height <= 0 || dst_w <= 0 || dst_h <= 0) return false;
if (frame.format != PixelFormat::RGB && frame.format != PixelFormat::BGR) return false;
const uint8_t* src = frame.planes[0].data ? frame.planes[0].data : frame.data;
if (!src) return false;
const int src_stride = frame.planes[0].stride > 0 ? frame.planes[0].stride : frame.width * 3;
out.assign(static_cast<size_t>(dst_w) * static_cast<size_t>(dst_h) * 3, kLetterboxValue);
const float scale = std::min(static_cast<float>(dst_w) / static_cast<float>(frame.width),
static_cast<float>(dst_h) / static_cast<float>(frame.height));
const int resized_w = std::max(1, static_cast<int>(std::round(frame.width * scale)));
const int resized_h = std::max(1, static_cast<int>(std::round(frame.height * scale)));
const int pad_x = (dst_w - resized_w) / 2;
const int pad_y = (dst_h - resized_h) / 2;
std::vector<uint8_t> resized(static_cast<size_t>(resized_w) * static_cast<size_t>(resized_h) * 3);
const bool swap_rb = (frame.format == PixelFormat::BGR);
ResizeRgbBilinear(src, frame.width, frame.height, src_stride, resized.data(), resized_w, resized_h, swap_rb);
for (int y = 0; y < resized_h; ++y) {
uint8_t* dst_row = out.data() + (static_cast<size_t>(y + pad_y) * static_cast<size_t>(dst_w) + pad_x) * 3;
const uint8_t* src_row = resized.data() + static_cast<size_t>(y) * static_cast<size_t>(resized_w) * 3;
std::memcpy(dst_row, src_row, static_cast<size_t>(resized_w) * 3);
}
letterbox.scale = scale;
letterbox.pad_x = static_cast<float>(pad_x);
letterbox.pad_y = static_cast<float>(pad_y);
return true;
}
float DecodeDfl(const InferOutput& output, int cell_index, int cells_per_head, int channel_group) {
float max_value = -std::numeric_limits<float>::infinity();
float logits[kDflBins]{};
for (int i = 0; i < kDflBins; ++i) {
const size_t offset = static_cast<size_t>((channel_group * kDflBins + i) * cells_per_head + cell_index);
logits[i] = ReadTensorValue(output, offset);
max_value = std::max(max_value, logits[i]);
}
float denom = 0.0f;
float numer = 0.0f;
for (int i = 0; i < kDflBins; ++i) {
const float e = std::exp(logits[i] - max_value);
denom += e;
numer += e * static_cast<float>(i);
}
return denom > 0.0f ? (numer / denom) : 0.0f;
}
std::vector<PoseCandidate> DecodePoseCandidates(const std::vector<InferOutput>& outputs,
const PoseConfig& config) {
std::vector<PoseCandidate> candidates;
if (outputs.size() < kPoseOutputCount) return candidates;
const float score_threshold = Unsigmoid(config.conf_thresh);
int keypoint_base_index = 0;
for (int head = 0; head < 3; ++head) {
const int stride = (head == 0) ? 8 : (head == 1 ? 16 : 32);
const int grid_h = config.model_input_h / stride;
const int grid_w = config.model_input_w / stride;
const int cells = grid_h * grid_w;
if (cells <= 0) continue;
const InferOutput& output = outputs[static_cast<size_t>(head)];
const size_t elem_count = output.data.size() / sizeof(float);
#if defined(RK3588_ENABLE_RKNN)
size_t bytes_per_elem = 1;
switch (output.type) {
case RKNN_TENSOR_FLOAT32: bytes_per_elem = sizeof(float); break;
case RKNN_TENSOR_FLOAT16: bytes_per_elem = sizeof(uint16_t); break;
default: bytes_per_elem = 1; break;
}
const size_t actual_elem_count = bytes_per_elem > 0 ? (output.data.size() / bytes_per_elem) : 0;
#else
const size_t actual_elem_count = elem_count;
#endif
if (actual_elem_count < static_cast<size_t>(65 * cells)) {
keypoint_base_index += cells;
continue;
}
for (int cell = 0; cell < cells; ++cell) {
const size_t score_offset = static_cast<size_t>(kPoseScoreChannel * cells + cell);
const float score_logit = ReadTensorValue(output, score_offset);
if (score_logit < score_threshold) continue;
const float left = DecodeDfl(output, cell, cells, 0);
const float top = DecodeDfl(output, cell, cells, 1);
const float right = DecodeDfl(output, cell, cells, 2);
const float bottom = DecodeDfl(output, cell, cells, 3);
const int grid_x = cell % grid_w;
const int grid_y = cell / grid_w;
const float center_x = (static_cast<float>(grid_x) + 0.5f) * stride;
const float center_y = (static_cast<float>(grid_y) + 0.5f) * stride;
const float x1 = center_x - left * stride;
const float y1 = center_y - top * stride;
const float x2 = center_x + right * stride;
const float y2 = center_y + bottom * stride;
PoseCandidate candidate;
candidate.bbox.x = x1;
candidate.bbox.y = y1;
candidate.bbox.w = std::max(0.0f, x2 - x1);
candidate.bbox.h = std::max(0.0f, y2 - y1);
candidate.score = Sigmoid(score_logit);
candidate.keypoint_index = keypoint_base_index + cell;
candidates.push_back(candidate);
}
keypoint_base_index += cells;
}
std::sort(candidates.begin(), candidates.end(), [](const PoseCandidate& lhs, const PoseCandidate& rhs) {
return lhs.score > rhs.score;
});
std::vector<PoseCandidate> kept;
kept.reserve(candidates.size());
for (const auto& candidate : candidates) {
bool suppressed = false;
for (const auto& accepted : kept) {
if (CalculateIoU(candidate.bbox, accepted.bbox) > config.nms_thresh) {
suppressed = true;
break;
}
}
if (!suppressed) {
kept.push_back(candidate);
if (static_cast<int>(kept.size()) >= kMaxPoseDetections) break;
}
}
return kept;
}
float MapToSourceCoord(float value, float pad, float scale, int limit) {
if (scale <= 0.0f) return 0.0f;
return ClampFloat((value - pad) / scale, 0.0f, static_cast<float>(std::max(0, limit)));
}
std::shared_ptr<PoseResult> BuildPoseResult(const std::vector<InferOutput>& outputs,
const PoseConfig& config,
int frame_w, int frame_h,
const LetterboxInfo& letterbox) {
if (outputs.size() < kPoseOutputCount) return nullptr;
// outputs[0..2] are the stride-8/16/32 detection heads; outputs[3] carries keypoints.
const InferOutput& keypoint_output = outputs[3];
const int total_points = (config.model_input_w / 8) * (config.model_input_h / 8) +
(config.model_input_w / 16) * (config.model_input_h / 16) +
(config.model_input_w / 32) * (config.model_input_h / 32);
const int values_per_point = config.expected_keypoints * 3;
#if defined(RK3588_ENABLE_RKNN)
size_t kp_bytes_per_elem = 1;
switch (keypoint_output.type) {
case RKNN_TENSOR_FLOAT32: kp_bytes_per_elem = sizeof(float); break;
case RKNN_TENSOR_FLOAT16: kp_bytes_per_elem = sizeof(uint16_t); break;
default: kp_bytes_per_elem = 1; break;
}
#else
const size_t kp_bytes_per_elem = sizeof(float);
#endif
const size_t kp_elem_count = kp_bytes_per_elem > 0 ? (keypoint_output.data.size() / kp_bytes_per_elem) : 0;
if (kp_elem_count < static_cast<size_t>(values_per_point * total_points)) return nullptr;
auto result = std::make_shared<PoseResult>();
result->img_w = frame_w;
result->img_h = frame_h;
result->model_name = "yolov8n-pose";
const std::vector<PoseCandidate> candidates = DecodePoseCandidates(outputs, config);
result->items.reserve(candidates.size());
for (const auto& candidate : candidates) {
if (candidate.keypoint_index < 0 || candidate.keypoint_index >= total_points) continue;
PoseItem item;
item.score = candidate.score;
item.track_id = -1;
item.bbox.x = MapToSourceCoord(candidate.bbox.x, letterbox.pad_x, letterbox.scale, frame_w);
item.bbox.y = MapToSourceCoord(candidate.bbox.y, letterbox.pad_y, letterbox.scale, frame_h);
const float x2 = MapToSourceCoord(candidate.bbox.x + candidate.bbox.w, letterbox.pad_x, letterbox.scale, frame_w);
const float y2 = MapToSourceCoord(candidate.bbox.y + candidate.bbox.h, letterbox.pad_y, letterbox.scale, frame_h);
item.bbox.w = std::max(0.0f, x2 - item.bbox.x);
item.bbox.h = std::max(0.0f, y2 - item.bbox.y);
item.keypoints.reserve(static_cast<size_t>(config.expected_keypoints));
for (int k = 0; k < config.expected_keypoints; ++k) {
// Keypoint tensor layout: for each keypoint k, three planes of total_points
// values in the order x, y, score.
const size_t x_offset = static_cast<size_t>(k * 3 * total_points + candidate.keypoint_index);
const size_t y_offset = static_cast<size_t>(k * 3 * total_points + total_points + candidate.keypoint_index);
const size_t s_offset = static_cast<size_t>(k * 3 * total_points + 2 * total_points + candidate.keypoint_index);
PoseKeypoint kp;
kp.point.x = MapToSourceCoord(ReadTensorValue(keypoint_output, x_offset), letterbox.pad_x, letterbox.scale, frame_w);
kp.point.y = MapToSourceCoord(ReadTensorValue(keypoint_output, y_offset), letterbox.pad_y, letterbox.scale, frame_h);
kp.score = ReadTensorValue(keypoint_output, s_offset);
item.keypoints.push_back(kp);
}
result->items.push_back(std::move(item));
}
return result;
}
} // namespace
struct AiPoseNode::Impl {
PoseConfig config;
std::string init_err;
std::shared_ptr<IInferBackend> infer_backend;
ModelHandle model_handle = kInvalidModelHandle;
bool started = false;
std::vector<uint8_t> input_rgb;
};
AiPoseNode::AiPoseNode() : impl_(std::make_unique<Impl>()) {}
AiPoseNode::~AiPoseNode() = default;
std::string AiPoseNode::Id() const {
return id_;
}
std::string AiPoseNode::Type() const {
return "ai_pose";
}
bool AiPoseNode::Init(const SimpleJson& config, const NodeContext& ctx) {
id_ = config.ValueOr<std::string>("id", "ai_pose");
if (!ParseConfig(config, impl_->config, impl_->init_err)) {
LogError("[ai_pose] invalid config: " + impl_->init_err);
return false;
}
impl_->infer_backend = ctx.infer_backend;
output_queues_ = ctx.output_queues;
return true;
}
bool AiPoseNode::Start() {
if (!impl_->config.enabled) {
impl_->started = true;
return true;
}
if (!impl_->infer_backend) {
LogError("[ai_pose] infer_backend is required when enabled=true");
return false;
}
std::string err;
impl_->model_handle = impl_->infer_backend->LoadModel(impl_->config.model_path, err);
if (impl_->model_handle == kInvalidModelHandle) {
LogError("[ai_pose] failed to load model: " + impl_->config.model_path + " err=" + err);
return false;
}
impl_->started = true;
return true;
}
void AiPoseNode::Stop() {
if (impl_->infer_backend && impl_->model_handle != kInvalidModelHandle) {
impl_->infer_backend->UnloadModel(impl_->model_handle);
impl_->model_handle = kInvalidModelHandle;
}
impl_->started = false;
}
NodeStatus AiPoseNode::Process(FramePtr frame) {
if (!frame) return NodeStatus::DROP;
if (!impl_->config.enabled) {
PushToDownstream(output_queues_, frame);
return NodeStatus::OK;
}
if (!impl_->started || !impl_->infer_backend || impl_->model_handle == kInvalidModelHandle) {
LogWarn("[ai_pose] process called before node is started");
PushToDownstream(output_queues_, frame);
return NodeStatus::ERROR;
}
LetterboxInfo letterbox;
if (!BuildLetterboxedRgbInput(*frame, impl_->config.model_input_w, impl_->config.model_input_h,
impl_->input_rgb, letterbox)) {
LogWarn("[ai_pose] only RGB/BGR frames are supported currently");
PushToDownstream(output_queues_, frame);
return NodeStatus::OK;
}
InferInput input;
input.data = impl_->input_rgb.data();
input.size = impl_->input_rgb.size();
input.width = impl_->config.model_input_w;
input.height = impl_->config.model_input_h;
input.is_nhwc = true;
input.dma_fd = -1;
input.dma_offset = 0;
InferResult infer_result = impl_->infer_backend->Infer(impl_->model_handle, input);
if (!infer_result.success) {
LogWarn("[ai_pose] inference failed: " + infer_result.error);
PushToDownstream(output_queues_, frame);
return NodeStatus::OK;
}
frame->pose = BuildPoseResult(infer_result.outputs, impl_->config, frame->width, frame->height, letterbox);
if (!frame->pose) {
frame->pose = std::make_shared<PoseResult>();
frame->pose->img_w = frame->width;
frame->pose->img_h = frame->height;
frame->pose->model_name = "yolov8n-pose";
}
PushToDownstream(output_queues_, frame);
return NodeStatus::OK;
}
#ifndef RK3588_TEST_BUILD
REGISTER_NODE(AiPoseNode, "ai_pose");
#endif
} // namespace rk3588


@ -0,0 +1,30 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "node.h"
namespace rk3588 {
class AiPoseNode final : public INode {
public:
AiPoseNode();
~AiPoseNode() override;
std::string Id() const override;
std::string Type() const override;
bool Init(const SimpleJson& config, const NodeContext& ctx) override;
bool Start() override;
void Stop() override;
NodeStatus Process(FramePtr frame) override;
private:
struct Impl;
std::unique_ptr<Impl> impl_;
std::string id_;
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
};
} // namespace rk3588
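
Putting the parsed fields together, a graph node entry might look like the following sketch. The field names mirror the unit tests; `"type"` assumes the graph loader keys nodes by their registered name, and the model path is a placeholder:

```json
{
  "id": "pose0",
  "type": "ai_pose",
  "enabled": true,
  "model_path": "models/yolov8n-pose.rknn",
  "model_input_w": 640,
  "model_input_h": 640,
  "conf_thresh": 0.25,
  "nms_thresh": 0.45
}
```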


@ -0,0 +1,139 @@
#include "pose_assoc_node.h"
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "pose/pose_result.h"
#include "utils/logger.h"
namespace rk3588 {
namespace {
struct PoseAssocConfig {
float min_iou = 0.1f;
};
struct MatchCandidate {
float iou = 0.0f;
int pose_index = -1;
int det_index = -1;
};
bool ParseConfig(const SimpleJson& config, PoseAssocConfig& out, std::string& err) {
out.min_iou = config.ValueOr<float>("min_iou", 0.1f);
if (out.min_iou < 0.0f || out.min_iou > 1.0f) {
err = "min_iou must be in [0, 1]";
return false;
}
return true;
}
float IoU(const Rect& lhs, const Rect& rhs) {
const float x1 = std::max(lhs.x, rhs.x);
const float y1 = std::max(lhs.y, rhs.y);
const float x2 = std::min(lhs.x + lhs.w, rhs.x + rhs.w);
const float y2 = std::min(lhs.y + lhs.h, rhs.y + rhs.h);
const float iw = std::max(0.0f, x2 - x1);
const float ih = std::max(0.0f, y2 - y1);
const float inter = iw * ih;
const float area_l = std::max(0.0f, lhs.w) * std::max(0.0f, lhs.h);
const float area_r = std::max(0.0f, rhs.w) * std::max(0.0f, rhs.h);
const float uni = area_l + area_r - inter;
return uni <= 0.0f ? 0.0f : (inter / uni);
}
void PushToDownstream(const std::vector<std::shared_ptr<SpscQueue<FramePtr>>>& output_queues, const FramePtr& frame) {
for (const auto& q : output_queues) {
if (q) q->Push(frame);
}
}
} // namespace
struct PoseAssocNode::Impl {
PoseAssocConfig config;
std::string init_err;
};
PoseAssocNode::PoseAssocNode() : impl_(std::make_unique<Impl>()) {}
PoseAssocNode::~PoseAssocNode() = default;
std::string PoseAssocNode::Id() const {
return id_;
}
std::string PoseAssocNode::Type() const {
return "pose_assoc";
}
bool PoseAssocNode::Init(const SimpleJson& config, const NodeContext& ctx) {
id_ = config.ValueOr<std::string>("id", "pose_assoc");
if (!ParseConfig(config, impl_->config, impl_->init_err)) {
LogError("[pose_assoc] invalid config: " + impl_->init_err);
return false;
}
output_queues_ = ctx.output_queues;
return true;
}
bool PoseAssocNode::Start() {
return true;
}
void PoseAssocNode::Stop() {}
NodeStatus PoseAssocNode::Process(FramePtr frame) {
if (!frame) return NodeStatus::DROP;
if (frame->pose && frame->det) {
for (auto& pose : frame->pose->items) {
pose.track_id = -1;
}
std::vector<MatchCandidate> candidates;
candidates.reserve(frame->pose->items.size() * frame->det->items.size());
for (size_t pi = 0; pi < frame->pose->items.size(); ++pi) {
const Rect& pose_bbox = frame->pose->items[pi].bbox;
for (size_t di = 0; di < frame->det->items.size(); ++di) {
const auto& det = frame->det->items[di];
if (det.track_id < 0) continue;
const float iou = IoU(pose_bbox, det.bbox);
if (iou >= impl_->config.min_iou) {
candidates.push_back(MatchCandidate{iou, static_cast<int>(pi), static_cast<int>(di)});
}
}
}
std::sort(candidates.begin(), candidates.end(), [](const MatchCandidate& lhs, const MatchCandidate& rhs) {
if (lhs.iou != rhs.iou) return lhs.iou > rhs.iou;
if (lhs.pose_index != rhs.pose_index) return lhs.pose_index < rhs.pose_index;
return lhs.det_index < rhs.det_index;
});
std::vector<bool> used_pose(frame->pose->items.size(), false);
std::vector<bool> used_det(frame->det->items.size(), false);
for (const auto& candidate : candidates) {
if (used_pose[static_cast<size_t>(candidate.pose_index)] ||
used_det[static_cast<size_t>(candidate.det_index)]) {
continue;
}
frame->pose->items[static_cast<size_t>(candidate.pose_index)].track_id =
frame->det->items[static_cast<size_t>(candidate.det_index)].track_id;
used_pose[static_cast<size_t>(candidate.pose_index)] = true;
used_det[static_cast<size_t>(candidate.det_index)] = true;
}
}
PushToDownstream(output_queues_, frame);
return NodeStatus::OK;
}
#ifndef RK3588_TEST_BUILD
REGISTER_NODE(PoseAssocNode, "pose_assoc");
#endif
} // namespace rk3588


@ -0,0 +1,30 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "node.h"
namespace rk3588 {
class PoseAssocNode final : public INode {
public:
PoseAssocNode();
~PoseAssocNode() override;
std::string Id() const override;
std::string Type() const override;
bool Init(const SimpleJson& config, const NodeContext& ctx) override;
bool Start() override;
void Stop() override;
NodeStatus Process(FramePtr frame) override;
private:
struct Impl;
std::unique_ptr<Impl> impl_;
std::string id_;
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
};
} // namespace rk3588


@ -40,6 +40,8 @@ add_executable(rk3588_gtests
test_behavior_event_model.cpp
test_region_event.cpp
test_action_recog.cpp
test_ai_pose.cpp
test_pose_assoc.cpp
test_event_fusion.cpp
test_alarm_behavior_events.cpp
test_infer_backend.cpp
@ -53,6 +55,8 @@ add_executable(rk3588_gtests
${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
${CMAKE_SOURCE_DIR}/plugins/region_event/region_event_node.cpp
${CMAKE_SOURCE_DIR}/plugins/action_recog/action_recog_node.cpp
${CMAKE_SOURCE_DIR}/plugins/ai_pose/ai_pose_node.cpp
${CMAKE_SOURCE_DIR}/plugins/pose_assoc/pose_assoc_node.cpp
${CMAKE_SOURCE_DIR}/plugins/event_fusion/event_fusion_node.cpp
${CMAKE_SOURCE_DIR}/plugins/alarm/rule_engine.cpp
)


@ -5,6 +5,7 @@
#include "frame/frame.h"
#include "node.h"
#include "pose/pose_result.h"
#include "utils/simple_json.h"
#include "../plugins/action_recog/action_recog_node.h"
@ -124,5 +125,385 @@ TEST(ActionRecogTest, EmitsFightWhenTwoTracksStayCloseWithRepeatedMotion) {
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[1], 32);
}
TEST(ActionRecogTest, EmitsFallFromPoseSequenceWhenBboxShapeChangeIsSmall) {
ActionRecogNode node;
const std::string config_text = R"({
"id": "action_evt",
"events": [
{
"type": "fall",
"window_ms": 1500,
"min_drop_pixels": 120,
"min_aspect_ratio_delta": 10.0,
"activate_duration_ms": 0,
"pose_min_torso_drop_pixels": 120,
"pose_max_upright_ratio": 0.6
}
]
})";
SimpleJson config = ParseActionConfig(config_text);
NodeContext ctx;
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
ctx.output_queues.push_back(out);
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto frame1 = std::make_shared<Frame>();
frame1->width = 1920;
frame1->height = 1080;
frame1->pts = 1000;
frame1->det = std::make_shared<DetectionResult>();
frame1->det->img_w = 1920;
frame1->det->img_h = 1080;
frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
frame1->pose = std::make_shared<PoseResult>();
frame1->pose->img_w = 1920;
frame1->pose->img_h = 1080;
PoseItem pose1;
pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
pose1.score = 0.9f;
pose1.keypoints.resize(17);
pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
frame1->pose->items.push_back(pose1);
auto frame2 = std::make_shared<Frame>();
frame2->width = 1920;
frame2->height = 1080;
frame2->pts = 1600;
frame2->det = std::make_shared<DetectionResult>();
frame2->det->img_w = 1920;
frame2->det->img_h = 1080;
frame2->det->items.push_back(Detection{0, 0.93f, Rect{780.0f, 420.0f, 200.0f, 280.0f}, 17});
frame2->pose = std::make_shared<PoseResult>();
frame2->pose->img_w = 1920;
frame2->pose->img_h = 1080;
PoseItem pose2;
pose2.bbox = Rect{780.0f, 420.0f, 200.0f, 280.0f};
pose2.score = 0.91f;
pose2.keypoints.resize(17);
pose2.keypoints[5] = PoseKeypoint{PosePoint2f{790.0f, 520.0f}, 0.9f};
pose2.keypoints[6] = PoseKeypoint{PosePoint2f{970.0f, 520.0f}, 0.9f};
pose2.keypoints[11] = PoseKeypoint{PosePoint2f{820.0f, 590.0f}, 0.9f};
pose2.keypoints[12] = PoseKeypoint{PosePoint2f{940.0f, 590.0f}, 0.9f};
frame2->pose->items.push_back(pose2);
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
ASSERT_NE(frame2->behavior_events, nullptr);
ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fall);
ASSERT_EQ(frame2->behavior_events->items[0].track_ids.size(), 1u);
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[0], 17);
}
TEST(ActionRecogTest, EmitsFightFromPoseMotionWhenBboxMotionIsSmall) {
ActionRecogNode node;
const std::string config_text = R"({
"id": "action_evt",
"events": [
{
"type": "fight",
"window_ms": 1200,
"proximity_pixels": 220,
"min_motion_pixels": 1000,
"activate_duration_ms": 0,
"pose_min_wrist_motion_pixels": 120,
"pose_max_wrist_distance_pixels": 120
}
]
})";
SimpleJson config = ParseActionConfig(config_text);
NodeContext ctx;
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
ctx.output_queues.push_back(out);
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto frame1 = std::make_shared<Frame>();
frame1->width = 1920;
frame1->height = 1080;
frame1->pts = 1000;
frame1->det = std::make_shared<DetectionResult>();
frame1->det->img_w = 1920;
frame1->det->img_h = 1080;
frame1->det->items.push_back(Detection{0, 0.90f, Rect{700.0f, 320.0f, 120.0f, 300.0f}, 31});
frame1->det->items.push_back(Detection{0, 0.89f, Rect{860.0f, 320.0f, 120.0f, 300.0f}, 32});
frame1->pose = std::make_shared<PoseResult>();
frame1->pose->img_w = 1920;
frame1->pose->img_h = 1080;
PoseItem left1;
left1.bbox = Rect{700.0f, 320.0f, 120.0f, 300.0f};
left1.keypoints.resize(17);
left1.keypoints[9] = PoseKeypoint{PosePoint2f{780.0f, 430.0f}, 0.9f};
left1.keypoints[10] = PoseKeypoint{PosePoint2f{790.0f, 450.0f}, 0.9f};
PoseItem right1;
right1.bbox = Rect{860.0f, 320.0f, 120.0f, 300.0f};
right1.keypoints.resize(17);
right1.keypoints[9] = PoseKeypoint{PosePoint2f{900.0f, 430.0f}, 0.9f};
right1.keypoints[10] = PoseKeypoint{PosePoint2f{910.0f, 450.0f}, 0.9f};
frame1->pose->items.push_back(left1);
frame1->pose->items.push_back(right1);
auto frame2 = std::make_shared<Frame>();
frame2->width = 1920;
frame2->height = 1080;
frame2->pts = 1300;
frame2->det = std::make_shared<DetectionResult>();
frame2->det->img_w = 1920;
frame2->det->img_h = 1080;
frame2->det->items.push_back(Detection{0, 0.91f, Rect{710.0f, 325.0f, 120.0f, 300.0f}, 31});
frame2->det->items.push_back(Detection{0, 0.92f, Rect{850.0f, 315.0f, 120.0f, 300.0f}, 32});
frame2->pose = std::make_shared<PoseResult>();
frame2->pose->img_w = 1920;
frame2->pose->img_h = 1080;
PoseItem left2;
left2.bbox = Rect{710.0f, 325.0f, 120.0f, 300.0f};
left2.keypoints.resize(17);
left2.keypoints[9] = PoseKeypoint{PosePoint2f{860.0f, 430.0f}, 0.9f};
left2.keypoints[10] = PoseKeypoint{PosePoint2f{870.0f, 450.0f}, 0.9f};
PoseItem right2;
right2.bbox = Rect{850.0f, 315.0f, 120.0f, 300.0f};
right2.keypoints.resize(17);
right2.keypoints[9] = PoseKeypoint{PosePoint2f{840.0f, 430.0f}, 0.9f};
right2.keypoints[10] = PoseKeypoint{PosePoint2f{830.0f, 450.0f}, 0.9f};
frame2->pose->items.push_back(left2);
frame2->pose->items.push_back(right2);
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
ASSERT_NE(frame2->behavior_events, nullptr);
ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fight);
ASSERT_EQ(frame2->behavior_events->items[0].track_ids.size(), 2u);
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[0], 31);
EXPECT_EQ(frame2->behavior_events->items[0].track_ids[1], 32);
}
TEST(ActionRecogTest, SupportsStructuredFusionConfigForFall) {
ActionRecogNode node;
const std::string config_text = R"({
"id": "action_evt",
"events": [
{
"type": "fall",
"window_ms": 1500,
"activate_duration_ms": 0,
"bbox": {
"enabled": false
},
"pose": {
"enabled": true,
"min_torso_drop_pixels": 120,
"max_upright_ratio": 0.6
},
"fusion": {
"match_mode": "any"
}
}
]
})";
SimpleJson config = ParseActionConfig(config_text);
NodeContext ctx;
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto frame1 = std::make_shared<Frame>();
frame1->width = 1920;
frame1->height = 1080;
frame1->pts = 1000;
frame1->det = std::make_shared<DetectionResult>();
frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
frame1->pose = std::make_shared<PoseResult>();
PoseItem pose1;
pose1.track_id = 17;
pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
pose1.keypoints.resize(17);
pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
frame1->pose->items.push_back(pose1);
auto frame2 = std::make_shared<Frame>();
frame2->width = 1920;
frame2->height = 1080;
frame2->pts = 1600;
frame2->det = std::make_shared<DetectionResult>();
frame2->det->items.push_back(Detection{0, 0.93f, Rect{790.0f, 400.0f, 180.0f, 300.0f}, 17});
frame2->pose = std::make_shared<PoseResult>();
PoseItem pose2;
pose2.track_id = 17;
pose2.bbox = Rect{790.0f, 400.0f, 180.0f, 300.0f};
pose2.keypoints.resize(17);
pose2.keypoints[5] = PoseKeypoint{PosePoint2f{790.0f, 520.0f}, 0.9f};
pose2.keypoints[6] = PoseKeypoint{PosePoint2f{970.0f, 520.0f}, 0.9f};
pose2.keypoints[11] = PoseKeypoint{PosePoint2f{820.0f, 590.0f}, 0.9f};
pose2.keypoints[12] = PoseKeypoint{PosePoint2f{940.0f, 590.0f}, 0.9f};
frame2->pose->items.push_back(pose2);
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
ASSERT_NE(frame2->behavior_events, nullptr);
ASSERT_EQ(frame2->behavior_events->items.size(), 1u);
EXPECT_EQ(frame2->behavior_events->items[0].type, BehaviorEventType::Fall);
}
TEST(ActionRecogTest, SuppressesFallFalsePositiveWhenPoseShapeConditionFails) {
ActionRecogNode node;
const std::string config_text = R"({
"id": "action_evt",
"events": [
{
"type": "fall",
"window_ms": 1500,
"activate_duration_ms": 0,
"bbox": {
"enabled": false
},
"pose": {
"enabled": true,
"min_torso_drop_pixels": 120,
"max_upright_ratio": 0.8
},
"fusion": {
"match_mode": "any"
}
}
]
})";
SimpleJson config = ParseActionConfig(config_text);
NodeContext ctx;
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto frame1 = std::make_shared<Frame>();
frame1->pts = 1000;
frame1->det = std::make_shared<DetectionResult>();
frame1->det->items.push_back(Detection{0, 0.92f, Rect{800.0f, 220.0f, 180.0f, 300.0f}, 17});
frame1->pose = std::make_shared<PoseResult>();
PoseItem pose1;
pose1.track_id = 17;
pose1.bbox = Rect{800.0f, 220.0f, 180.0f, 300.0f};
pose1.keypoints.resize(17);
pose1.keypoints[5] = PoseKeypoint{PosePoint2f{840.0f, 280.0f}, 0.9f};
pose1.keypoints[6] = PoseKeypoint{PosePoint2f{940.0f, 280.0f}, 0.9f};
pose1.keypoints[11] = PoseKeypoint{PosePoint2f{850.0f, 420.0f}, 0.9f};
pose1.keypoints[12] = PoseKeypoint{PosePoint2f{930.0f, 420.0f}, 0.9f};
frame1->pose->items.push_back(pose1);
auto frame2 = std::make_shared<Frame>();
frame2->pts = 1600;
frame2->det = std::make_shared<DetectionResult>();
frame2->det->items.push_back(Detection{0, 0.93f, Rect{790.0f, 400.0f, 180.0f, 300.0f}, 17});
frame2->pose = std::make_shared<PoseResult>();
PoseItem pose2;
pose2.track_id = 17;
pose2.bbox = Rect{790.0f, 400.0f, 180.0f, 300.0f};
pose2.keypoints.resize(17);
pose2.keypoints[5] = PoseKeypoint{PosePoint2f{860.0f, 520.0f}, 0.9f};
pose2.keypoints[6] = PoseKeypoint{PosePoint2f{900.0f, 520.0f}, 0.9f};
pose2.keypoints[11] = PoseKeypoint{PosePoint2f{865.0f, 700.0f}, 0.9f};
pose2.keypoints[12] = PoseKeypoint{PosePoint2f{905.0f, 700.0f}, 0.9f};
frame2->pose->items.push_back(pose2);
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
ASSERT_NE(frame2->behavior_events, nullptr);
EXPECT_TRUE(frame2->behavior_events->items.empty());
}
TEST(ActionRecogTest, SuppressesFightFalsePositiveWhenWristsStayFarApart) {
ActionRecogNode node;
const std::string config_text = R"({
"id": "action_evt",
"events": [
{
"type": "fight",
"window_ms": 1200,
"activate_duration_ms": 0,
"bbox": {
"enabled": false,
"proximity_pixels": 220,
"min_motion_pixels": 0
},
"pose": {
"enabled": true,
"min_wrist_motion_pixels": 120,
"max_wrist_distance_pixels": 60
},
"fusion": {
"match_mode": "any"
}
}
]
})";
SimpleJson config = ParseActionConfig(config_text);
NodeContext ctx;
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto frame1 = std::make_shared<Frame>();
frame1->pts = 1000;
frame1->det = std::make_shared<DetectionResult>();
frame1->det->items.push_back(Detection{0, 0.90f, Rect{700.0f, 320.0f, 120.0f, 300.0f}, 31});
frame1->det->items.push_back(Detection{0, 0.89f, Rect{860.0f, 320.0f, 120.0f, 300.0f}, 32});
frame1->pose = std::make_shared<PoseResult>();
PoseItem left1;
left1.track_id = 31;
left1.bbox = Rect{700.0f, 320.0f, 120.0f, 300.0f};
left1.keypoints.resize(17);
left1.keypoints[9] = PoseKeypoint{PosePoint2f{760.0f, 430.0f}, 0.9f};
left1.keypoints[10] = PoseKeypoint{PosePoint2f{770.0f, 450.0f}, 0.9f};
PoseItem right1;
right1.track_id = 32;
right1.bbox = Rect{860.0f, 320.0f, 120.0f, 300.0f};
right1.keypoints.resize(17);
right1.keypoints[9] = PoseKeypoint{PosePoint2f{960.0f, 430.0f}, 0.9f};
right1.keypoints[10] = PoseKeypoint{PosePoint2f{970.0f, 450.0f}, 0.9f};
frame1->pose->items.push_back(left1);
frame1->pose->items.push_back(right1);
auto frame2 = std::make_shared<Frame>();
frame2->pts = 1300;
frame2->det = std::make_shared<DetectionResult>();
frame2->det->items.push_back(Detection{0, 0.91f, Rect{710.0f, 325.0f, 120.0f, 300.0f}, 31});
frame2->det->items.push_back(Detection{0, 0.92f, Rect{850.0f, 315.0f, 120.0f, 300.0f}, 32});
frame2->pose = std::make_shared<PoseResult>();
PoseItem left2;
left2.track_id = 31;
left2.bbox = Rect{710.0f, 325.0f, 120.0f, 300.0f};
left2.keypoints.resize(17);
left2.keypoints[9] = PoseKeypoint{PosePoint2f{820.0f, 430.0f}, 0.9f};
left2.keypoints[10] = PoseKeypoint{PosePoint2f{830.0f, 450.0f}, 0.9f};
PoseItem right2;
right2.track_id = 32;
right2.bbox = Rect{850.0f, 315.0f, 120.0f, 300.0f};
right2.keypoints.resize(17);
right2.keypoints[9] = PoseKeypoint{PosePoint2f{980.0f, 430.0f}, 0.9f};
right2.keypoints[10] = PoseKeypoint{PosePoint2f{990.0f, 450.0f}, 0.9f};
frame2->pose->items.push_back(left2);
frame2->pose->items.push_back(right2);
EXPECT_EQ(static_cast<int>(node.Process(frame1)), static_cast<int>(NodeStatus::OK));
EXPECT_EQ(static_cast<int>(node.Process(frame2)), static_cast<int>(NodeStatus::OK));
ASSERT_NE(frame2->behavior_events, nullptr);
EXPECT_TRUE(frame2->behavior_events->items.empty());
}
} // namespace
} // namespace rk3588

tests/test_ai_pose.cpp Normal file

@ -0,0 +1,232 @@
#include <gtest/gtest.h>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
#include "hw/i_infer_backend.h"
#include "node.h"
#include "pose/pose_result.h"
#include "utils/simple_json.h"
#include "../plugins/ai_pose/ai_pose_node.h"
namespace rk3588 {
namespace {
SimpleJson ParsePoseConfig(const std::string& text) {
SimpleJson config;
std::string err;
const bool ok = ParseSimpleJson(text, config, err);
EXPECT_TRUE(ok);
return config;
}
class FakeInferBackend final : public IInferBackend {
public:
ModelHandle LoadModel(const std::string& /*model_path*/, std::string& /*err*/) override {
return 1;
}
void UnloadModel(ModelHandle /*handle*/) override {}
bool GetModelInfo(ModelHandle /*handle*/, ModelInfo& info) const override {
info.input_width = 640;
info.input_height = 640;
info.input_channels = 3;
info.n_input = 1;
info.n_output = 4;
info.name = "fake_yolov8_pose";
return true;
}
InferResult Infer(ModelHandle /*handle*/, const InferInput& /*input*/) override {
InferResult result;
result.success = true;
result.outputs = outputs_;
return result;
}
AiScheduler::BorrowedInferResult InferBorrowed(ModelHandle /*handle*/, const InferInput& /*input*/) override {
AiScheduler::BorrowedInferResult result;
result.success = false;
result.error = "not used in unit test";
return result;
}
std::vector<InferOutput> outputs_;
};
InferOutput MakeFloatOutput(const std::vector<float>& values) {
InferOutput out;
out.size = values.size() * sizeof(float);
out.data.resize(out.size);
std::memcpy(out.data.data(), values.data(), out.size);
return out;
}
TEST(AiPoseNodeTest, RejectsEnabledConfigWithoutModelPath) {
AiPoseNode node;
SimpleJson config = ParsePoseConfig(R"({
"id": "pose0",
"enabled": true
})");
NodeContext ctx;
EXPECT_FALSE(node.Init(config, ctx));
}
TEST(AiPoseNodeTest, DisabledNodeStartsAndPassesFramesThrough) {
AiPoseNode node;
SimpleJson config = ParsePoseConfig(R"({
"id": "pose0",
"enabled": false
})");
NodeContext ctx;
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
ctx.output_queues.push_back(out);
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto frame = std::make_shared<Frame>();
frame->width = 1280;
frame->height = 720;
EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
FramePtr forwarded;
ASSERT_TRUE(out->TryPop(forwarded));
EXPECT_EQ(forwarded.get(), frame.get());
EXPECT_EQ(frame->pose, nullptr);
}
TEST(AiPoseNodeTest, EnabledNodeRequiresInferBackendAtStart) {
AiPoseNode node;
SimpleJson config = ParsePoseConfig(R"({
"id": "pose0",
"enabled": true,
"model_path": "models/yolov8n-pose.rknn"
})");
NodeContext ctx;
ASSERT_TRUE(node.Init(config, ctx));
EXPECT_FALSE(node.Start());
}
TEST(AiPoseNodeTest, DecodePoseOutputsToOriginalCoordinates) {
const int num_keypoints = 17;
const int total_points = 8400;
std::vector<float> head0(static_cast<size_t>(65 * 80 * 80), -10.0f);
std::vector<float> head1(static_cast<size_t>(65 * 40 * 40), -10.0f);
std::vector<float> head2(static_cast<size_t>(65 * 20 * 20), -10.0f);
std::vector<float> keypoints(static_cast<size_t>(num_keypoints * 3 * total_points), 0.0f);
const int cell_x = 10;
const int cell_y = 15;
const int grid_w = 80;
const int grid_h = 80;
const int point_index = cell_y * grid_w + cell_x;
// One-hot DFL spikes: a large logit at `bin` makes the softmax expectation ~= bin.
auto set_dfl_bin = [&](int channel_group, int bin) {
for (int i = 0; i < 16; ++i) {
head0[static_cast<size_t>((channel_group * 16 + i) * grid_w * grid_h + point_index)] = (i == bin) ? 10.0f : -10.0f;
}
};
set_dfl_bin(0, 4);
set_dfl_bin(1, 6);
set_dfl_bin(2, 8);
set_dfl_bin(3, 10);
// Channel 64 is the score logit for this cell.
head0[static_cast<size_t>(64 * grid_w * grid_h + point_index)] = 10.0f;
for (int k = 0; k < num_keypoints; ++k) {
keypoints[static_cast<size_t>(k * 3 * total_points + point_index)] = 120.0f + static_cast<float>(k);
keypoints[static_cast<size_t>(k * 3 * total_points + total_points + point_index)] = 140.0f + static_cast<float>(k * 2);
keypoints[static_cast<size_t>(k * 3 * total_points + 2 * total_points + point_index)] = 0.9f;
}
auto backend = std::make_shared<FakeInferBackend>();
backend->outputs_ = {
MakeFloatOutput(head0),
MakeFloatOutput(head1),
MakeFloatOutput(head2),
MakeFloatOutput(keypoints),
};
AiPoseNode node;
SimpleJson config = ParsePoseConfig(R"({
"id": "pose0",
"enabled": true,
"model_path": "models/yolov8n-pose.rknn",
"model_input_w": 640,
"model_input_h": 640,
"conf_thresh": 0.25,
"nms_thresh": 0.45
})");
NodeContext ctx;
ctx.infer_backend = backend;
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
ctx.output_queues.push_back(out);
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto rgb = std::make_shared<std::vector<uint8_t>>(static_cast<size_t>(320 * 320 * 3), 127);
auto frame = std::make_shared<Frame>();
frame->width = 320;
frame->height = 320;
frame->format = PixelFormat::RGB;
frame->data = rgb->data();
frame->data_size = rgb->size();
frame->stride = 320 * 3;
frame->plane_count = 1;
frame->planes[0] = {frame->data, frame->stride, static_cast<int>(frame->data_size), 0};
frame->data_owner = rgb;
EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
ASSERT_NE(frame->pose, nullptr);
ASSERT_EQ(frame->pose->items.size(), 1u);
const PoseItem& item = frame->pose->items[0];
EXPECT_NEAR(item.bbox.x, 26.0f, 1.0f);
EXPECT_NEAR(item.bbox.y, 38.0f, 1.0f);
EXPECT_NEAR(item.bbox.w, 48.0f, 1.0f);
EXPECT_NEAR(item.bbox.h, 64.0f, 1.0f);
ASSERT_EQ(item.keypoints.size(), static_cast<size_t>(num_keypoints));
EXPECT_NEAR(item.keypoints[0].point.x, 60.0f, 1.0f);
EXPECT_NEAR(item.keypoints[0].point.y, 70.0f, 1.0f);
EXPECT_NEAR(item.keypoints[16].point.x, 68.0f, 1.0f);
EXPECT_NEAR(item.keypoints[16].point.y, 86.0f, 1.0f);
FramePtr forwarded;
ASSERT_TRUE(out->TryPop(forwarded));
EXPECT_EQ(forwarded.get(), frame.get());
}
TEST(PoseResultTest, SupportsPerTrackKeypoints) {
PoseResult result;
result.img_w = 1920;
result.img_h = 1080;
result.model_name = "pose_model";
PoseItem item;
item.track_id = 42;
item.score = 0.93f;
item.bbox = Rect{100.0f, 200.0f, 300.0f, 400.0f};
item.keypoints.push_back(PoseKeypoint{PosePoint2f{120.0f, 220.0f}, 0.99f});
item.keypoints.push_back(PoseKeypoint{PosePoint2f{140.0f, 260.0f}, 0.95f});
result.items.push_back(item);
ASSERT_EQ(result.items.size(), 1u);
EXPECT_EQ(result.items[0].track_id, 42);
ASSERT_EQ(result.items[0].keypoints.size(), 2u);
EXPECT_FLOAT_EQ(result.items[0].keypoints[0].point.x, 120.0f);
EXPECT_FLOAT_EQ(result.items[0].keypoints[1].score, 0.95f);
}
} // namespace
} // namespace rk3588

tests/test_pose_assoc.cpp Normal file

@@ -0,0 +1,117 @@
#include <gtest/gtest.h>
#include <memory>
#include <string>
#include "frame/frame.h"
#include "node.h"
#include "pose/pose_result.h"
#include "utils/simple_json.h"
#include "../plugins/pose_assoc/pose_assoc_node.h"
namespace rk3588 {
namespace {
SimpleJson ParsePoseAssocConfig(const std::string& text) {
SimpleJson config;
std::string err;
const bool ok = ParseSimpleJson(text, config, err);
EXPECT_TRUE(ok);
return config;
}
TEST(PoseAssocNodeTest, AssociatesPoseItemsToTrackedDetections) {
PoseAssocNode node;
SimpleJson config = ParsePoseAssocConfig(R"({
"id": "pose_assoc0",
"min_iou": 0.1
})");
NodeContext ctx;
auto out = std::make_shared<SpscQueue<FramePtr>>(4, QueueDropStrategy::DropOldest);
ctx.output_queues.push_back(out);
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto frame = std::make_shared<Frame>();
frame->det = std::make_shared<DetectionResult>();
frame->det->items.push_back(Detection{0, 0.95f, Rect{100.0f, 100.0f, 120.0f, 240.0f}, 7});
frame->det->items.push_back(Detection{0, 0.94f, Rect{320.0f, 120.0f, 120.0f, 240.0f}, 8});
frame->pose = std::make_shared<PoseResult>();
PoseItem pose0;
pose0.bbox = Rect{110.0f, 110.0f, 110.0f, 220.0f};
PoseItem pose1;
pose1.bbox = Rect{330.0f, 130.0f, 100.0f, 210.0f};
frame->pose->items.push_back(pose0);
frame->pose->items.push_back(pose1);
EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
ASSERT_EQ(frame->pose->items.size(), 2u);
EXPECT_EQ(frame->pose->items[0].track_id, 7);
EXPECT_EQ(frame->pose->items[1].track_id, 8);
FramePtr forwarded;
ASSERT_TRUE(out->TryPop(forwarded));
EXPECT_EQ(forwarded.get(), frame.get());
}
TEST(PoseAssocNodeTest, UsesOneToOneAssignmentForOverlappingCandidates) {
PoseAssocNode node;
SimpleJson config = ParsePoseAssocConfig(R"({
"id": "pose_assoc0",
"min_iou": 0.05
})");
NodeContext ctx;
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto frame = std::make_shared<Frame>();
frame->det = std::make_shared<DetectionResult>();
frame->det->items.push_back(Detection{0, 0.95f, Rect{100.0f, 100.0f, 150.0f, 250.0f}, 11});
frame->det->items.push_back(Detection{0, 0.94f, Rect{130.0f, 120.0f, 150.0f, 250.0f}, 12});
frame->pose = std::make_shared<PoseResult>();
PoseItem pose0;
pose0.bbox = Rect{105.0f, 110.0f, 145.0f, 245.0f};
PoseItem pose1;
pose1.bbox = Rect{132.0f, 125.0f, 148.0f, 245.0f};
frame->pose->items.push_back(pose0);
frame->pose->items.push_back(pose1);
EXPECT_EQ(static_cast<int>(node.Process(frame)), static_cast<int>(NodeStatus::OK));
EXPECT_NE(frame->pose->items[0].track_id, frame->pose->items[1].track_id);
EXPECT_TRUE(frame->pose->items[0].track_id == 11 || frame->pose->items[0].track_id == 12);
EXPECT_TRUE(frame->pose->items[1].track_id == 11 || frame->pose->items[1].track_id == 12);
}
TEST(PoseAssocNodeTest, PassesThroughWhenPoseOrDetectionMissing) {
PoseAssocNode node;
SimpleJson config = ParsePoseAssocConfig(R"({
"id": "pose_assoc0"
})");
NodeContext ctx;
ASSERT_TRUE(node.Init(config, ctx));
ASSERT_TRUE(node.Start());
auto no_pose = std::make_shared<Frame>();
no_pose->det = std::make_shared<DetectionResult>();
no_pose->det->items.push_back(Detection{0, 0.9f, Rect{0.0f, 0.0f, 10.0f, 10.0f}, 1});
EXPECT_EQ(static_cast<int>(node.Process(no_pose)), static_cast<int>(NodeStatus::OK));
EXPECT_EQ(no_pose->pose, nullptr);
auto no_det = std::make_shared<Frame>();
no_det->pose = std::make_shared<PoseResult>();
PoseItem pose;
pose.bbox = Rect{0.0f, 0.0f, 10.0f, 10.0f};
no_det->pose->items.push_back(pose);
EXPECT_EQ(static_cast<int>(node.Process(no_det)), static_cast<int>(NodeStatus::OK));
ASSERT_EQ(no_det->pose->items.size(), 1u);
EXPECT_EQ(no_det->pose->items[0].track_id, -1);
}
} // namespace
} // namespace rk3588