From 3383e13e88aea4d06b352ec5286791f3976b05d2 Mon Sep 17 00:00:00 2001
From: tian <11429339@qq.com>
Date: Sat, 14 Mar 2026 13:25:45 +0800
Subject: [PATCH] Add configurable person box expansion

---
 configs/sample_person_shoe_two_stage.json | 10 +++++-
 plugins/ai_yolo/ai_yolo_node.cpp          | 43 +++++++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/configs/sample_person_shoe_two_stage.json b/configs/sample_person_shoe_two_stage.json
index 989460a..3fe7251 100644
--- a/configs/sample_person_shoe_two_stage.json
+++ b/configs/sample_person_shoe_two_stage.json
@@ -45,7 +45,15 @@
       "num_classes": 80,
       "conf": 0.3,
       "nms": 0.45,
-      "class_filter": [0]
+      "class_filter": [0],
+      "bbox_expand": {
+        "enable": true,
+        "class_id": 0,
+        "left": 0.06,
+        "right": 0.06,
+        "top": 0.04,
+        "bottom": 0.16
+      }
     },
     {
       "id": "person_trk",
diff --git a/plugins/ai_yolo/ai_yolo_node.cpp b/plugins/ai_yolo/ai_yolo_node.cpp
index 3a14458..881504c 100644
--- a/plugins/ai_yolo/ai_yolo_node.cpp
+++ b/plugins/ai_yolo/ai_yolo_node.cpp
@@ -87,6 +87,15 @@ DetCoordContext BuildDetCoordContext(const Frame& frame, int model_input_w, int
     return ctx;
 }
 
+struct BboxExpandConfig {
+    bool enable = false;
+    int class_id = 0;
+    float left = 0.05f;
+    float right = 0.05f;
+    float top = 0.05f;
+    float bottom = 0.12f;
+};
+
 Rect DecodeToOutputRect(float x, float y, float w, float h, const DetCoordContext& ctx) {
     float ox = x;
     float oy = y;
@@ -115,6 +124,24 @@ Rect DecodeToOutputRect(float x, float y, float w, float h, const DetCoordContex
     return r;
 }
 
+Rect ExpandRect(const Rect& in, int img_w, int img_h, const BboxExpandConfig& cfg) {
+    if (!cfg.enable || img_w <= 0 || img_h <= 0) return in;
+
+    const float ex = in.w * cfg.left;
+    const float ey = in.h * cfg.top;
+    const float ew = in.w * (cfg.left + cfg.right);
+    const float eh = in.h * (cfg.top + cfg.bottom);
+
+    Rect out{};
+    out.x = std::max(0.0f, in.x - ex);
+    out.y = std::max(0.0f, in.y - ey);
+    out.w = std::min(static_cast<float>(img_w) - out.x, in.w + ew);
+    out.h = std::min(static_cast<float>(img_h) - out.y, in.h + eh);
+    out.w = std::max(0.0f, out.w);
+    out.h = std::max(0.0f, out.h);
+    return out;
+}
+
 inline int32_t ClipFloat(float val, float min_val, float max_val) {
     return static_cast<int32_t>(val <= min_val ? min_val : (val >= max_val ? max_val : val));
 }
@@ -677,6 +704,15 @@ public:
             }
         }
 
+        if (const SimpleJson* expand = config.Find("bbox_expand"); expand && expand->IsObject()) {
+            bbox_expand_.enable = expand->ValueOr("enable", false);
+            bbox_expand_.class_id = expand->ValueOr("class_id", bbox_expand_.class_id);
+            bbox_expand_.left = expand->ValueOr("left", bbox_expand_.left);
+            bbox_expand_.right = expand->ValueOr("right", bbox_expand_.right);
+            bbox_expand_.top = expand->ValueOr("top", bbox_expand_.top);
+            bbox_expand_.bottom = expand->ValueOr("bottom", bbox_expand_.bottom);
+        }
+
         input_queue_ = ctx.input_queue;
         if (!input_queue_) {
             LogError("[ai_yolo] no input queue for node " + id_);
@@ -977,6 +1013,9 @@ private:
                 det.cls_id = cls_id;
                 det.score = obj_probs[i];
                 det.bbox = DecodeToOutputRect(x1, y1, w, h, coord_ctx);
+                if (bbox_expand_.enable && det.cls_id == bbox_expand_.class_id) {
+                    det.bbox = ExpandRect(det.bbox, coord_ctx.out_w, coord_ctx.out_h, bbox_expand_);
+                }
                 det.track_id = -1;
 
                 if (debug_det_ && det_result->items.size() < 5 && processed_ < 20) {
@@ -1127,6 +1166,9 @@ private:
                 det.cls_id = cls_id;
                 det.score = obj_probs[i];
                 det.bbox = DecodeToOutputRect(x1, y1, w, h, coord_ctx);
+                if (bbox_expand_.enable && det.cls_id == bbox_expand_.class_id) {
+                    det.bbox = ExpandRect(det.bbox, coord_ctx.out_w, coord_ctx.out_h, bbox_expand_);
+                }
                 det.track_id = -1;
 
                 if (debug_det_ && det_result->items.size() < 5 && processed_ < 20) {
@@ -1169,6 +1211,7 @@ private:
     bool stats_log_ = false;
     uint64_t stats_interval_ = 100;
     bool debug_det_ = false;
+    BboxExpandConfig bbox_expand_{};
 
     int64_t infer_interval_ms_ = 0;
     int64_t last_infer_pts_ms_ = 0;