Add configurable person box expansion

This commit is contained in:
tian 2026-03-14 13:25:45 +08:00
parent fa71ccb0d4
commit 3383e13e88
2 changed files with 52 additions and 1 deletions

View File

@ -45,7 +45,15 @@
"num_classes": 80,
"conf": 0.3,
"nms": 0.45,
"class_filter": [0]
"class_filter": [0],
"bbox_expand": {
"enable": true,
"class_id": 0,
"left": 0.06,
"right": 0.06,
"top": 0.04,
"bottom": 0.16
}
},
{
"id": "person_trk",

View File

@ -87,6 +87,15 @@ DetCoordContext BuildDetCoordContext(const Frame& frame, int model_input_w, int
return ctx;
}
// Settings for enlarging detection boxes of a single class.
// The four side values are multiplied by the box's own width (left/right)
// or height (top/bottom) to obtain how far each edge is pushed outwards.
struct BboxExpandConfig {
    bool enable{false};   // expansion is skipped entirely when false
    int class_id{0};      // detections with this class id are expanded
    float left{0.05f};    // fraction of box width added on the left
    float right{0.05f};   // fraction of box width added on the right
    float top{0.05f};     // fraction of box height added above
    float bottom{0.12f};  // fraction of box height added below
};
Rect DecodeToOutputRect(float x, float y, float w, float h, const DetCoordContext& ctx) {
float ox = x;
float oy = y;
@ -115,6 +124,24 @@ Rect DecodeToOutputRect(float x, float y, float w, float h, const DetCoordContex
return r;
}
// Grows a box outwards by per-side fractions of its own size and clips the
// result to the image rectangle [0, img_w] x [0, img_h].
//
// in     : box to expand (x, y = top-left corner; w, h = size).
// img_w  : image width used as the right clipping bound.
// img_h  : image height used as the bottom clipping bound.
// cfg    : per-side expansion fractions; left/right scale in.w, top/bottom
//          scale in.h.
//
// Returns `in` unchanged when expansion is disabled or the image size is not
// positive. Otherwise returns the expanded box; its width/height are never
// negative.
Rect ExpandRect(const Rect& in, int img_w, int img_h, const BboxExpandConfig& cfg) {
    if (!cfg.enable || img_w <= 0 || img_h <= 0) return in;

    const float max_x = static_cast<float>(img_w);
    const float max_y = static_cast<float>(img_h);

    // Work on the four edges independently. Deriving the size from the
    // (possibly clamped) origin plus the full expanded extent would push the
    // right/bottom edge further out than configured whenever the left/top
    // edge gets clipped at 0.
    const float want_x1 = in.x - in.w * cfg.left;
    const float want_y1 = in.y - in.h * cfg.top;
    const float want_x2 = in.x + in.w + in.w * cfg.right;
    const float want_y2 = in.y + in.h + in.h * cfg.bottom;

    // Clamp every edge into the image so the origin can never end up outside.
    const float x1 = std::min(std::max(want_x1, 0.0f), max_x);
    const float y1 = std::min(std::max(want_y1, 0.0f), max_y);
    const float x2 = std::min(std::max(want_x2, 0.0f), max_x);
    const float y2 = std::min(std::max(want_y2, 0.0f), max_y);

    Rect out{};
    out.x = x1;
    out.y = y1;
    // Guard against inverted edges (e.g. negative fractions shrinking the box
    // past zero size).
    out.w = std::max(0.0f, x2 - x1);
    out.h = std::max(0.0f, y2 - y1);
    return out;
}
// Limits `val` to the range [min_val, max_val] and converts the result to
// int32_t (the conversion truncates toward zero). The lower bound is checked
// first, so it wins if the bounds are inverted.
inline int32_t ClipFloat(float val, float min_val, float max_val) {
    if (val <= min_val) {
        return static_cast<int32_t>(min_val);
    }
    if (val >= max_val) {
        return static_cast<int32_t>(max_val);
    }
    return static_cast<int32_t>(val);
}
@ -677,6 +704,15 @@ public:
}
}
// Load the optional "bbox_expand" object from the node config into
// bbox_expand_. Nothing is touched when the key is missing or is not an
// object, so the struct's built-in defaults stay in effect.
if (const SimpleJson* expand = config.Find("bbox_expand"); expand && expand->IsObject()) {
// "enable" uses a literal false as fallback; the remaining fields pass the
// current member value as fallback.
// NOTE(review): presumably ValueOr returns the fallback when the key is
// absent — confirm against SimpleJson.
bbox_expand_.enable = expand->ValueOr<bool>("enable", false);
bbox_expand_.class_id = expand->ValueOr<int>("class_id", bbox_expand_.class_id);
bbox_expand_.left = expand->ValueOr<float>("left", bbox_expand_.left);
bbox_expand_.right = expand->ValueOr<float>("right", bbox_expand_.right);
bbox_expand_.top = expand->ValueOr<float>("top", bbox_expand_.top);
bbox_expand_.bottom = expand->ValueOr<float>("bottom", bbox_expand_.bottom);
}
input_queue_ = ctx.input_queue;
if (!input_queue_) {
LogError("[ai_yolo] no input queue for node " + id_);
@ -977,6 +1013,9 @@ private:
det.cls_id = cls_id;
det.score = obj_probs[i];
det.bbox = DecodeToOutputRect(x1, y1, w, h, coord_ctx);
if (bbox_expand_.enable && det.cls_id == bbox_expand_.class_id) {
det.bbox = ExpandRect(det.bbox, coord_ctx.out_w, coord_ctx.out_h, bbox_expand_);
}
det.track_id = -1;
if (debug_det_ && det_result->items.size() < 5 && processed_ < 20) {
@ -1127,6 +1166,9 @@ private:
det.cls_id = cls_id;
det.score = obj_probs[i];
det.bbox = DecodeToOutputRect(x1, y1, w, h, coord_ctx);
if (bbox_expand_.enable && det.cls_id == bbox_expand_.class_id) {
det.bbox = ExpandRect(det.bbox, coord_ctx.out_w, coord_ctx.out_h, bbox_expand_);
}
det.track_id = -1;
if (debug_det_ && det_result->items.size() < 5 && processed_ < 20) {
@ -1169,6 +1211,7 @@ private:
bool stats_log_ = false;
uint64_t stats_interval_ = 100;
bool debug_det_ = false;
BboxExpandConfig bbox_expand_{};
int64_t infer_interval_ms_ = 0;
int64_t last_infer_pts_ms_ = 0;