diff --git a/plugins/ai_face_det/ai_face_det_node.cpp b/plugins/ai_face_det/ai_face_det_node.cpp index fd1ebb2..7a4c558 100644 --- a/plugins/ai_face_det/ai_face_det_node.cpp +++ b/plugins/ai_face_det/ai_face_det_node.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,104 @@ struct Prior { float h = 0.0f; }; +struct FaceDetConfigSnapshot { + float conf_thresh = 0.6f; + float nms_thresh = 0.4f; + int max_faces = 10; + bool output_landmarks = true; + + std::string input_format = "rgb"; + std::string input_dtype = "uint8"; + + float norm_scale = 1.0f; + float norm_bias = 0.0f; + bool norm_use_mean_std = false; + std::array norm_mean{{0.0f, 0.0f, 0.0f}}; + std::array norm_std{{1.0f, 1.0f, 1.0f}}; + + // RetinaFace priors defaults for 320 input (MobileNet0.25). + std::vector steps{8, 16, 32}; + std::vector> min_sizes{{16, 32}, {64, 128}, {256, 512}}; +}; + +static bool BuildFaceDetConfigSnapshot(const SimpleJson& config, + const std::shared_ptr& base, + std::shared_ptr& out) { + auto snap = std::make_shared(); + if (base) *snap = *base; + + snap->conf_thresh = config.ValueOr("conf", snap->conf_thresh); + snap->nms_thresh = config.ValueOr("nms", snap->nms_thresh); + snap->max_faces = std::max(1, config.ValueOr("max_faces", snap->max_faces)); + snap->output_landmarks = config.ValueOr("output_landmarks", snap->output_landmarks); + + { + std::string fmt = config.ValueOr("input_format", snap->input_format); + for (auto& c : fmt) c = static_cast(std::tolower(static_cast(c))); + snap->input_format = std::move(fmt); + } + { + std::string dtype = config.ValueOr("input_dtype", snap->input_dtype); + for (auto& c : dtype) c = static_cast(std::tolower(static_cast(c))); + snap->input_dtype = std::move(dtype); + } + + if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) { + bool use_ms = false; + if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) { + for (int i = 0; i < 3; ++i) { + snap->norm_mean[static_cast(i)] = + static_cast(mean->AsArray()[static_cast(i)].AsNumber(snap->norm_mean[static_cast(i)])); + } + use_ms = true; + } + if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) { + for (int i = 0; i < 3; ++i) { + snap->norm_std[static_cast(i)] = + static_cast(st->AsArray()[static_cast(i)].AsNumber(snap->norm_std[static_cast(i)])); + } + use_ms = true; + } + snap->norm_use_mean_std = use_ms; + snap->norm_scale = norm->ValueOr("scale", snap->norm_scale); + snap->norm_bias = norm->ValueOr("bias", snap->norm_bias); + } + + if (const SimpleJson* pri = config.Find("prior"); pri && pri->IsObject()) { + std::vector new_steps = snap->steps; + std::vector> new_mins = snap->min_sizes; + + if (const SimpleJson* steps = pri->Find("steps"); steps && steps->IsArray()) { + std::vector tmp; + for (const auto& v : steps->AsArray()) tmp.push_back(std::max(1, v.AsInt(1))); + if (!tmp.empty()) new_steps = std::move(tmp); + } + if (const SimpleJson* mins = pri->Find("min_sizes"); mins && mins->IsArray()) { + std::vector> tmp; + for (const auto& grp : mins->AsArray()) { + std::vector g; + for (const auto& v : grp.AsArray()) g.push_back(std::max(1, v.AsInt(1))); + if (!g.empty()) tmp.push_back(std::move(g)); + } + if (!tmp.empty()) new_mins = std::move(tmp); + } + + if (!new_steps.empty() && !new_mins.empty() && new_steps.size() == new_mins.size()) { + snap->steps = std::move(new_steps); + snap->min_sizes = std::move(new_mins); + } else { + // Best-effort: keep previous priors to avoid per-frame mismatch. + if (base) { + snap->steps = base->steps; + snap->min_sizes = base->min_sizes; + } + } + } + + out = std::move(snap); + return true; +} + float IoU(const Rect& a, const Rect& b) { const float ax1 = a.x; const float ay1 = a.y; @@ -320,58 +419,12 @@ public: bool Init(const SimpleJson& config, const NodeContext& ctx) override { id_ = config.ValueOr("id", "face_det"); model_path_ = config.ValueOr("model_path", ""); - conf_thresh_ = config.ValueOr("conf", 0.6f); - nms_thresh_ = config.ValueOr("nms", 0.4f); - max_faces_ = std::max(1, config.ValueOr("max_faces", 10)); - output_landmarks_ = config.ValueOr("output_landmarks", true); - - const std::string fmt = config.ValueOr("input_format", "rgb"); - input_format_ = fmt; - for (auto& c : input_format_) c = static_cast(std::tolower(static_cast(c))); - - input_dtype_ = config.ValueOr("input_dtype", input_dtype_); - for (auto& c : input_dtype_) c = static_cast(std::tolower(static_cast(c))); - - // Optional normalization when input_dtype is float. - // - scale/bias: x = x*scale + bias - // - mean/std: x = (x - mean[c]) / std[c] - if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) { - if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) { - for (int i = 0; i < 3; ++i) { - norm_mean_[static_cast(i)] = static_cast(mean->AsArray()[static_cast(i)].AsNumber(norm_mean_[static_cast(i)])); - } - norm_use_mean_std_ = true; - } - if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) { - for (int i = 0; i < 3; ++i) { - norm_std_[static_cast(i)] = static_cast(st->AsArray()[static_cast(i)].AsNumber(norm_std_[static_cast(i)])); - } - norm_use_mean_std_ = true; - } - norm_scale_ = norm->ValueOr("scale", norm_scale_); - norm_bias_ = norm->ValueOr("bias", norm_bias_); - } - - // RetinaFace priors defaults for 320 input (MobileNet0.25). - steps_ = {8, 16, 32}; - min_sizes_ = {{16, 32}, {64, 128}, {256, 512}}; - if (const SimpleJson* pri = config.Find("prior"); pri && pri->IsObject()) { - if (const SimpleJson* steps = pri->Find("steps"); steps && steps->IsArray()) { - steps_.clear(); - for (const auto& v : steps->AsArray()) { - steps_.push_back(std::max(1, v.AsInt(1))); - } - } - if (const SimpleJson* mins = pri->Find("min_sizes"); mins && mins->IsArray()) { - min_sizes_.clear(); - for (const auto& grp : mins->AsArray()) { - std::vector g; - for (const auto& v : grp.AsArray()) { - g.push_back(std::max(1, v.AsInt(1))); - } - if (!g.empty()) min_sizes_.push_back(std::move(g)); - } - } + std::shared_ptr snap; + BuildFaceDetConfigSnapshot(config, nullptr, snap); + { + std::lock_guard lock(mu_); + cfg_ = std::move(snap); + priors_cache_ = {}; } input_queue_ = ctx.input_queue; @@ -412,8 +465,16 @@ public: } bool Start() override { - LogInfo("[ai_face_det] start id=" + id_ + " conf=" + std::to_string(conf_thresh_) + - " nms=" + std::to_string(nms_thresh_) + " max_faces=" + std::to_string(max_faces_)); + std::shared_ptr cfg; + { + std::lock_guard lock(mu_); + cfg = cfg_; + } + const float conf = cfg ? cfg->conf_thresh : 0.0f; + const float nms = cfg ? cfg->nms_thresh : 0.0f; + const int max_faces = cfg ? cfg->max_faces : 0; + LogInfo("[ai_face_det] start id=" + id_ + " conf=" + std::to_string(conf) + + " nms=" + std::to_string(nms) + " max_faces=" + std::to_string(max_faces)); return true; } @@ -426,52 +487,19 @@ public: // Changing model requires graph rebuild. return false; } - - conf_thresh_ = new_config.ValueOr("conf", conf_thresh_); - nms_thresh_ = new_config.ValueOr("nms", nms_thresh_); - max_faces_ = std::max(1, new_config.ValueOr("max_faces", max_faces_)); - output_landmarks_ = new_config.ValueOr("output_landmarks", output_landmarks_); - - std::string dtype = new_config.ValueOr("input_dtype", input_dtype_); - for (auto& c : dtype) c = static_cast(std::tolower(static_cast(c))); - input_dtype_ = std::move(dtype); - - if (const SimpleJson* norm = new_config.Find("normalize"); norm && norm->IsObject()) { - bool use_ms = false; - if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) { - for (int i = 0; i < 3; ++i) { - norm_mean_[static_cast(i)] = static_cast(mean->AsArray()[static_cast(i)].AsNumber(norm_mean_[static_cast(i)])); - } - use_ms = true; - } - if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) { - for (int i = 0; i < 3; ++i) { - norm_std_[static_cast(i)] = static_cast(st->AsArray()[static_cast(i)].AsNumber(norm_std_[static_cast(i)])); - } - use_ms = true; - } - norm_use_mean_std_ = use_ms; - norm_scale_ = norm->ValueOr("scale", norm_scale_); - norm_bias_ = norm->ValueOr("bias", norm_bias_); + std::shared_ptr base; + { + std::lock_guard lock(mu_); + base = cfg_; } - if (const SimpleJson* pri = new_config.Find("prior"); pri && pri->IsObject()) { - if (const SimpleJson* steps = pri->Find("steps"); steps && steps->IsArray()) { - std::vector new_steps; - for (const auto& v : steps->AsArray()) new_steps.push_back(std::max(1, v.AsInt(1))); - if (!new_steps.empty()) steps_ = std::move(new_steps); - } - if (const SimpleJson* mins = pri->Find("min_sizes"); mins && mins->IsArray()) { - std::vector> new_mins; - for (const auto& grp : mins->AsArray()) { - std::vector g; - for (const auto& v : grp.AsArray()) g.push_back(std::max(1, v.AsInt(1))); - if (!g.empty()) new_mins.push_back(std::move(g)); - } - if (!new_mins.empty()) min_sizes_ = std::move(new_mins); - } + std::shared_ptr snap; + BuildFaceDetConfigSnapshot(new_config, base, snap); + { + std::lock_guard lock(mu_); + cfg_ = std::move(snap); + priors_cache_ = {}; } - return true; } @@ -516,8 +544,15 @@ private: : (frame->stride > 0 ? frame->stride : static_cast(src_row)); if (!src || src_stride <= 0) return; - const bool need_swap = (frame->format == PixelFormat::BGR && input_format_ == "rgb") || - (frame->format == PixelFormat::RGB && input_format_ == "bgr"); + std::shared_ptr cfg; + { + std::lock_guard lock(mu_); + cfg = cfg_; + } + if (!cfg) return; + + const bool need_swap = (frame->format == PixelFormat::BGR && cfg->input_format == "rgb") || + (frame->format == PixelFormat::RGB && cfg->input_format == "bgr"); const int in_w = model_w_ > 0 ? model_w_ : src_w; const int in_h = model_h_ > 0 ? model_h_ : src_h; @@ -550,18 +585,19 @@ private: input.is_nhwc = true; // Default: keep existing UINT8 behavior. - if (input_dtype_ == "float" || input_dtype_ == "f32" || input_dtype_ == "float32") { + if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") { float_input_buf_.resize(static_cast(in_w) * static_cast(in_h) * 3); const size_t pix = static_cast(in_w) * static_cast(in_h); const uint8_t* p = reinterpret_cast(input_ptr); for (size_t i = 0; i < pix; ++i) { for (int c = 0; c < 3; ++c) { float x = static_cast(p[i * 3 + static_cast(c)]); - if (norm_use_mean_std_) { - const float st = std::fabs(norm_std_[static_cast(c)]) < 1e-6f ? 1.0f : norm_std_[static_cast(c)]; - x = (x - norm_mean_[static_cast(c)]) / st; + if (cfg->norm_use_mean_std) { + const float st = std::fabs(cfg->norm_std[static_cast(c)]) < 1e-6f ? 1.0f + : cfg->norm_std[static_cast(c)]; + x = (x - cfg->norm_mean[static_cast(c)]) / st; } else { - x = x * norm_scale_ + norm_bias_; + x = x * cfg->norm_scale + cfg->norm_bias; } float_input_buf_[i * 3 + static_cast(c)] = x; } @@ -600,13 +636,47 @@ private: det.img_h = src_h; det.model_name = "retinaface"; - DecodeRetinaFace(tensors, src_w, src_h, in_w, in_h, det); + std::shared_ptr> priors = GetRetinaFacePriors(cfg, in_w, in_h); + DecodeRetinaFace(tensors, src_w, src_h, in_w, in_h, *cfg, priors.get(), det); frame->face_det = std::make_shared(std::move(det)); } + struct PriorsCache { + int in_w = 0; + int in_h = 0; + const FaceDetConfigSnapshot* cfg_ptr = nullptr; + std::shared_ptr> priors; + }; + + std::shared_ptr> GetRetinaFacePriors(const std::shared_ptr& cfg, + int in_w, int in_h) { + if (!cfg || in_w <= 0 || in_h <= 0) return nullptr; + + { + std::lock_guard lock(mu_); + if (priors_cache_.priors && priors_cache_.cfg_ptr == cfg.get() && + priors_cache_.in_w == in_w && priors_cache_.in_h == in_h) { + return priors_cache_.priors; + } + } + + const std::vector built = GenerateRetinaFacePriors(in_w, in_h, cfg->steps, cfg->min_sizes); + auto sp = std::make_shared>(built); + { + std::lock_guard lock(mu_); + priors_cache_.cfg_ptr = cfg.get(); + priors_cache_.in_w = in_w; + priors_cache_.in_h = in_h; + priors_cache_.priors = sp; + return priors_cache_.priors; + } + } + void DecodeRetinaFace(const std::vector& outs, int orig_w, int orig_h, int in_w, int in_h, + const FaceDetConfigSnapshot& cfg, + const std::vector* priors_ptr, FaceDetResult& out) { // Find loc/conf/landms tensors. std::vector locs; @@ -654,12 +724,13 @@ private: NcTensor loc = Concat(locs); NcTensor conf = Concat(confs); NcTensor lmk; - if (output_landmarks_ && !landms.empty()) lmk = Concat(landms); + if (cfg.output_landmarks && !landms.empty()) lmk = Concat(landms); if (loc.n <= 0 || conf.n != loc.n) return; const int n = loc.n; - const std::vector priors = GenerateRetinaFacePriors(in_w, in_h, steps_, min_sizes_); + const std::vector empty_priors; + const std::vector& priors = priors_ptr ? *priors_ptr : empty_priors; if (!priors.empty() && static_cast(priors.size()) != n) { // Mismatch: can't reliably decode. std::cerr << "[ai_face_det] prior mismatch: priors=" << priors.size() << " n=" << n << "\n"; @@ -674,7 +745,7 @@ private: std::vector> lmks; boxes.reserve(static_cast(n)); scores.reserve(static_cast(n)); - if (output_landmarks_) lmks.reserve(static_cast(n)); + if (cfg.output_landmarks) lmks.reserve(static_cast(n)); constexpr float var0 = 0.1f; constexpr float var1 = 0.2f; @@ -688,7 +759,7 @@ private: } else { score = Softmax2(s0, s1); } - if (score < conf_thresh_) continue; + if (score < cfg.conf_thresh) continue; const Prior p = priors.empty() ? Prior{0, 0, 0, 0} : priors[static_cast(i)]; @@ -724,7 +795,7 @@ private: boxes.push_back(bb); scores.push_back(score); - if (output_landmarks_ && !lmk.data.empty() && lmk.n == n) { + if (cfg.output_landmarks && !lmk.data.empty() && lmk.n == n) { std::array pts{}; for (int k = 0; k < 5; ++k) { const float lx = lmk.data[static_cast(i) * 10 + k * 2 + 0]; @@ -741,10 +812,10 @@ private: if (boxes.empty()) return; std::vector keep; - NmsSorted(boxes, scores, nms_thresh_, keep); + NmsSorted(boxes, scores, cfg.nms_thresh, keep); if (keep.empty()) return; - const int out_n = std::min(max_faces_, static_cast(keep.size())); + const int out_n = std::min(cfg.max_faces, static_cast(keep.size())); out.faces.reserve(static_cast(out_n)); for (int i = 0; i < out_n; ++i) { const int k = keep[static_cast(i)]; @@ -752,7 +823,7 @@ private: item.bbox = boxes[static_cast(k)]; item.score = scores[static_cast(k)]; item.track_id = -1; - if (output_landmarks_ && k < static_cast(lmks.size())) { + if (cfg.output_landmarks && k < static_cast(lmks.size())) { item.has_landmarks = true; item.landmarks = lmks[static_cast(k)]; } @@ -765,23 +836,9 @@ private: std::string id_; std::string model_path_; - float conf_thresh_ = 0.6f; - float nms_thresh_ = 0.4f; - int max_faces_ = 10; - bool output_landmarks_ = true; - - std::string input_format_ = "rgb"; - - // Model input dtype: "uint8" (default) or "float32". - std::string input_dtype_ = "uint8"; - float norm_scale_ = 1.0f; - float norm_bias_ = 0.0f; - bool norm_use_mean_std_ = false; - std::array norm_mean_{{0.0f, 0.0f, 0.0f}}; - std::array norm_std_{{1.0f, 1.0f, 1.0f}}; - - std::vector steps_; - std::vector> min_sizes_; + mutable std::mutex mu_; + std::shared_ptr cfg_; + PriorsCache priors_cache_; std::shared_ptr> input_queue_; std::vector>> output_queues_; diff --git a/plugins/ai_face_recog/ai_face_recog_node.cpp b/plugins/ai_face_recog/ai_face_recog_node.cpp index f0c5b1e..2375fc9 100644 --- a/plugins/ai_face_recog/ai_face_recog_node.cpp +++ b/plugins/ai_face_recog/ai_face_recog_node.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -546,6 +547,91 @@ bool DecodeEmbedding(const AiScheduler::BorrowedOutput& /*o*/, std::vector& /*v*/) {} #endif +struct FaceRecogConfigSnapshot { + bool align = true; + bool emit_embedding = false; + int max_faces = 10; + + float thr_accept = 0.45f; + float thr_margin = 0.05f; + + std::string model_input_format = "rgb"; + std::string input_dtype = "uint8"; + + float norm_scale = 1.0f; + float norm_bias = 0.0f; + bool norm_use_mean_std = false; + std::array norm_mean{{0.0f, 0.0f, 0.0f}}; + std::array norm_std{{1.0f, 1.0f, 1.0f}}; + + std::string gallery_backend = "file"; + std::string gallery_path; + bool gallery_load_on_start = true; + int gallery_expected_dim = 512; + std::string gallery_dtype = "auto"; +}; + +static bool BuildFaceRecogConfigSnapshot(const SimpleJson& config, + const std::shared_ptr& base, + std::shared_ptr& out) { + auto snap = std::make_shared(); + if (base) *snap = *base; + + snap->align = config.ValueOr("align", snap->align); + snap->emit_embedding = config.ValueOr("emit_embedding", snap->emit_embedding); + snap->max_faces = std::max(1, config.ValueOr("max_faces", snap->max_faces)); + + if (const SimpleJson* th = config.Find("threshold"); th && th->IsObject()) { + snap->thr_accept = th->ValueOr("accept", snap->thr_accept); + snap->thr_margin = th->ValueOr("margin", snap->thr_margin); + } + + { + std::string fmt = config.ValueOr("input_format", snap->model_input_format); + for (auto& c : fmt) c = static_cast(std::tolower(static_cast(c))); + snap->model_input_format = std::move(fmt); + } + { + std::string dtype = config.ValueOr("input_dtype", snap->input_dtype); + for (auto& c : dtype) c = static_cast(std::tolower(static_cast(c))); + snap->input_dtype = std::move(dtype); + } + + if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) { + bool use_ms = false; + if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) { + for (int i = 0; i < 3; ++i) { + snap->norm_mean[static_cast(i)] = + static_cast(mean->AsArray()[static_cast(i)].AsNumber(snap->norm_mean[static_cast(i)])); + } + use_ms = true; + } + if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) { + for (int i = 0; i < 3; ++i) { + snap->norm_std[static_cast(i)] = + static_cast(st->AsArray()[static_cast(i)].AsNumber(snap->norm_std[static_cast(i)])); + } + use_ms = true; + } + snap->norm_use_mean_std = use_ms; + snap->norm_scale = norm->ValueOr("scale", snap->norm_scale); + snap->norm_bias = norm->ValueOr("bias", snap->norm_bias); + } + + if (const SimpleJson* g = config.Find("gallery"); g && g->IsObject()) { + snap->gallery_backend = g->ValueOr("backend", snap->gallery_backend); + snap->gallery_path = g->ValueOr("path", snap->gallery_path); + snap->gallery_load_on_start = g->ValueOr("load_on_start", snap->gallery_load_on_start); + snap->gallery_expected_dim = std::max(0, g->ValueOr("expected_dim", snap->gallery_expected_dim)); + snap->gallery_dtype = g->ValueOr("dtype", snap->gallery_dtype); + } + for (auto& c : snap->gallery_backend) c = static_cast(std::tolower(static_cast(c))); + for (auto& c : snap->gallery_dtype) c = static_cast(std::tolower(static_cast(c))); + + out = std::move(snap); + return true; +} + } // namespace class AiFaceRecogNode : public INode { @@ -556,50 +642,14 @@ public: bool Init(const SimpleJson& config, const NodeContext& ctx) override { id_ = config.ValueOr("id", "face_recog"); model_path_ = config.ValueOr("model_path", ""); - align_ = config.ValueOr("align", true); - emit_embedding_ = config.ValueOr("emit_embedding", false); - max_faces_ = std::max(1, config.ValueOr("max_faces", 10)); - - if (const SimpleJson* th = config.Find("threshold"); th && th->IsObject()) { - thr_accept_ = th->ValueOr("accept", thr_accept_); - thr_margin_ = th->ValueOr("margin", thr_margin_); + std::shared_ptr snap; + BuildFaceRecogConfigSnapshot(config, nullptr, snap); + { + std::lock_guard lock(mu_); + cfg_ = std::move(snap); + gallery_.reset(); } - const std::string fmt = config.ValueOr("input_format", "rgb"); - model_input_format_ = fmt; - for (auto& c : model_input_format_) c = static_cast(std::tolower(static_cast(c))); - - input_dtype_ = config.ValueOr("input_dtype", input_dtype_); - for (auto& c : input_dtype_) c = static_cast(std::tolower(static_cast(c))); - - if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) { - if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) { - for (int i = 0; i < 3; ++i) { - norm_mean_[static_cast(i)] = static_cast(mean->AsArray()[static_cast(i)].AsNumber(norm_mean_[static_cast(i)])); - } - norm_use_mean_std_ = true; - } - if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) { - for (int i = 0; i < 3; ++i) { - norm_std_[static_cast(i)] = static_cast(st->AsArray()[static_cast(i)].AsNumber(norm_std_[static_cast(i)])); - } - norm_use_mean_std_ = true; - } - norm_scale_ = norm->ValueOr("scale", norm_scale_); - norm_bias_ = norm->ValueOr("bias", norm_bias_); - } - - // Gallery - if (const SimpleJson* g = config.Find("gallery"); g && g->IsObject()) { - gallery_backend_ = g->ValueOr("backend", gallery_backend_); - gallery_path_ = g->ValueOr("path", gallery_path_); - gallery_load_on_start_ = g->ValueOr("load_on_start", gallery_load_on_start_); - gallery_expected_dim_ = std::max(0, g->ValueOr("expected_dim", gallery_expected_dim_)); - gallery_dtype_ = g->ValueOr("dtype", gallery_dtype_); - } - for (auto& c : gallery_backend_) c = static_cast(std::tolower(static_cast(c))); - for (auto& c : gallery_dtype_) c = static_cast(std::tolower(static_cast(c))); - input_queue_ = ctx.input_queue; output_queues_ = ctx.output_queues; if (!input_queue_) { @@ -636,11 +686,19 @@ public: } bool Start() override { - if (gallery_load_on_start_) { - ReloadGallery(); + std::shared_ptr cfg; + { + std::lock_guard lock(mu_); + cfg = cfg_; } - LogInfo("[ai_face_recog] start id=" + id_ + " align=" + std::string(align_ ? "true" : "false") + - " thr_accept=" + std::to_string(thr_accept_) + " thr_margin=" + std::to_string(thr_margin_)); + if (cfg && cfg->gallery_load_on_start) { + ReloadGallery(*cfg); + } + const bool align = cfg ? cfg->align : false; + const float thr_accept = cfg ? cfg->thr_accept : 0.0f; + const float thr_margin = cfg ? cfg->thr_margin : 0.0f; + LogInfo("[ai_face_recog] start id=" + id_ + " align=" + std::string(align ? "true" : "false") + + " thr_accept=" + std::to_string(thr_accept) + " thr_margin=" + std::to_string(thr_margin)); return true; } @@ -654,62 +712,31 @@ public: return false; } - align_ = new_config.ValueOr("align", align_); - emit_embedding_ = new_config.ValueOr("emit_embedding", emit_embedding_); - max_faces_ = std::max(1, new_config.ValueOr("max_faces", max_faces_)); - - if (const SimpleJson* th = new_config.Find("threshold"); th && th->IsObject()) { - thr_accept_ = th->ValueOr("accept", thr_accept_); - thr_margin_ = th->ValueOr("margin", thr_margin_); + std::shared_ptr base; + { + std::lock_guard lock(mu_); + base = cfg_; } - std::string dtype = new_config.ValueOr("input_dtype", input_dtype_); - for (auto& c : dtype) c = static_cast(std::tolower(static_cast(c))); - input_dtype_ = std::move(dtype); + std::shared_ptr snap; + BuildFaceRecogConfigSnapshot(new_config, base, snap); - if (const SimpleJson* norm = new_config.Find("normalize"); norm && norm->IsObject()) { - bool use_ms = false; - if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) { - for (int i = 0; i < 3; ++i) { - norm_mean_[static_cast(i)] = static_cast(mean->AsArray()[static_cast(i)].AsNumber(norm_mean_[static_cast(i)])); - } - use_ms = true; - } - if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) { - for (int i = 0; i < 3; ++i) { - norm_std_[static_cast(i)] = static_cast(st->AsArray()[static_cast(i)].AsNumber(norm_std_[static_cast(i)])); - } - use_ms = true; - } - norm_use_mean_std_ = use_ms; - norm_scale_ = norm->ValueOr("scale", norm_scale_); - norm_bias_ = norm->ValueOr("bias", norm_bias_); - } - - // Gallery updates bool reload = false; - if (const SimpleJson* g = new_config.Find("gallery"); g && g->IsObject()) { - std::string backend = g->ValueOr("backend", gallery_backend_); - std::string path = g->ValueOr("path", gallery_path_); - bool los = g->ValueOr("load_on_start", gallery_load_on_start_); - int expected_dim = std::max(0, g->ValueOr("expected_dim", gallery_expected_dim_)); - std::string dtype = g->ValueOr("dtype", gallery_dtype_); - - for (auto& c : backend) c = static_cast(std::tolower(static_cast(c))); - for (auto& c : dtype) c = static_cast(std::tolower(static_cast(c))); - if (backend != gallery_backend_ || path != gallery_path_ || expected_dim != gallery_expected_dim_ || dtype != gallery_dtype_) { - reload = true; - } - gallery_backend_ = std::move(backend); - gallery_path_ = std::move(path); - gallery_load_on_start_ = los; - gallery_expected_dim_ = expected_dim; - gallery_dtype_ = std::move(dtype); - } - if (reload) { - ReloadGallery(); + if (base && snap) { + reload = (snap->gallery_backend != base->gallery_backend || + snap->gallery_path != base->gallery_path || + snap->gallery_expected_dim != base->gallery_expected_dim || + snap->gallery_dtype != base->gallery_dtype); } + { + std::lock_guard lock(mu_); + cfg_ = snap; + } + + if (reload && snap) { + ReloadGallery(*snap); + } return true; } @@ -738,21 +765,21 @@ private: for (auto& q : output_queues_) q->Push(frame); } - void ReloadGallery() { - if (gallery_path_.empty()) return; + void ReloadGallery(const FaceRecogConfigSnapshot& cfg) { + if (cfg.gallery_path.empty()) return; std::string err; FaceGallery g; - g.SetExpectedDim(gallery_expected_dim_); - g.SetPreferredDtype(gallery_dtype_); + g.SetExpectedDim(cfg.gallery_expected_dim); + g.SetPreferredDtype(cfg.gallery_dtype); bool ok = false; - if (gallery_backend_ == "sqlite") { - ok = g.LoadSqliteBackend(gallery_path_, err); - } else if (gallery_backend_ == "file") { - ok = g.LoadFileBackend(gallery_path_, err); + if (cfg.gallery_backend == "sqlite") { + ok = g.LoadSqliteBackend(cfg.gallery_path, err); + } else if (cfg.gallery_backend == "file") { + ok = g.LoadFileBackend(cfg.gallery_path, err); } else { - err = "unknown gallery backend: " + gallery_backend_; + err = "unknown gallery backend: " + cfg.gallery_backend; } if (!ok) { @@ -760,9 +787,13 @@ private: return; } - gallery_ = std::move(g); - LogInfo("[ai_face_recog] gallery loaded: n=" + std::to_string(gallery_.Size()) + - " dim=" + std::to_string(gallery_.Dim())); + auto sp = std::make_shared(std::move(g)); + { + std::lock_guard lock(mu_); + gallery_ = sp; + } + LogInfo("[ai_face_recog] gallery loaded: n=" + std::to_string(sp->Size()) + + " dim=" + std::to_string(sp->Dim())); } #if defined(RK3588_ENABLE_RKNN) @@ -781,22 +812,31 @@ private: : (frame->stride > 0 ? frame->stride : w * 3); if (!src || stride <= 0) return; - const bool need_swap = (frame->format == PixelFormat::BGR && model_input_format_ == "rgb") || - (frame->format == PixelFormat::RGB && model_input_format_ == "bgr"); + std::shared_ptr cfg; + std::shared_ptr gallery; + { + std::lock_guard lock(mu_); + cfg = cfg_; + gallery = gallery_; + } + if (!cfg) return; + + const bool need_swap = (frame->format == PixelFormat::BGR && cfg->model_input_format == "rgb") || + (frame->format == PixelFormat::RGB && cfg->model_input_format == "bgr"); FaceRecogResult rr; rr.img_w = w; rr.img_h = h; rr.model_name = "arcface"; - const int limit = std::min(max_faces_, static_cast(frame->face_det->faces.size())); + const int limit = std::min(cfg->max_faces, static_cast(frame->face_det->faces.size())); rr.items.reserve(static_cast(limit)); for (int i = 0; i < limit; ++i) { const FaceDetItem& face = frame->face_det->faces[static_cast(i)]; face_buf_.resize(static_cast(model_w_) * static_cast(model_h_) * 3); - if (align_ && face.has_landmarks && model_w_ == 112 && model_h_ == 112) { + if (cfg->align && face.has_landmarks && model_w_ == 112 && model_h_ == 112) { const std::array dst = { Point2f{38.2946f, 51.6963f}, Point2f{73.5318f, 51.5014f}, @@ -820,18 +860,19 @@ private: in.height = model_h_; in.is_nhwc = true; - if (input_dtype_ == "float" || input_dtype_ == "f32" || input_dtype_ == "float32") { + if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") { float_input_buf_.resize(static_cast(model_w_) * static_cast(model_h_) * 3); const size_t pix = static_cast(model_w_) * static_cast(model_h_); const uint8_t* p = face_buf_.data(); for (size_t ii = 0; ii < pix; ++ii) { for (int c = 0; c < 3; ++c) { float x = static_cast(p[ii * 3 + static_cast(c)]); - if (norm_use_mean_std_) { - const float st = std::fabs(norm_std_[static_cast(c)]) < 1e-6f ? 1.0f : norm_std_[static_cast(c)]; - x = (x - norm_mean_[static_cast(c)]) / st; + if (cfg->norm_use_mean_std) { + const float st = std::fabs(cfg->norm_std[static_cast(c)]) < 1e-6f ? 1.0f + : cfg->norm_std[static_cast(c)]; + x = (x - cfg->norm_mean[static_cast(c)]) / st; } else { - x = x * norm_scale_ + norm_bias_; + x = x * cfg->norm_scale + cfg->norm_bias; } float_input_buf_[ii * 3 + static_cast(c)] = x; } @@ -859,13 +900,13 @@ private: L2Normalize(emb); FaceGallery::SearchResult sr; - if (gallery_.Size() > 0) { - sr = gallery_.SearchTop2(emb); + if (gallery && gallery->Size() > 0) { + sr = gallery->SearchTop2(emb); } const bool accept = (sr.best_person_id >= 0) && - (sr.best_sim >= thr_accept_) && - ((thr_margin_ <= 0.0f) || ((sr.best_sim - sr.second_sim) >= thr_margin_)); + (sr.best_sim >= cfg->thr_accept) && + ((cfg->thr_margin <= 0.0f) || ((sr.best_sim - sr.second_sim) >= cfg->thr_margin)); FaceRecogItem item; item.bbox = face.bbox; @@ -878,7 +919,7 @@ private: item.second_sim = sr.second_sim; item.unknown = !accept; - if (emit_embedding_) item.embedding = emb; + if (cfg->emit_embedding) item.embedding = emb; rr.items.push_back(std::move(item)); } @@ -889,28 +930,9 @@ private: std::string id_; std::string model_path_; - bool align_ = true; - bool emit_embedding_ = false; - int max_faces_ = 10; - - float thr_accept_ = 0.45f; - float thr_margin_ = 0.05f; - - std::string model_input_format_ = "rgb"; - - std::string input_dtype_ = "uint8"; - float norm_scale_ = 1.0f; - float norm_bias_ = 0.0f; - bool norm_use_mean_std_ = false; - std::array norm_mean_{{0.0f, 0.0f, 0.0f}}; - std::array norm_std_{{1.0f, 1.0f, 1.0f}}; - - std::string gallery_backend_ = "file"; - std::string gallery_path_; // base path without extension: .json / .bin - bool gallery_load_on_start_ = true; - int gallery_expected_dim_ = 512; - std::string gallery_dtype_ = "auto"; - FaceGallery gallery_; + mutable std::mutex mu_; + std::shared_ptr cfg_; + std::shared_ptr gallery_; std::shared_ptr> input_queue_; std::vector>> output_queues_; diff --git a/plugins/preprocess/preprocess_node.cpp b/plugins/preprocess/preprocess_node.cpp index b60871b..c81a219 100644 --- a/plugins/preprocess/preprocess_node.cpp +++ b/plugins/preprocess/preprocess_node.cpp @@ -334,6 +334,15 @@ private: } } + void WarnMetaResizeOnce(const FramePtr& frame, int out_w, int out_h) { + if (warned_meta_resize_) return; + if (!frame) return; + if (frame->width == out_w && frame->height == out_h) return; + if (!frame->det && !frame->face_det && !frame->face_recog) return; + warned_meta_resize_ = true; + LogWarn("[preprocess] resized frame but forwarded det/face meta without coordinate scaling; ensure det/recog/osd use same resolution (id=" + id_ + ")"); + } + void ProcessPassthrough(FramePtr frame) { PushToDownstream(frame); ++processed_; @@ -364,6 +373,10 @@ private: bool need_cvt = (src_fmt_rga != dst_fmt_rga); bool need_resize = (frame->width != out_w || frame->height != out_h); + if (need_resize) { + WarnMetaResizeOnce(frame, out_w, out_h); + } + // If no processing needed, passthrough directly if (!need_cvt && !need_resize) { PushToDownstream(frame); @@ -607,6 +620,10 @@ private: out_h = (out_h + 1) & ~1; } + if (frame->width != out_w || frame->height != out_h) { + WarnMetaResizeOnce(frame, out_w, out_h); + } + AVPixelFormat src_av_fmt = ToAvFormat(frame->format); AVPixelFormat dst_av_fmt = ToAvFormat(out_fmt); @@ -790,6 +807,8 @@ private: bool stats_log_ = false; uint64_t stats_interval_ = 100; + bool warned_meta_resize_ = false; + std::shared_ptr> input_queue_; std::vector>> output_queues_; uint64_t processed_ = 0;