#include #include #include #include #include #include #include #include #include #include #include #include #include "hw/i_infer_backend.h" #include "face/face_result.h" #include "node.h" #include "utils/dma_alloc.h" #include "utils/logger.h" namespace rk3588 { namespace { inline int ClampInt(int v, int lo, int hi) { return v < lo ? lo : (v > hi ? hi : v); } struct Prior { float cx = 0.0f; float cy = 0.0f; float w = 0.0f; float h = 0.0f; }; struct FaceDetConfigSnapshot { float conf_thresh = 0.6f; float nms_thresh = 0.4f; int max_faces = 10; bool output_landmarks = true; std::string input_format = "rgb"; std::string input_dtype = "uint8"; float norm_scale = 1.0f; float norm_bias = 0.0f; bool norm_use_mean_std = false; std::array norm_mean{{0.0f, 0.0f, 0.0f}}; std::array norm_std{{1.0f, 1.0f, 1.0f}}; // RetinaFace priors defaults for 320 input (MobileNet0.25). std::vector steps{8, 16, 32}; std::vector> min_sizes{{16, 32}, {64, 128}, {256, 512}}; }; static bool BuildFaceDetConfigSnapshot(const SimpleJson& config, const std::shared_ptr& base, std::shared_ptr& out) { auto snap = std::make_shared(); if (base) *snap = *base; snap->conf_thresh = config.ValueOr("conf", snap->conf_thresh); snap->nms_thresh = config.ValueOr("nms", snap->nms_thresh); snap->max_faces = std::max(1, config.ValueOr("max_faces", snap->max_faces)); snap->output_landmarks = config.ValueOr("output_landmarks", snap->output_landmarks); { std::string fmt = config.ValueOr("input_format", snap->input_format); for (auto& c : fmt) c = static_cast(std::tolower(static_cast(c))); snap->input_format = std::move(fmt); } { std::string dtype = config.ValueOr("input_dtype", snap->input_dtype); for (auto& c : dtype) c = static_cast(std::tolower(static_cast(c))); snap->input_dtype = std::move(dtype); } if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) { bool use_ms = false; if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) { for (int i = 0; i < 3; ++i) { snap->norm_mean[static_cast(i)] = static_cast(mean->AsArray()[static_cast(i)].AsNumber(snap->norm_mean[static_cast(i)])); } use_ms = true; } if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) { for (int i = 0; i < 3; ++i) { snap->norm_std[static_cast(i)] = static_cast(st->AsArray()[static_cast(i)].AsNumber(snap->norm_std[static_cast(i)])); } use_ms = true; } snap->norm_use_mean_std = use_ms; snap->norm_scale = norm->ValueOr("scale", snap->norm_scale); snap->norm_bias = norm->ValueOr("bias", snap->norm_bias); } if (const SimpleJson* pri = config.Find("prior"); pri && pri->IsObject()) { std::vector new_steps = snap->steps; std::vector> new_mins = snap->min_sizes; if (const SimpleJson* steps = pri->Find("steps"); steps && steps->IsArray()) { std::vector tmp; for (const auto& v : steps->AsArray()) tmp.push_back(std::max(1, v.AsInt(1))); if (!tmp.empty()) new_steps = std::move(tmp); } if (const SimpleJson* mins = pri->Find("min_sizes"); mins && mins->IsArray()) { std::vector> tmp; for (const auto& grp : mins->AsArray()) { std::vector g; for (const auto& v : grp.AsArray()) g.push_back(std::max(1, v.AsInt(1))); if (!g.empty()) tmp.push_back(std::move(g)); } if (!tmp.empty()) new_mins = std::move(tmp); } if (!new_steps.empty() && !new_mins.empty() && new_steps.size() == new_mins.size()) { snap->steps = std::move(new_steps); snap->min_sizes = std::move(new_mins); } else { // Best-effort: keep previous priors to avoid per-frame mismatch. if (base) { snap->steps = base->steps; snap->min_sizes = base->min_sizes; } } } out = std::move(snap); return true; } float IoU(const Rect& a, const Rect& b) { const float ax1 = a.x; const float ay1 = a.y; const float ax2 = a.x + a.w; const float ay2 = a.y + a.h; const float bx1 = b.x; const float by1 = b.y; const float bx2 = b.x + b.w; const float by2 = b.y + b.h; const float ix1 = std::max(ax1, bx1); const float iy1 = std::max(ay1, by1); const float ix2 = std::min(ax2, bx2); const float iy2 = std::min(ay2, by2); const float iw = std::max(0.0f, ix2 - ix1); const float ih = std::max(0.0f, iy2 - iy1); const float inter = iw * ih; const float ua = a.w * a.h + b.w * b.h - inter; return ua <= 0.0f ? 0.0f : (inter / ua); } void NmsSorted(const std::vector& boxes, const std::vector& scores, float nms_thresh, std::vector& keep) { keep.clear(); std::vector order(scores.size()); std::iota(order.begin(), order.end(), 0); std::sort(order.begin(), order.end(), [&](int a, int b) { return scores[a] > scores[b]; }); for (int idx : order) { bool suppressed = false; for (int kept : keep) { if (IoU(boxes[idx], boxes[kept]) > nms_thresh) { suppressed = true; break; } } if (!suppressed) keep.push_back(idx); } } inline float Sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); } inline float Softmax2(float a, float b) { const float m = std::max(a, b); const float ea = std::exp(a - m); const float eb = std::exp(b - m); return eb / (ea + eb); } inline float HalfToFloat(uint16_t h) { const uint32_t sign = (static_cast(h & 0x8000u)) << 16; uint32_t exp = (h & 0x7C00u) >> 10; uint32_t mant = (h & 0x03FFu); uint32_t f = 0; if (exp == 0) { if (mant == 0) { f = sign; } else { // Subnormal exp = 1; while ((mant & 0x0400u) == 0) { mant <<= 1; --exp; } mant &= 0x03FFu; exp = exp + (127 - 15); f = sign | (exp << 23) | (mant << 13); } } else if (exp == 31) { // Inf/NaN f = sign | 0x7F800000u | (mant << 13); } else { exp = exp + (127 - 15); f = sign | (exp << 23) | (mant << 13); } float out; memcpy(&out, &f, sizeof(out)); return out; } template inline float Dequant(T q, int32_t zp, float scale) { return (static_cast(q) - static_cast(zp)) * scale; } struct Tensor { const uint8_t* data = nullptr; size_t size = 0; int32_t zp = 0; float scale = 1.0f; std::vector dims; #if defined(RK3588_ENABLE_RKNN) rknn_tensor_type type = RKNN_TENSOR_UINT8; #endif }; struct NcTensor { int n = 0; int c = 0; std::vector data; // N*C row-major }; bool ExtractNc(const Tensor& t, int c, NcTensor& out) { out = {}; out.c = c; if (!t.data || t.size == 0) return false; size_t elem_size = 1; bool is_float32 = false; bool is_float16 = false; #if defined(RK3588_ENABLE_RKNN) if (t.type == RKNN_TENSOR_FLOAT16) { elem_size = 2; is_float16 = true; } if (t.type == RKNN_TENSOR_FLOAT32) { elem_size = 4; is_float32 = true; } #endif const size_t elem_cnt = elem_size > 0 ? (t.size / elem_size) : 0; if (elem_cnt == 0) return false; int n = 0; bool transposed = false; if (t.dims.size() == 3) { // Common: [1, C, N] or [1, N, C] const uint32_t d1 = t.dims[1]; const uint32_t d2 = t.dims[2]; if (static_cast(d2) == c) { n = static_cast(d1); transposed = false; // NxC } else if (static_cast(d1) == c) { n = static_cast(d2); transposed = true; // CxN } else { return false; // 明确拒绝，不 fallback } } else if (t.dims.size() == 2) { // [N, C] or [C, N] const uint32_t d0 = t.dims[0]; const uint32_t d1 = t.dims[1]; if (static_cast(d1) == c) { n = static_cast(d0); transposed = false; } else if (static_cast(d0) == c) { n = static_cast(d1); transposed = true; } } if (n <= 0) { if (elem_cnt % static_cast(c) != 0) return false; n = static_cast(elem_cnt / static_cast(c)); transposed = false; } if (static_cast(n) * static_cast(c) != elem_cnt) { return false; } out.n = n; out.data.resize(static_cast(n) * static_cast(c)); auto ReadElem = [&](size_t idx) -> float { if (is_float32) { const float* fp = reinterpret_cast(t.data); return fp[idx]; } #if defined(RK3588_ENABLE_RKNN) if (is_float16) { const uint16_t* hp = reinterpret_cast(t.data); return HalfToFloat(hp[idx]); } if (t.type == RKNN_TENSOR_INT8) { const int8_t* p = reinterpret_cast(t.data); return Dequant(p[idx], t.zp, t.scale); } #endif const uint8_t* p = reinterpret_cast(t.data); return Dequant(p[idx], t.zp, t.scale); }; if (!transposed) { for (size_t i = 0; i < out.data.size(); ++i) { out.data[i] = ReadElem(i); } } else { // Input is [C, N] contiguous. Transpose to [N, C]. for (int ci = 0; ci < c; ++ci) { for (int ni = 0; ni < n; ++ni) { const size_t src_idx = static_cast(ci) * static_cast(n) + static_cast(ni); const size_t dst_idx = static_cast(ni) * static_cast(c) + static_cast(ci); out.data[dst_idx] = ReadElem(src_idx); } } } return true; } std::vector GenerateRetinaFacePriors(int in_w, int in_h, const std::vector& steps, const std::vector>& min_sizes) { std::vector priors; if (steps.empty() || steps.size() != min_sizes.size()) return priors; priors.reserve(5000); for (size_t s = 0; s < steps.size(); ++s) { const int step = steps[s]; const int fm_w = in_w / step; const int fm_h = in_h / step; for (int i = 0; i < fm_h; ++i) { for (int j = 0; j < fm_w; ++j) { for (int ms : min_sizes[s]) { const float s_kx = static_cast(ms) / static_cast(in_w); const float s_ky = static_cast(ms) / static_cast(in_h); const float cx = (static_cast(j) + 0.5f) * static_cast(step) / static_cast(in_w); const float cy = (static_cast(i) + 0.5f) * static_cast(step) / static_cast(in_h); priors.push_back(Prior{cx, cy, s_kx, s_ky}); } } } } return priors; } void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride, uint8_t* dst, int dst_w, int dst_h, bool swap_rb) { const float scale_x = static_cast(src_w) / static_cast(dst_w); const float scale_y = static_cast(src_h) / static_cast(dst_h); for (int y = 0; y < dst_h; ++y) { const float fy = (static_cast(y) + 0.5f) * scale_y - 0.5f; int y0 = static_cast(std::floor(fy)); int y1 = y0 + 1; const float wy1 = fy - static_cast(y0); const float wy0 = 1.0f - wy1; y0 = ClampInt(y0, 0, src_h - 1); y1 = ClampInt(y1, 0, src_h - 1); const uint8_t* row0 = src + static_cast(y0) * static_cast(src_stride); const uint8_t* row1 = src + static_cast(y1) * static_cast(src_stride); uint8_t* out = dst + static_cast(y) * static_cast(dst_w) * 3; for (int x = 0; x < dst_w; ++x) { const float fx = (static_cast(x) + 0.5f) * scale_x - 0.5f; int x0 = static_cast(std::floor(fx)); int x1 = x0 + 1; const float wx1 = fx - static_cast(x0); const float wx0 = 1.0f - wx1; x0 = ClampInt(x0, 0, src_w - 1); x1 = ClampInt(x1, 0, src_w - 1); const uint8_t* p00 = row0 + x0 * 3; const uint8_t* p01 = row0 + x1 * 3; const uint8_t* p10 = row1 + x0 * 3; const uint8_t* p11 = row1 + x1 * 3; for (int c = 0; c < 3; ++c) { const float v = (static_cast(p00[c]) * wx0 + static_cast(p01[c]) * wx1) * wy0 + (static_cast(p10[c]) * wx0 + static_cast(p11[c]) * wx1) * wy1; out[c] = static_cast(ClampInt(static_cast(v + 0.5f), 0, 255)); } if (swap_rb) { std::swap(out[0], out[2]); } out += 3; } } } } // namespace class AiFaceDetNode : public INode { public: std::string Id() const override { return id_; } std::string Type() const override { return "ai_face_det"; } bool Init(const SimpleJson& config, const NodeContext& ctx) override { id_ = config.ValueOr("id", "face_det"); model_path_ = config.ValueOr("model_path", ""); std::shared_ptr snap; BuildFaceDetConfigSnapshot(config, nullptr, snap); { std::lock_guard lock(mu_); cfg_ = std::move(snap); #if defined(RK3588_ENABLE_RKNN) priors_cache_ = {}; #endif } input_queue_ = ctx.input_queue; output_queues_ = ctx.output_queues; if (!input_queue_) { LogError("[ai_face_det] no input queue for node " + id_); return false; } if (output_queues_.empty()) { LogError("[ai_face_det] no output queue for node " + id_); return false; } infer_backend_ = ctx.infer_backend; if (!infer_backend_) { LogError("[ai_face_det] no infer backend for node " + id_); return false; } #if defined(RK3588_ENABLE_RKNN) if (model_path_.empty()) { LogError("[ai_face_det] model_path is required"); return false; } std::string err; model_handle_ = infer_backend_->LoadModel(model_path_, err); if (model_handle_ == kInvalidModelHandle) { LogError("[ai_face_det] failed to load model: " + err); return false; } ModelInfo info; if (infer_backend_->GetModelInfo(model_handle_, info)) { model_w_ = info.input_width; model_h_ = info.input_height; n_output_ = info.n_output; } LogInfo("[ai_face_det] model loaded: " + model_path_ + " (" + std::to_string(model_w_) + "x" + std::to_string(model_h_) + ", outputs=" + std::to_string(n_output_) + ")"); #else LogWarn("[ai_face_det] RKNN disabled, will passthrough frames"); #endif return true; } bool Start() override { std::shared_ptr cfg; { std::lock_guard lock(mu_); cfg = cfg_; } const float conf = cfg ? cfg->conf_thresh : 0.0f; const float nms = cfg ? cfg->nms_thresh : 0.0f; const int max_faces = cfg ? cfg->max_faces : 0; LogInfo("[ai_face_det] start id=" + id_ + " conf=" + std::to_string(conf) + " nms=" + std::to_string(nms) + " max_faces=" + std::to_string(max_faces)); return true; } bool UpdateConfig(const SimpleJson& new_config) override { const std::string new_id = new_config.ValueOr("id", id_); if (!new_id.empty() && new_id != id_) return false; const std::string new_model = new_config.ValueOr("model_path", model_path_); if (new_model != model_path_) { // Changing model requires graph rebuild. return false; } std::shared_ptr base; { std::lock_guard lock(mu_); base = cfg_; } std::shared_ptr snap; BuildFaceDetConfigSnapshot(new_config, base, snap); { std::lock_guard lock(mu_); cfg_ = std::move(snap); #if defined(RK3588_ENABLE_RKNN) priors_cache_ = {}; #endif } return true; } void Stop() override { #if defined(RK3588_ENABLE_RKNN) if (model_handle_ != kInvalidModelHandle) { infer_backend_->UnloadModel(model_handle_); model_handle_ = kInvalidModelHandle; } #endif LogInfo("[ai_face_det] stop id=" + id_); } NodeStatus Process(FramePtr frame) override { if (!frame) return NodeStatus::DROP; #if defined(RK3588_ENABLE_RKNN) Run(frame); #endif Push(frame); return NodeStatus::OK; } private: void Push(FramePtr frame) { for (auto& q : output_queues_) q->Push(frame); } #if defined(RK3588_ENABLE_RKNN) void Run(FramePtr frame) { if (!frame->data || frame->data_size == 0) return; if (frame->format != PixelFormat::RGB && frame->format != PixelFormat::BGR) { LogWarn("[ai_face_det] input must be RGB/BGR"); return; } const int src_w = frame->width; const int src_h = frame->height; const size_t src_row = static_cast(src_w) * 3; const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data; const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride : (frame->stride > 0 ? frame->stride : static_cast(src_row)); if (!src || src_stride <= 0) return; std::shared_ptr cfg; { std::lock_guard lock(mu_); cfg = cfg_; } if (!cfg) return; const bool need_swap = (frame->format == PixelFormat::BGR && cfg->input_format == "rgb") || (frame->format == PixelFormat::RGB && cfg->input_format == "bgr"); const bool want_float_input = (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32"); const int in_w = model_w_ > 0 ? model_w_ : src_w; const int in_h = model_h_ > 0 ? model_h_ : src_h; const size_t in_size = static_cast(in_w) * static_cast(in_h) * 3; const uint8_t* input_ptr = nullptr; // Fast path: already packed, correct size, no channel swap. const bool fast_path = (!need_swap && src_w == in_w && src_h == in_h && static_cast(src_stride) == src_row && frame->data_size >= src_row * static_cast(src_h)); const bool sync_src = (frame->DmaFd() >= 0) && (want_float_input || !fast_path); if (sync_src) frame->SyncStart(); if (fast_path) { input_ptr = src; } else { input_buf_.resize(in_size); if (src_w == in_w && src_h == in_h && static_cast(src_stride) == src_row) { memcpy(input_buf_.data(), src, in_size); if (need_swap) { for (size_t i = 0; i < in_size; i += 3) { std::swap(input_buf_[i], input_buf_[i + 2]); } } } else { ResizeRgbBilinear(src, src_w, src_h, src_stride, input_buf_.data(), in_w, in_h, need_swap); } input_ptr = input_buf_.data(); } InferInput input; input.width = in_w; input.height = in_h; input.is_nhwc = true; // Default: keep existing UINT8 behavior. if (want_float_input) { float_input_buf_.resize(static_cast(in_w) * static_cast(in_h) * 3); const size_t pix = static_cast(in_w) * static_cast(in_h); const uint8_t* p = reinterpret_cast(input_ptr); for (size_t i = 0; i < pix; ++i) { for (int c = 0; c < 3; ++c) { float x = static_cast(p[i * 3 + static_cast(c)]); if (cfg->norm_use_mean_std) { const float st = std::fabs(cfg->norm_std[static_cast(c)]) < 1e-6f ? 1.0f : cfg->norm_std[static_cast(c)]; x = (x - cfg->norm_mean[static_cast(c)]) / st; } else { x = x * cfg->norm_scale + cfg->norm_bias; } float_input_buf_[i * 3 + static_cast(c)] = x; } } input.data = float_input_buf_.data(); input.size = float_input_buf_.size() * sizeof(float); input.type = RKNN_TENSOR_FLOAT32; } else { input.data = input_ptr; input.size = in_size; input.type = RKNN_TENSOR_UINT8; } if (sync_src) frame->SyncEnd(); auto r = infer_backend_->InferBorrowed(model_handle_, input); if (!r.success) { LogWarn("[ai_face_det] inference failed: " + r.error); return; } std::vector tensors; tensors.reserve(r.outputs.size()); for (const auto& o : r.outputs) { Tensor t; t.data = o.data; t.size = o.size; t.zp = o.zp; t.scale = o.scale; t.dims = o.dims; t.type = o.type; tensors.push_back(std::move(t)); } FaceDetResult det; det.img_w = src_w; det.img_h = src_h; det.model_name = "retinaface"; std::shared_ptr> priors = GetRetinaFacePriors(cfg, in_w, in_h); DecodeRetinaFace(tensors, src_w, src_h, in_w, in_h, *cfg, priors.get(), det); frame->face_det = std::make_shared(std::move(det)); } struct PriorsCache { int in_w = 0; int in_h = 0; const FaceDetConfigSnapshot* cfg_ptr = nullptr; std::shared_ptr> priors; }; std::shared_ptr> GetRetinaFacePriors(const std::shared_ptr& cfg, int in_w, int in_h) { if (!cfg || in_w <= 0 || in_h <= 0) return nullptr; { std::lock_guard lock(mu_); if (priors_cache_.priors && priors_cache_.cfg_ptr == cfg.get() && priors_cache_.in_w == in_w && priors_cache_.in_h == in_h) { return priors_cache_.priors; } } const std::vector built = GenerateRetinaFacePriors(in_w, in_h, cfg->steps, cfg->min_sizes); auto sp = std::make_shared>(built); { std::lock_guard lock(mu_); priors_cache_.cfg_ptr = cfg.get(); priors_cache_.in_w = in_w; priors_cache_.in_h = in_h; priors_cache_.priors = sp; return priors_cache_.priors; } } void DecodeRetinaFace(const std::vector& outs, int orig_w, int orig_h, int in_w, int in_h, const FaceDetConfigSnapshot& cfg, const std::vector* priors_ptr, FaceDetResult& out) { // Find loc/conf/landms tensors. std::vector locs; std::vector confs; std::vector landms; locs.reserve(4); confs.reserve(4); landms.reserve(4); for (const auto& t : outs) { NcTensor tmp; if (ExtractNc(t, 4, tmp)) { locs.push_back(std::move(tmp)); continue; } if (ExtractNc(t, 2, tmp)) { confs.push_back(std::move(tmp)); continue; } if (ExtractNc(t, 10, tmp)) { landms.push_back(std::move(tmp)); continue; } } if (locs.empty() || confs.empty()) return; // Concatenate along N. auto Concat = [](const std::vector& parts) -> NcTensor { NcTensor all; if (parts.empty()) return all; all.c = parts[0].c; int total_n = 0; for (const auto& p : parts) total_n += p.n; all.n = total_n; all.data.resize(static_cast(all.n) * static_cast(all.c)); size_t off = 0; for (const auto& p : parts) { if (p.c != all.c) continue; memcpy(all.data.data() + off, p.data.data(), p.data.size() * sizeof(float)); off += p.data.size(); } return all; }; NcTensor loc = Concat(locs); NcTensor conf = Concat(confs); NcTensor lmk; if (cfg.output_landmarks && !landms.empty()) lmk = Concat(landms); if (loc.n <= 0 || conf.n != loc.n) return; const int n = loc.n; const std::vector empty_priors; const std::vector& priors = priors_ptr ? *priors_ptr : empty_priors; if (!priors.empty() && static_cast(priors.size()) != n) { // Mismatch: can't reliably decode. LogWarn("[ai_face_det] prior mismatch: priors=" + std::to_string(priors.size()) + " n=" + std::to_string(n)); return; } const float sx = static_cast(orig_w) / static_cast(in_w); const float sy = static_cast(orig_h) / static_cast(in_h); std::vector boxes; std::vector scores; std::vector> lmks; boxes.reserve(static_cast(n)); scores.reserve(static_cast(n)); if (cfg.output_landmarks) lmks.reserve(static_cast(n)); constexpr float var0 = 0.1f; constexpr float var1 = 0.2f; for (int i = 0; i < n; ++i) { const float s0 = conf.data[static_cast(i) * 2 + 0]; const float s1 = conf.data[static_cast(i) * 2 + 1]; float score; if (s0 >= 0.0f && s0 <= 1.0f && s1 >= 0.0f && s1 <= 1.0f && std::fabs((s0 + s1) - 1.0f) < 0.1f) { score = s1; } else { score = Softmax2(s0, s1); } if (score < cfg.conf_thresh) continue; const Prior p = priors.empty() ? Prior{0, 0, 0, 0} : priors[static_cast(i)]; const float dx = loc.data[static_cast(i) * 4 + 0]; const float dy = loc.data[static_cast(i) * 4 + 1]; const float dw = loc.data[static_cast(i) * 4 + 2]; const float dh = loc.data[static_cast(i) * 4 + 3]; const float cx = p.cx + dx * var0 * p.w; const float cy = p.cy + dy * var0 * p.h; const float ww = p.w * std::exp(dw * var1); const float hh = p.h * std::exp(dh * var1); float x1 = (cx - ww * 0.5f) * static_cast(in_w); float y1 = (cy - hh * 0.5f) * static_cast(in_h); float x2 = (cx + ww * 0.5f) * static_cast(in_w); float y2 = (cy + hh * 0.5f) * static_cast(in_h); x1 *= sx; x2 *= sx; y1 *= sy; y2 *= sy; Rect bb; bb.x = static_cast(ClampInt(static_cast(x1), 0, orig_w - 1)); bb.y = static_cast(ClampInt(static_cast(y1), 0, orig_h - 1)); const float rx2 = static_cast(ClampInt(static_cast(x2), 0, orig_w - 1)); const float ry2 = static_cast(ClampInt(static_cast(y2), 0, orig_h - 1)); bb.w = std::max(0.0f, rx2 - bb.x); bb.h = std::max(0.0f, ry2 - bb.y); if (bb.w <= 1.0f || bb.h <= 1.0f) continue; boxes.push_back(bb); scores.push_back(score); if (cfg.output_landmarks && !lmk.data.empty() && lmk.n == n) { std::array pts{}; for (int k = 0; k < 5; ++k) { const float lx = lmk.data[static_cast(i) * 10 + k * 2 + 0]; const float ly = lmk.data[static_cast(i) * 10 + k * 2 + 1]; const float px = (p.cx + lx * var0 * p.w) * static_cast(in_w) * sx; const float py = (p.cy + ly * var0 * p.h) * static_cast(in_h) * sy; pts[k].x = static_cast(ClampInt(static_cast(px), 0, orig_w - 1)); pts[k].y = static_cast(ClampInt(static_cast(py), 0, orig_h - 1)); } lmks.push_back(pts); } } if (boxes.empty()) return; std::vector keep; NmsSorted(boxes, scores, cfg.nms_thresh, keep); if (keep.empty()) return; const int out_n = std::min(cfg.max_faces, static_cast(keep.size())); out.faces.reserve(static_cast(out_n)); for (int i = 0; i < out_n; ++i) { const int k = keep[static_cast(i)]; FaceDetItem item; item.bbox = boxes[static_cast(k)]; item.score = scores[static_cast(k)]; item.track_id = -1; if (cfg.output_landmarks && k < static_cast(lmks.size())) { item.has_landmarks = true; item.landmarks = lmks[static_cast(k)]; } out.faces.push_back(std::move(item)); } } #endif std::string id_; std::string model_path_; mutable std::mutex mu_; std::shared_ptr cfg_; #if defined(RK3588_ENABLE_RKNN) PriorsCache priors_cache_; #endif std::shared_ptr> input_queue_; std::vector>> output_queues_; std::shared_ptr infer_backend_; std::vector input_buf_; std::vector float_input_buf_; ModelHandle model_handle_ = kInvalidModelHandle; int model_w_ = 320; int model_h_ = 320; uint32_t n_output_ = 0; }; REGISTER_NODE(AiFaceDetNode, "ai_face_det"); } // namespace rk3588