#include #include #include #include #include #include #include #include #include #include #include #include #include #include "ai_scheduler.h" #include "face/face_result.h" #include "node.h" #include "utils/dma_alloc.h" #include "utils/logger.h" #if defined(RK3588_ENABLE_SQLITE3) #include #endif namespace rk3588 { namespace { inline int ClampInt(int v, int lo, int hi) { return v < lo ? lo : (v > hi ? hi : v); } bool ReadFileToString(const std::string& path, std::string& out) { std::ifstream ifs(path, std::ios::binary); if (!ifs) return false; ifs.seekg(0, std::ios::end); std::streamsize sz = ifs.tellg(); if (sz < 0) return false; ifs.seekg(0, std::ios::beg); out.resize(static_cast(sz)); if (sz == 0) return true; ifs.read(&out[0], sz); return ifs.good(); } struct GalleryEntry { int person_id = -1; std::string name; std::vector emb; // L2 normalized }; inline float HalfToFloat(uint16_t h) { const uint32_t sign = (static_cast(h & 0x8000u)) << 16; uint32_t exp = (h & 0x7C00u) >> 10; uint32_t mant = (h & 0x03FFu); uint32_t f = 0; if (exp == 0) { if (mant == 0) { f = sign; } else { exp = 1; while ((mant & 0x0400u) == 0) { mant <<= 1; --exp; } mant &= 0x03FFu; exp = exp + (127 - 15); f = sign | (exp << 23) | (mant << 13); } } else if (exp == 31) { f = sign | 0x7F800000u | (mant << 13); } else { exp = exp + (127 - 15); f = sign | (exp << 23) | (mant << 13); } float out; memcpy(&out, &f, sizeof(out)); return out; } class FaceGallery { public: void SetExpectedDim(int dim) { expected_dim_ = dim; } void SetPreferredDtype(std::string dtype) { for (auto& c : dtype) c = static_cast(std::tolower(static_cast(c))); preferred_dtype_ = std::move(dtype); } bool LoadSqliteBackend(const std::string& db_path, std::string& err) { #if defined(RK3588_ENABLE_SQLITE3) entries_.clear(); dim_ = 0; sqlite3* db = nullptr; if (sqlite3_open_v2(db_path.c_str(), &db, SQLITE_OPEN_READONLY, nullptr) != SQLITE_OK || !db) { err = "failed to open sqlite db: " + db_path; if (db) sqlite3_close(db); return false; } const char* sql = "SELECT p.id, p.name, e.emb " "FROM embedding e JOIN person p ON e.person_id = p.id"; sqlite3_stmt* stmt = nullptr; if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK || !stmt) { err = "sqlite prepare failed"; if (stmt) sqlite3_finalize(stmt); sqlite3_close(db); return false; } while (sqlite3_step(stmt) == SQLITE_ROW) { const int person_id = sqlite3_column_int(stmt, 0); const unsigned char* name_u8 = sqlite3_column_text(stmt, 1); const void* blob = sqlite3_column_blob(stmt, 2); const int blob_sz = sqlite3_column_bytes(stmt, 2); if (!blob || blob_sz <= 0) continue; const int expected_dim = expected_dim_; int dim = 0; enum class BlobType { F16, F32, F64 } blob_type = BlobType::F32; if (expected_dim > 0) { if (blob_sz == expected_dim * 4) { dim = expected_dim; blob_type = BlobType::F32; } else if (blob_sz == expected_dim * 2) { dim = expected_dim; blob_type = BlobType::F16; } else if (blob_sz == expected_dim * 8) { dim = expected_dim; blob_type = BlobType::F64; } else { continue; } } else { if ((blob_sz % 4) != 0) continue; dim = blob_sz / 4; blob_type = BlobType::F32; } // Optional dtype preference (only affects ambiguous cases when expected_dim==0). if (expected_dim <= 0) { if (preferred_dtype_ == "f16" && (blob_sz % 2) == 0) { blob_type = BlobType::F16; dim = blob_sz / 2; } else if (preferred_dtype_ == "f64" && (blob_sz % 8) == 0) { blob_type = BlobType::F64; dim = blob_sz / 8; } } if (dim_ == 0) dim_ = dim; if (dim != dim_) continue; GalleryEntry e; e.person_id = person_id; e.name = name_u8 ? reinterpret_cast(name_u8) : std::string{}; e.emb.resize(static_cast(dim_)); if (blob_type == BlobType::F32) { memcpy(e.emb.data(), blob, static_cast(dim_) * sizeof(float)); } else if (blob_type == BlobType::F16) { const uint16_t* hp = reinterpret_cast(blob); for (int i = 0; i < dim_; ++i) e.emb[static_cast(i)] = HalfToFloat(hp[i]); } else { const double* dp = reinterpret_cast(blob); for (int i = 0; i < dim_; ++i) e.emb[static_cast(i)] = static_cast(dp[i]); } L2Normalize(e.emb); entries_.push_back(std::move(e)); } sqlite3_finalize(stmt); sqlite3_close(db); return true; #else (void)db_path; err = "sqlite3 support not enabled at build time"; return false; #endif } bool LoadFileBackend(const std::string& base_path, std::string& err) { entries_.clear(); dim_ = 0; const std::string json_path = base_path + ".json"; const std::string bin_path = base_path + ".bin"; std::string json_text; if (!ReadFileToString(json_path, json_text)) { err = "failed to read " + json_path; return false; } SimpleJson root; std::string jerr; if (!ParseSimpleJson(json_text, root, jerr) || !root.IsObject()) { err = "invalid json: " + jerr; return false; } dim_ = root.ValueOr("dim", 0); if (dim_ <= 0) { err = "gallery dim missing"; return false; } const SimpleJson* persons = root.Find("persons"); if (!persons || !persons->IsArray()) { err = "gallery persons missing"; return false; } const size_t n = persons->AsArray().size(); if (n == 0) { // Empty gallery is valid. return true; } std::ifstream ifs(bin_path, std::ios::binary); if (!ifs) { err = "failed to open " + bin_path; return false; } const size_t total_floats = n * static_cast(dim_); std::vector buf(total_floats); ifs.read(reinterpret_cast(buf.data()), static_cast(total_floats * sizeof(float))); if (!ifs.good()) { err = "failed to read embeddings from " + bin_path; return false; } entries_.reserve(n); for (size_t i = 0; i < n; ++i) { const SimpleJson& p = persons->AsArray()[i]; GalleryEntry e; e.person_id = p.ValueOr("id", -1); e.name = p.ValueOr("name", ""); e.emb.resize(static_cast(dim_)); memcpy(e.emb.data(), buf.data() + i * static_cast(dim_), static_cast(dim_) * sizeof(float)); L2Normalize(e.emb); entries_.push_back(std::move(e)); } return true; } int Dim() const { return dim_; } size_t Size() const { return entries_.size(); } struct SearchResult { int best_person_id = -1; std::string best_name; float best_sim = 0.0f; float second_sim = 0.0f; }; SearchResult SearchTop2(const std::vector& emb_normed) const { SearchResult r; if (entries_.empty() || dim_ <= 0) return r; if (static_cast(emb_normed.size()) != dim_) return r; float best = -std::numeric_limits::infinity(); float second = -std::numeric_limits::infinity(); int best_idx = -1; for (size_t i = 0; i < entries_.size(); ++i) { const float sim = Dot(emb_normed, entries_[i].emb); if (sim > best) { second = best; best = sim; best_idx = static_cast(i); } else if (sim > second) { second = sim; } } if (best_idx >= 0) { r.best_person_id = entries_[static_cast(best_idx)].person_id; r.best_name = entries_[static_cast(best_idx)].name; r.best_sim = best; r.second_sim = std::isfinite(second) ? second : 0.0f; } return r; } private: static float Dot(const std::vector& a, const std::vector& b) { float s = 0.0f; for (size_t i = 0; i < a.size(); ++i) s += a[i] * b[i]; return s; } static void L2Normalize(std::vector& v) { double ss = 0.0; for (float x : v) ss += static_cast(x) * static_cast(x); const double norm = std::sqrt(ss); if (norm <= 0.0) return; const float inv = static_cast(1.0 / norm); for (float& x : v) x *= inv; } int dim_ = 0; int expected_dim_ = 512; std::string preferred_dtype_ = "auto"; std::vector entries_; }; struct SimilarityTransform { // x' = a*x - b*y + c // y' = b*x + a*y + d float a = 1.0f; float b = 0.0f; float c = 0.0f; float d = 0.0f; }; bool Solve4x4(float A[4][4], float b[4], float x[4]) { // Gaussian elimination with partial pivoting. for (int i = 0; i < 4; ++i) { int pivot = i; float best = std::fabs(A[i][i]); for (int r = i + 1; r < 4; ++r) { float v = std::fabs(A[r][i]); if (v > best) { best = v; pivot = r; } } if (best < 1e-8f) return false; if (pivot != i) { for (int c = i; c < 4; ++c) std::swap(A[i][c], A[pivot][c]); std::swap(b[i], b[pivot]); } const float diag = A[i][i]; for (int c = i; c < 4; ++c) A[i][c] /= diag; b[i] /= diag; for (int r = 0; r < 4; ++r) { if (r == i) continue; const float f = A[r][i]; if (std::fabs(f) < 1e-8f) continue; for (int c = i; c < 4; ++c) A[r][c] -= f * A[i][c]; b[r] -= f * b[i]; } } for (int i = 0; i < 4; ++i) x[i] = b[i]; return true; } bool ComputeSimilarity(const std::array& src, const std::array& dst, SimilarityTransform& out) { // Least squares on similarity model. float ATA[4][4] = {}; float ATb[4] = {}; auto Acc = [&](const float row[4], float rhs) { for (int i = 0; i < 4; ++i) { ATb[i] += row[i] * rhs; for (int j = 0; j < 4; ++j) { ATA[i][j] += row[i] * row[j]; } } }; for (int i = 0; i < 5; ++i) { const float x = src[static_cast(i)].x; const float y = src[static_cast(i)].y; const float u = dst[static_cast(i)].x; const float v = dst[static_cast(i)].y; const float r1[4] = {x, -y, 1.0f, 0.0f}; const float r2[4] = {y, x, 0.0f, 1.0f}; Acc(r1, u); Acc(r2, v); } float A[4][4]; float b[4]; for (int i = 0; i < 4; ++i) { b[i] = ATb[i]; for (int j = 0; j < 4; ++j) A[i][j] = ATA[i][j]; } float x[4]; if (!Solve4x4(A, b, x)) return false; out.a = x[0]; out.b = x[1]; out.c = x[2]; out.d = x[3]; return true; } struct InvTransform { float m00 = 1.0f, m01 = 0.0f, m02 = 0.0f; float m10 = 0.0f, m11 = 1.0f, m12 = 0.0f; }; bool InvertSimilarity(const SimilarityTransform& t, InvTransform& inv) { const float det = t.a * t.a + t.b * t.b; if (det < 1e-12f) return false; const float inv_det = 1.0f / det; inv.m00 = t.a * inv_det; inv.m01 = t.b * inv_det; inv.m10 = -t.b * inv_det; inv.m11 = t.a * inv_det; inv.m02 = -(t.a * t.c + t.b * t.d) * inv_det; inv.m12 = (t.b * t.c - t.a * t.d) * inv_det; return true; } inline uint8_t BilinearAt(const uint8_t* src, int w, int h, int stride, float x, float y, int c) { if (x < 0.0f || y < 0.0f || x > static_cast(w - 1) || y > static_cast(h - 1)) return 0; const int x0 = ClampInt(static_cast(std::floor(x)), 0, w - 1); const int y0 = ClampInt(static_cast(std::floor(y)), 0, h - 1); const int x1 = ClampInt(x0 + 1, 0, w - 1); const int y1 = ClampInt(y0 + 1, 0, h - 1); const float wx = x - static_cast(x0); const float wy = y - static_cast(y0); const float w00 = (1.0f - wx) * (1.0f - wy); const float w01 = wx * (1.0f - wy); const float w10 = (1.0f - wx) * wy; const float w11 = wx * wy; const uint8_t* p00 = src + y0 * stride + x0 * 3; const uint8_t* p01 = src + y0 * stride + x1 * 3; const uint8_t* p10 = src + y1 * stride + x0 * 3; const uint8_t* p11 = src + y1 * stride + x1 * 3; const float v = static_cast(p00[c]) * w00 + static_cast(p01[c]) * w01 + static_cast(p10[c]) * w10 + static_cast(p11[c]) * w11; return static_cast(ClampInt(static_cast(v + 0.5f), 0, 255)); } void WarpFace(const uint8_t* src, int w, int h, int stride, const InvTransform& inv, uint8_t* dst, int dst_w, int dst_h, bool swap_rb) { for (int y = 0; y < dst_h; ++y) { uint8_t* row = dst + static_cast(y) * static_cast(dst_w) * 3; for (int x = 0; x < dst_w; ++x) { const float xs = inv.m00 * static_cast(x) + inv.m01 * static_cast(y) + inv.m02; const float ys = inv.m10 * static_cast(x) + inv.m11 * static_cast(y) + inv.m12; uint8_t r = BilinearAt(src, w, h, stride, xs, ys, 0); uint8_t g = BilinearAt(src, w, h, stride, xs, ys, 1); uint8_t b = BilinearAt(src, w, h, stride, xs, ys, 2); if (swap_rb) std::swap(r, b); row[0] = r; row[1] = g; row[2] = b; row += 3; } } } void CropResize(const uint8_t* src, int w, int h, int stride, const Rect& bbox, uint8_t* dst, int dst_w, int dst_h, bool swap_rb) { const float x0 = bbox.x; const float y0 = bbox.y; const float bw = std::max(1.0f, bbox.w); const float bh = std::max(1.0f, bbox.h); for (int y = 0; y < dst_h; ++y) { uint8_t* row = dst + static_cast(y) * static_cast(dst_w) * 3; const float sy = y0 + (static_cast(y) + 0.5f) * (bh / static_cast(dst_h)) - 0.5f; for (int x = 0; x < dst_w; ++x) { const float sx = x0 + (static_cast(x) + 0.5f) * (bw / static_cast(dst_w)) - 0.5f; uint8_t r = BilinearAt(src, w, h, stride, sx, sy, 0); uint8_t g = BilinearAt(src, w, h, stride, sx, sy, 1); uint8_t b = BilinearAt(src, w, h, stride, sx, sy, 2); if (swap_rb) std::swap(r, b); row[0] = r; row[1] = g; row[2] = b; row += 3; } } } #if defined(RK3588_ENABLE_RKNN) template inline float Dequant(T q, int32_t zp, float scale) { return (static_cast(q) - static_cast(zp)) * scale; } bool DecodeEmbedding(const AiScheduler::BorrowedOutput& o, std::vector& emb) { if (!o.data || o.size == 0) return false; size_t elem_size = 1; bool is_float = false; bool is_float16 = false; if (o.type == RKNN_TENSOR_FLOAT16) { elem_size = 2; is_float16 = true; } if (o.type == RKNN_TENSOR_FLOAT32) { elem_size = 4; is_float = true; } const size_t elem_cnt = elem_size > 0 ? (o.size / elem_size) : 0; if (elem_cnt == 0) return false; emb.resize(elem_cnt); if (is_float) { const float* fp = reinterpret_cast(o.data); for (size_t i = 0; i < elem_cnt; ++i) emb[i] = fp[i]; return true; } if (is_float16) { const uint16_t* hp = reinterpret_cast(o.data); for (size_t i = 0; i < elem_cnt; ++i) emb[i] = HalfToFloat(hp[i]); return true; } if (o.type == RKNN_TENSOR_INT8) { const int8_t* p = reinterpret_cast(o.data); for (size_t i = 0; i < elem_cnt; ++i) emb[i] = Dequant(p[i], o.zp, o.scale); return true; } const uint8_t* p = reinterpret_cast(o.data); for (size_t i = 0; i < elem_cnt; ++i) emb[i] = Dequant(p[i], o.zp, o.scale); return true; } void L2Normalize(std::vector& v) { double ss = 0.0; for (float x : v) ss += static_cast(x) * static_cast(x); const double norm = std::sqrt(ss); if (norm <= 0.0) return; const float inv = static_cast(1.0 / norm); for (float& x : v) x *= inv; } #else bool DecodeEmbedding(const AiScheduler::BorrowedOutput& /*o*/, std::vector& /*emb*/) { return false; } void L2Normalize(std::vector& /*v*/) {} #endif struct FaceRecogConfigSnapshot { bool align = true; bool emit_embedding = false; int max_faces = 10; float thr_accept = 0.45f; float thr_margin = 0.05f; std::string model_input_format = "rgb"; std::string input_dtype = "uint8"; float norm_scale = 1.0f; float norm_bias = 0.0f; bool norm_use_mean_std = false; std::array norm_mean{{0.0f, 0.0f, 0.0f}}; std::array norm_std{{1.0f, 1.0f, 1.0f}}; std::string gallery_backend = "sqlite"; std::string gallery_path = "./models/face_gallery.db"; bool gallery_load_on_start = true; int gallery_expected_dim = 512; std::string gallery_dtype = "auto"; int gallery_reload_seq = 0; }; static bool BuildFaceRecogConfigSnapshot(const SimpleJson& config, const std::shared_ptr& base, std::shared_ptr& out) { auto snap = std::make_shared(); if (base) *snap = *base; snap->align = config.ValueOr("align", snap->align); snap->emit_embedding = config.ValueOr("emit_embedding", snap->emit_embedding); snap->max_faces = std::max(1, config.ValueOr("max_faces", snap->max_faces)); if (const SimpleJson* th = config.Find("threshold"); th && th->IsObject()) { snap->thr_accept = th->ValueOr("accept", snap->thr_accept); snap->thr_margin = th->ValueOr("margin", snap->thr_margin); } { std::string fmt = config.ValueOr("input_format", snap->model_input_format); for (auto& c : fmt) c = static_cast(std::tolower(static_cast(c))); snap->model_input_format = std::move(fmt); } { std::string dtype = config.ValueOr("input_dtype", snap->input_dtype); for (auto& c : dtype) c = static_cast(std::tolower(static_cast(c))); snap->input_dtype = std::move(dtype); } if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) { bool use_ms = false; if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) { for (int i = 0; i < 3; ++i) { snap->norm_mean[static_cast(i)] = static_cast(mean->AsArray()[static_cast(i)].AsNumber(snap->norm_mean[static_cast(i)])); } use_ms = true; } if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) { for (int i = 0; i < 3; ++i) { snap->norm_std[static_cast(i)] = static_cast(st->AsArray()[static_cast(i)].AsNumber(snap->norm_std[static_cast(i)])); } use_ms = true; } snap->norm_use_mean_std = use_ms; snap->norm_scale = norm->ValueOr("scale", snap->norm_scale); snap->norm_bias = norm->ValueOr("bias", snap->norm_bias); } if (const SimpleJson* g = config.Find("gallery"); g && g->IsObject()) { snap->gallery_backend = g->ValueOr("backend", snap->gallery_backend); snap->gallery_path = g->ValueOr("path", snap->gallery_path); snap->gallery_load_on_start = g->ValueOr("load_on_start", snap->gallery_load_on_start); snap->gallery_expected_dim = std::max(0, g->ValueOr("expected_dim", snap->gallery_expected_dim)); snap->gallery_dtype = g->ValueOr("dtype", snap->gallery_dtype); snap->gallery_reload_seq = g->ValueOr("reload_seq", snap->gallery_reload_seq); } for (auto& c : snap->gallery_backend) c = static_cast(std::tolower(static_cast(c))); for (auto& c : snap->gallery_dtype) c = static_cast(std::tolower(static_cast(c))); out = std::move(snap); return true; } } // namespace class AiFaceRecogNode : public INode { public: std::string Id() const override { return id_; } std::string Type() const override { return "ai_face_recog"; } bool Init(const SimpleJson& config, const NodeContext& ctx) override { id_ = config.ValueOr("id", "face_recog"); model_path_ = config.ValueOr("model_path", ""); std::shared_ptr snap; BuildFaceRecogConfigSnapshot(config, nullptr, snap); { std::lock_guard lock(mu_); cfg_ = std::move(snap); gallery_.reset(); } input_queue_ = ctx.input_queue; output_queues_ = ctx.output_queues; if (!input_queue_) { LogError("[ai_face_recog] no input queue for node " + id_); return false; } if (output_queues_.empty()) { LogError("[ai_face_recog] no output queue for node " + id_); return false; } #if defined(RK3588_ENABLE_RKNN) if (model_path_.empty()) { LogError("[ai_face_recog] model_path is required"); return false; } std::string err; model_handle_ = AiScheduler::Instance().LoadModel(model_path_, err); if (model_handle_ == kInvalidModelHandle) { LogError("[ai_face_recog] failed to load model: " + err); return false; } ModelInfo info; if (AiScheduler::Instance().GetModelInfo(model_handle_, info)) { model_w_ = info.input_width; model_h_ = info.input_height; } LogInfo("[ai_face_recog] model loaded: " + model_path_ + " (" + std::to_string(model_w_) + "x" + std::to_string(model_h_) + ")"); #else LogWarn("[ai_face_recog] RKNN disabled, will passthrough frames"); #endif return true; } bool Start() override { std::shared_ptr cfg; { std::lock_guard lock(mu_); cfg = cfg_; } if (cfg && cfg->gallery_load_on_start) { ReloadGallery(*cfg); } const bool align = cfg ? cfg->align : false; const float thr_accept = cfg ? cfg->thr_accept : 0.0f; const float thr_margin = cfg ? cfg->thr_margin : 0.0f; LogInfo("[ai_face_recog] start id=" + id_ + " align=" + std::string(align ? "true" : "false") + " thr_accept=" + std::to_string(thr_accept) + " thr_margin=" + std::to_string(thr_margin)); return true; } bool UpdateConfig(const SimpleJson& new_config) override { const std::string new_id = new_config.ValueOr("id", id_); if (!new_id.empty() && new_id != id_) return false; const std::string new_model = new_config.ValueOr("model_path", model_path_); if (new_model != model_path_) { // Changing model requires graph rebuild. return false; } std::shared_ptr base; { std::lock_guard lock(mu_); base = cfg_; } std::shared_ptr snap; BuildFaceRecogConfigSnapshot(new_config, base, snap); bool reload = false; if (base && snap) { reload = (snap->gallery_backend != base->gallery_backend || snap->gallery_path != base->gallery_path || snap->gallery_expected_dim != base->gallery_expected_dim || snap->gallery_dtype != base->gallery_dtype || snap->gallery_reload_seq != base->gallery_reload_seq); } { std::lock_guard lock(mu_); cfg_ = snap; } if (reload && snap) { ReloadGallery(*snap); } return true; } void Stop() override { #if defined(RK3588_ENABLE_RKNN) if (model_handle_ != kInvalidModelHandle) { AiScheduler::Instance().UnloadModel(model_handle_); model_handle_ = kInvalidModelHandle; } #endif LogInfo("[ai_face_recog] stop id=" + id_); } NodeStatus Process(FramePtr frame) override { if (!frame) return NodeStatus::DROP; #if defined(RK3588_ENABLE_RKNN) Run(frame); #endif Push(frame); return NodeStatus::OK; } private: void Push(FramePtr frame) { for (auto& q : output_queues_) q->Push(frame); } void ReloadGallery(const FaceRecogConfigSnapshot& cfg) { if (cfg.gallery_path.empty()) return; std::string err; FaceGallery g; g.SetExpectedDim(cfg.gallery_expected_dim); g.SetPreferredDtype(cfg.gallery_dtype); bool ok = false; if (cfg.gallery_backend == "sqlite") { ok = g.LoadSqliteBackend(cfg.gallery_path, err); } else if (cfg.gallery_backend == "file") { ok = g.LoadFileBackend(cfg.gallery_path, err); } else { err = "unknown gallery backend: " + cfg.gallery_backend; } if (!ok) { if (!err.empty()) LogWarn("[ai_face_recog] gallery load failed: " + err); return; } auto sp = std::make_shared(std::move(g)); { std::lock_guard lock(mu_); gallery_ = sp; } LogInfo("[ai_face_recog] gallery loaded: n=" + std::to_string(sp->Size()) + " dim=" + std::to_string(sp->Dim())); } #if defined(RK3588_ENABLE_RKNN) void Run(FramePtr frame) { if (!frame->face_det || frame->face_det->faces.empty()) return; if (!frame->data || frame->data_size == 0) return; if (frame->format != PixelFormat::RGB && frame->format != PixelFormat::BGR) { LogWarn("[ai_face_recog] input must be RGB/BGR"); return; } const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data; const int w = frame->width; const int h = frame->height; const int stride = frame->planes[0].stride > 0 ? frame->planes[0].stride : (frame->stride > 0 ? frame->stride : w * 3); if (!src || stride <= 0) return; std::shared_ptr cfg; std::shared_ptr gallery; { std::lock_guard lock(mu_); cfg = cfg_; gallery = gallery_; } if (!cfg) return; const bool need_swap = (frame->format == PixelFormat::BGR && cfg->model_input_format == "rgb") || (frame->format == PixelFormat::RGB && cfg->model_input_format == "bgr"); const bool sync_src = (frame->dma_fd >= 0); if (sync_src) DmaSyncStartFd(frame->dma_fd); FaceRecogResult rr; rr.img_w = w; rr.img_h = h; rr.model_name = "arcface"; const int limit = std::min(cfg->max_faces, static_cast(frame->face_det->faces.size())); rr.items.reserve(static_cast(limit)); for (int i = 0; i < limit; ++i) { const FaceDetItem& face = frame->face_det->faces[static_cast(i)]; face_buf_.resize(static_cast(model_w_) * static_cast(model_h_) * 3); if (cfg->align && face.has_landmarks && model_w_ == 112 && model_h_ == 112) { const std::array dst = { Point2f{38.2946f, 51.6963f}, Point2f{73.5318f, 51.5014f}, Point2f{56.0252f, 71.7366f}, Point2f{41.5493f, 92.3655f}, Point2f{70.7299f, 92.2041f}, }; SimilarityTransform t; InvTransform inv; if (ComputeSimilarity(face.landmarks, dst, t) && InvertSimilarity(t, inv)) { WarpFace(src, w, h, stride, inv, face_buf_.data(), model_w_, model_h_, need_swap); } else { CropResize(src, w, h, stride, face.bbox, face_buf_.data(), model_w_, model_h_, need_swap); } } else { CropResize(src, w, h, stride, face.bbox, face_buf_.data(), model_w_, model_h_, need_swap); } InferInput in; in.width = model_w_; in.height = model_h_; in.is_nhwc = true; if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") { float_input_buf_.resize(static_cast(model_w_) * static_cast(model_h_) * 3); const size_t pix = static_cast(model_w_) * static_cast(model_h_); const uint8_t* p = face_buf_.data(); for (size_t ii = 0; ii < pix; ++ii) { for (int c = 0; c < 3; ++c) { float x = static_cast(p[ii * 3 + static_cast(c)]); if (cfg->norm_use_mean_std) { const float st = std::fabs(cfg->norm_std[static_cast(c)]) < 1e-6f ? 1.0f : cfg->norm_std[static_cast(c)]; x = (x - cfg->norm_mean[static_cast(c)]) / st; } else { x = x * cfg->norm_scale + cfg->norm_bias; } float_input_buf_[ii * 3 + static_cast(c)] = x; } } in.data = float_input_buf_.data(); in.size = float_input_buf_.size() * sizeof(float); in.type = RKNN_TENSOR_FLOAT32; } else { in.data = face_buf_.data(); in.size = face_buf_.size(); in.type = RKNN_TENSOR_UINT8; } auto r = AiScheduler::Instance().InferBorrowed(model_handle_, in); if (!r.success || r.outputs.empty()) { LogWarn(std::string("[ai_face_recog] inference failed: ") + (r.error.empty() ? "unknown" : r.error)); continue; } std::vector emb; if (!DecodeEmbedding(r.outputs[0], emb)) { continue; } L2Normalize(emb); FaceGallery::SearchResult sr; if (gallery && gallery->Size() > 0) { sr = gallery->SearchTop2(emb); } const bool accept = (sr.best_person_id >= 0) && (sr.best_sim >= cfg->thr_accept) && ((cfg->thr_margin <= 0.0f) || ((sr.best_sim - sr.second_sim) >= cfg->thr_margin)); FaceRecogItem item; item.bbox = face.bbox; item.has_landmarks = face.has_landmarks; item.landmarks = face.landmarks; item.best_person_id = accept ? sr.best_person_id : -1; item.best_name = accept ? sr.best_name : "unknown"; item.best_sim = sr.best_sim; item.second_sim = sr.second_sim; item.unknown = !accept; if (cfg->emit_embedding) item.embedding = emb; rr.items.push_back(std::move(item)); } if (sync_src) DmaSyncEndFd(frame->dma_fd); frame->face_recog = std::make_shared(std::move(rr)); } #endif std::string id_; std::string model_path_; mutable std::mutex mu_; std::shared_ptr cfg_; std::shared_ptr gallery_; std::shared_ptr> input_queue_; std::vector>> output_queues_; std::vector face_buf_; std::vector float_input_buf_; ModelHandle model_handle_ = kInvalidModelHandle; int model_w_ = 112; int model_h_ = 112; }; REGISTER_NODE(AiFaceRecogNode, "ai_face_recog"); } // namespace rk3588