OrangePi3588Media/plugins/ai_face_recog/ai_face_recog_node.cpp
sladro 57e4af1d92
Some checks are pending
CI / host-build (push) Waiting to run
CI / rk3588-cross-build (push) Waiting to run
修复人脸识别问题,测试模型问题,添加了调试
2026-01-08 15:44:23 +08:00

1019 lines
36 KiB
C++

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
// For test image loading
#define STB_IMAGE_IMPLEMENTATION
#define STBI_ONLY_PNG
#define STBI_NO_FAILURE_STRINGS
#include "../../third_party/rknpu2/examples/3rdparty/stb/stb_image.h"
#include "ai_scheduler.h"
#include "face/face_result.h"
#include "node.h"
#include "utils/logger.h"
#if defined(RK3588_ENABLE_SQLITE3)
#include <sqlite3.h>
#endif
namespace rk3588 {
namespace {
// Limits `v` to the range [lo, hi]. The lower bound is tested first, so when
// lo > hi any value below `lo` yields `lo`.
inline int ClampInt(int v, int lo, int hi) {
    if (v < lo) {
        return lo;
    }
    if (v > hi) {
        return hi;
    }
    return v;
}
// Reads the entire file at `path` into `out` as raw bytes.
// Returns false when the file cannot be opened, its size cannot be queried,
// or the read does not complete. An empty file gives an empty `out` and true.
// `out` is left untouched when the open or the size query fails.
bool ReadFileToString(const std::string& path, std::string& out) {
    std::ifstream file(path, std::ios::binary);
    if (!file) {
        return false;
    }

    // Seek to the end to learn the size, then rewind for the actual read.
    file.seekg(0, std::ios::end);
    const std::streamsize byte_count = file.tellg();
    if (byte_count < 0) {
        return false;
    }
    file.seekg(0, std::ios::beg);

    out.resize(static_cast<size_t>(byte_count));
    if (byte_count > 0) {
        file.read(&out[0], byte_count);
        return file.good();
    }
    return true;
}
// One enrolled identity held by FaceGallery.
struct GalleryEntry {
    int person_id{-1};          // identifier from the gallery source; -1 when unset
    std::string name{};         // display name; may be empty
    std::vector<float> emb{};   // embedding, passed through L2Normalize by the loaders
};
// Converts an IEEE 754 binary16 bit pattern to a 32-bit float.
// Handles signed zero, subnormals, normals, infinities and NaN payloads.
inline float HalfToFloat(uint16_t h) {
    constexpr uint32_t kExponentRebias = 127 - 15;  // float bias minus half bias

    const uint32_t sign_bit = static_cast<uint32_t>(h & 0x8000u) << 16;
    uint32_t exponent = (h & 0x7C00u) >> 10;
    uint32_t mantissa = h & 0x03FFu;

    uint32_t bits = 0;
    if (exponent == 31) {
        // Inf / NaN: all-ones exponent, mantissa carried over.
        bits = sign_bit | 0x7F800000u | (mantissa << 13);
    } else if (exponent != 0) {
        // Normal number: re-bias the exponent and widen the mantissa.
        bits = sign_bit | ((exponent + kExponentRebias) << 23) | (mantissa << 13);
    } else if (mantissa == 0) {
        // Signed zero.
        bits = sign_bit;
    } else {
        // Subnormal half: shift until the implicit leading one appears.
        // `exponent` is unsigned and may wrap below zero here; adding the
        // re-bias afterwards brings it back into range (modular arithmetic).
        exponent = 1;
        while ((mantissa & 0x0400u) == 0) {
            mantissa <<= 1;
            --exponent;
        }
        mantissa &= 0x03FFu;
        bits = sign_bit | ((exponent + kExponentRebias) << 23) | (mantissa << 13);
    }

    float result;
    std::memcpy(&result, &bits, sizeof(result));
    return result;
}
class FaceGallery {
public:
// Sets the embedding dimension LoadSqliteBackend uses to infer the element
// type of each blob. A value <= 0 makes it derive the dimension from blob size.
void SetExpectedDim(int dim) {
    expected_dim_ = dim;
}
// Stores the preferred blob element type, lowercased. LoadSqliteBackend only
// consults it when no expected dimension is set; it checks "f16" and "f64".
void SetPreferredDtype(std::string dtype) {
    std::transform(dtype.begin(), dtype.end(), dtype.begin(), [](unsigned char ch) {
        return static_cast<char>(std::tolower(ch));
    });
    preferred_dtype_ = std::move(dtype);
}
// Loads the gallery from a SQLite database opened read-only.
//
// Expects tables `person(id, name)` and `embedding(person_id, emb)`, where
// `emb` is a blob of packed f16, f32 or f64 values. Each embedding row becomes
// one GalleryEntry, L2-normalized after decoding.
//
// Element type detection:
//   * expected_dim_ > 0: the blob size must equal expected_dim_ * 4 (f32),
//     * 2 (f16) or * 8 (f64); other rows are skipped.
//   * expected_dim_ <= 0: preferred_dtype_ ("f16"/"f64") is applied when the
//     blob size is compatible, otherwise the blob is read as f32. Rows whose
//     size fits none of these are skipped.
// The first accepted row fixes dim_; later rows of a different dimension are
// skipped.
//
// Returns true on success (an empty result is still success). On failure
// returns false with a message in `err` and leaves the gallery empty.
// When built without RK3588_ENABLE_SQLITE3 it always fails.
bool LoadSqliteBackend(const std::string& db_path, std::string& err) {
#if defined(RK3588_ENABLE_SQLITE3)
    entries_.clear();
    dim_ = 0;

    // RAII guards: the handles are released on every return path and also if
    // an allocation below throws. `stmt` is declared after `db`, so it is
    // finalized before the connection is closed.
    sqlite3* raw_db = nullptr;
    const int open_rc = sqlite3_open_v2(db_path.c_str(), &raw_db, SQLITE_OPEN_READONLY, nullptr);
    std::unique_ptr<sqlite3, decltype(&sqlite3_close)> db(raw_db, &sqlite3_close);
    if (open_rc != SQLITE_OK || !db) {
        err = "failed to open sqlite db: " + db_path;
        return false;
    }

    const char* sql =
        "SELECT p.id, p.name, e.emb "
        "FROM embedding e JOIN person p ON e.person_id = p.id";
    sqlite3_stmt* raw_stmt = nullptr;
    const int prep_rc = sqlite3_prepare_v2(db.get(), sql, -1, &raw_stmt, nullptr);
    std::unique_ptr<sqlite3_stmt, decltype(&sqlite3_finalize)> stmt(raw_stmt, &sqlite3_finalize);
    if (prep_rc != SQLITE_OK || !stmt) {
        err = "sqlite prepare failed";
        return false;
    }

    enum class BlobType { F16, F32, F64 };

    int step_rc = SQLITE_DONE;
    while ((step_rc = sqlite3_step(stmt.get())) == SQLITE_ROW) {
        const int person_id = sqlite3_column_int(stmt.get(), 0);
        const unsigned char* name_u8 = sqlite3_column_text(stmt.get(), 1);
        const void* blob = sqlite3_column_blob(stmt.get(), 2);
        const int blob_sz = sqlite3_column_bytes(stmt.get(), 2);
        if (!blob || blob_sz <= 0) continue;

        BlobType blob_type = BlobType::F32;
        int dim = 0;
        if (expected_dim_ > 0) {
            // 64-bit arithmetic so a large configured dimension cannot overflow.
            const int64_t bytes = blob_sz;
            const int64_t want = expected_dim_;
            if (bytes == want * 4) {
                blob_type = BlobType::F32;
            } else if (bytes == want * 2) {
                blob_type = BlobType::F16;
            } else if (bytes == want * 8) {
                blob_type = BlobType::F64;
            } else {
                continue;
            }
            dim = expected_dim_;
        } else if (preferred_dtype_ == "f16" && (blob_sz % 2) == 0) {
            // The preference is checked before the f32 fallback, so f16 blobs
            // with an odd element count are no longer dropped.
            blob_type = BlobType::F16;
            dim = blob_sz / 2;
        } else if (preferred_dtype_ == "f64" && (blob_sz % 8) == 0) {
            blob_type = BlobType::F64;
            dim = blob_sz / 8;
        } else if ((blob_sz % 4) == 0) {
            blob_type = BlobType::F32;
            dim = blob_sz / 4;
        } else {
            continue;
        }

        if (dim_ == 0) dim_ = dim;
        if (dim != dim_) continue;

        GalleryEntry e;
        e.person_id = person_id;
        e.name = name_u8 ? reinterpret_cast<const char*>(name_u8) : std::string{};
        e.emb.resize(static_cast<size_t>(dim_));

        // SQLite does not promise any alignment for blob pointers, so every
        // element is copied out with memcpy rather than read through a typed
        // pointer.
        const unsigned char* raw = static_cast<const unsigned char*>(blob);
        if (blob_type == BlobType::F32) {
            std::memcpy(e.emb.data(), raw, static_cast<size_t>(dim_) * sizeof(float));
        } else if (blob_type == BlobType::F16) {
            for (int i = 0; i < dim_; ++i) {
                uint16_t half = 0;
                std::memcpy(&half, raw + static_cast<size_t>(i) * sizeof(half), sizeof(half));
                e.emb[static_cast<size_t>(i)] = HalfToFloat(half);
            }
        } else {
            for (int i = 0; i < dim_; ++i) {
                double wide = 0.0;
                std::memcpy(&wide, raw + static_cast<size_t>(i) * sizeof(wide), sizeof(wide));
                e.emb[static_cast<size_t>(i)] = static_cast<float>(wide);
            }
        }
        L2Normalize(e.emb);

        // DEBUG: print the first 8 values of the embedding read from the database.
        {
            std::string dbg = "[FaceGallery] db emb[0:8] for " + e.name + ": ";
            for (int di = 0; di < 8 && di < static_cast<int>(e.emb.size()); ++di) {
                dbg += std::to_string(e.emb[di]) + " ";
            }
            std::cerr << dbg << "\n";
        }
        entries_.push_back(std::move(e));
    }

    // Anything other than SQLITE_DONE means iteration stopped on an error;
    // do not report a partially loaded gallery as success.
    if (step_rc != SQLITE_DONE) {
        err = std::string("sqlite step failed: ") + sqlite3_errmsg(db.get());
        entries_.clear();
        dim_ = 0;
        return false;
    }
    return true;
#else
    (void)db_path;
    err = "sqlite3 support not enabled at build time";
    return false;
#endif
}
bool LoadFileBackend(const std::string& base_path, std::string& err) {
entries_.clear();
dim_ = 0;
const std::string json_path = base_path + ".json";
const std::string bin_path = base_path + ".bin";
std::string json_text;
if (!ReadFileToString(json_path, json_text)) {
err = "failed to read " + json_path;
return false;
}
SimpleJson root;
std::string jerr;
if (!ParseSimpleJson(json_text, root, jerr) || !root.IsObject()) {
err = "invalid json: " + jerr;
return false;
}
dim_ = root.ValueOr<int>("dim", 0);
if (dim_ <= 0) {
err = "gallery dim missing";
return false;
}
const SimpleJson* persons = root.Find("persons");
if (!persons || !persons->IsArray()) {
err = "gallery persons missing";
return false;
}
const size_t n = persons->AsArray().size();
if (n == 0) {
// Empty gallery is valid.
return true;
}
std::ifstream ifs(bin_path, std::ios::binary);
if (!ifs) {
err = "failed to open " + bin_path;
return false;
}
const size_t total_floats = n * static_cast<size_t>(dim_);
std::vector<float> buf(total_floats);
ifs.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(total_floats * sizeof(float)));
if (!ifs.good()) {
err = "failed to read embeddings from " + bin_path;
return false;
}
entries_.reserve(n);
for (size_t i = 0; i < n; ++i) {
const SimpleJson& p = persons->AsArray()[i];
GalleryEntry e;
e.person_id = p.ValueOr<int>("id", -1);
e.name = p.ValueOr<std::string>("name", "");
e.emb.resize(static_cast<size_t>(dim_));
memcpy(e.emb.data(), buf.data() + i * static_cast<size_t>(dim_), static_cast<size_t>(dim_) * sizeof(float));
L2Normalize(e.emb);
entries_.push_back(std::move(e));
}
return true;
}
// Embedding dimension of the loaded gallery; 0 when nothing has been loaded.
int Dim() const {
    return dim_;
}
// Number of gallery entries currently loaded.
size_t Size() const {
    return entries_.size();
}
// Outcome of SearchTop2: the best-matching entry plus the runner-up score.
struct SearchResult {
    int best_person_id{-1};    // -1 when no match was produced
    std::string best_name{};   // name of the best-matching entry
    float best_sim{0.0f};      // dot product with the best entry
    float second_sim{0.0f};    // second-highest dot product, 0 if there is none
};
// Linear scan of the gallery for the two highest dot products against
// `emb_normed`. Returns a default SearchResult (person id -1) when the gallery
// is empty, has no dimension, or the query length differs from Dim().
// `second_sim` is reported as 0 when no finite runner-up score exists.
SearchResult SearchTop2(const std::vector<float>& emb_normed) const {
    SearchResult result;
    const bool searchable = !entries_.empty() && dim_ > 0 &&
                            static_cast<int>(emb_normed.size()) == dim_;
    if (!searchable) {
        return result;
    }

    const float kNegInf = -std::numeric_limits<float>::infinity();
    float top = kNegInf;
    float runner_up = kNegInf;
    const GalleryEntry* winner = nullptr;

    for (const GalleryEntry& candidate : entries_) {
        const float score = Dot(emb_normed, candidate.emb);
        if (score > top) {
            runner_up = top;
            top = score;
            winner = &candidate;
        } else if (score > runner_up) {
            runner_up = score;
        }
    }

    if (winner != nullptr) {
        result.best_person_id = winner->person_id;
        result.best_name = winner->name;
        result.best_sim = top;
        result.second_sim = std::isfinite(runner_up) ? runner_up : 0.0f;
    }
    return result;
}
private:
// Dot product over a.size() elements, accumulated in float in index order.
// `b` must hold at least as many elements as `a`.
static float Dot(const std::vector<float>& a, const std::vector<float>& b) {
    const size_t count = a.size();
    const float* lhs = a.data();
    const float* rhs = b.data();
    float acc = 0.0f;
    for (size_t k = 0; k != count; ++k) {
        acc += lhs[k] * rhs[k];
    }
    return acc;
}
// Scales `v` in place to unit Euclidean length. The squared sum is accumulated
// in double. A vector whose norm is zero (including an empty one) is left as is.
static void L2Normalize(std::vector<float>& v) {
    double sum_sq = 0.0;
    for (const float value : v) {
        const double wide = static_cast<double>(value);
        sum_sq += wide * wide;
    }
    const double length = std::sqrt(sum_sq);
    if (length <= 0.0) {
        return;
    }
    const float scale = static_cast<float>(1.0 / length);
    for (float& value : v) {
        value *= scale;
    }
}
// Dimension shared by every loaded embedding; 0 until a load sets it.
int dim_ = 0;
// Dimension used by LoadSqliteBackend to infer blob element type; <= 0 means
// "derive the dimension from the blob size".
int expected_dim_ = 512;
// Lowercased dtype preference ("f16"/"f64" are recognised by LoadSqliteBackend).
std::string preferred_dtype_ = "auto";
// Loaded identities; each embedding has dim_ elements.
std::vector<GalleryEntry> entries_;
};
// 2D similarity transform (rotation + uniform scale + translation):
//   x' = a*x - b*y + c
//   y' = b*x + a*y + d
// Defaults to the identity.
struct SimilarityTransform {
    float a{1.0f};
    float b{0.0f};
    float c{0.0f};
    float d{0.0f};
};
// Solves the 4x4 linear system A * x = b by Gauss-Jordan elimination with
// partial pivoting. `A` and `b` are overwritten. Returns false when a pivot
// magnitude falls below 1e-8 (system treated as singular); `x` is only written
// on success.
bool Solve4x4(float A[4][4], float b[4], float x[4]) {
    constexpr int kN = 4;
    constexpr float kTiny = 1e-8f;

    for (int col = 0; col < kN; ++col) {
        // Choose the row at or below the diagonal with the largest entry in this column.
        int pivot_row = col;
        float pivot_mag = std::fabs(A[col][col]);
        for (int row = col + 1; row < kN; ++row) {
            const float mag = std::fabs(A[row][col]);
            if (mag > pivot_mag) {
                pivot_mag = mag;
                pivot_row = row;
            }
        }
        if (pivot_mag < kTiny) {
            return false;
        }

        if (pivot_row != col) {
            for (int k = col; k < kN; ++k) {
                std::swap(A[col][k], A[pivot_row][k]);
            }
            std::swap(b[col], b[pivot_row]);
        }

        // Scale the pivot row so the diagonal entry becomes 1.
        const float pivot = A[col][col];
        for (int k = col; k < kN; ++k) {
            A[col][k] /= pivot;
        }
        b[col] /= pivot;

        // Eliminate this column from every other row.
        for (int row = 0; row < kN; ++row) {
            if (row == col) {
                continue;
            }
            const float factor = A[row][col];
            if (std::fabs(factor) < kTiny) {
                continue;
            }
            for (int k = col; k < kN; ++k) {
                A[row][k] -= factor * A[col][k];
            }
            b[row] -= factor * b[col];
        }
    }

    std::copy(b, b + kN, x);
    return true;
}
// Least-squares fit of a similarity transform mapping the five `src` points
// onto the five `dst` points. Every correspondence contributes two equations
//   [ x  -y  1  0 ] * [a b c d]^T = u
//   [ y   x  0  1 ] * [a b c d]^T = v
// which are accumulated into the 4x4 normal equations and solved by Solve4x4.
// Returns false when the system is singular; `out` is then left untouched.
bool ComputeSimilarity(const std::array<Point2f, 5>& src,
                       const std::array<Point2f, 5>& dst,
                       SimilarityTransform& out) {
    float normal[4][4] = {};  // A^T * A
    float rhs[4] = {};        // A^T * b

    for (size_t k = 0; k < 5; ++k) {
        const float sx = src[k].x;
        const float sy = src[k].y;
        const float tx = dst[k].x;
        const float ty = dst[k].y;

        const float rows[2][4] = {
            {sx, -sy, 1.0f, 0.0f},
            {sy, sx, 0.0f, 1.0f},
        };
        const float targets[2] = {tx, ty};

        for (int eq = 0; eq < 2; ++eq) {
            for (int i = 0; i < 4; ++i) {
                rhs[i] += rows[eq][i] * targets[eq];
                for (int j = 0; j < 4; ++j) {
                    normal[i][j] += rows[eq][i] * rows[eq][j];
                }
            }
        }
    }

    float params[4];
    if (!Solve4x4(normal, rhs, params)) {
        return false;
    }
    out.a = params[0];
    out.b = params[1];
    out.c = params[2];
    out.d = params[3];
    return true;
}
// 2x3 affine matrix mapping destination pixel coordinates back to source
// coordinates:
//   xs = m00*x + m01*y + m02
//   ys = m10*x + m11*y + m12
// Defaults to the identity.
struct InvTransform {
    float m00{1.0f};
    float m01{0.0f};
    float m02{0.0f};
    float m10{0.0f};
    float m11{1.0f};
    float m12{0.0f};
};
// Computes the inverse of similarity transform `t` as a 2x3 affine matrix.
// The linear part [a -b; b a] has determinant a^2 + b^2; when that is below
// 1e-12 the transform is treated as degenerate, false is returned and `inv`
// is not modified.
bool InvertSimilarity(const SimilarityTransform& t, InvTransform& inv) {
    const float scale_sq = t.a * t.a + t.b * t.b;
    if (scale_sq < 1e-12f) {
        return false;
    }
    const float recip = 1.0f / scale_sq;

    // Inverse of the rotation/scale part: (1/det) * [a b; -b a].
    const float diag = t.a * recip;
    const float off = t.b * recip;
    inv.m00 = diag;
    inv.m01 = off;
    inv.m10 = -off;
    inv.m11 = diag;

    // Inverse translation: -(inverse linear part) * [c; d].
    inv.m02 = -(t.a * t.c + t.b * t.d) * recip;
    inv.m12 = (t.b * t.c - t.a * t.d) * recip;
    return true;
}
// Bilinearly samples channel `c` (0..2) of a packed 3-bytes-per-pixel image.
//
//   src    - pointer to the first pixel of the first row
//   w, h   - image size in pixels
//   stride - byte distance between consecutive rows
//   x, y   - sample position in pixel coordinates
//
// Returns 0 for positions outside [0, w-1] x [0, h-1]. NaN coordinates are
// treated as outside too: the range test is written in positive form so that
// NaN fails it instead of reaching the float-to-int conversion below, which
// would be undefined behaviour.
inline uint8_t BilinearAt(const uint8_t* src, int w, int h, int stride, float x, float y, int c) {
    const bool inside = x >= 0.0f && y >= 0.0f &&
                        x <= static_cast<float>(w - 1) &&
                        y <= static_cast<float>(h - 1);
    if (!inside) return 0;

    // Passing the test above implies w >= 1, h >= 1 and 0 <= floor(x|y) <= w-1|h-1.
    const int x0 = static_cast<int>(std::floor(x));
    const int y0 = static_cast<int>(std::floor(y));
    const int x1 = std::min(x0 + 1, w - 1);  // clamp the right/bottom neighbour at the edge
    const int y1 = std::min(y0 + 1, h - 1);

    const float wx = x - static_cast<float>(x0);
    const float wy = y - static_cast<float>(y0);
    const float w00 = (1.0f - wx) * (1.0f - wy);
    const float w01 = wx * (1.0f - wy);
    const float w10 = (1.0f - wx) * wy;
    const float w11 = wx * wy;

    // Offsets are computed in ptrdiff_t so row * stride cannot overflow int.
    const std::ptrdiff_t row0 = static_cast<std::ptrdiff_t>(y0) * stride;
    const std::ptrdiff_t row1 = static_cast<std::ptrdiff_t>(y1) * stride;
    const std::ptrdiff_t col0 = static_cast<std::ptrdiff_t>(x0) * 3;
    const std::ptrdiff_t col1 = static_cast<std::ptrdiff_t>(x1) * 3;
    const uint8_t* p00 = src + row0 + col0;
    const uint8_t* p01 = src + row0 + col1;
    const uint8_t* p10 = src + row1 + col0;
    const uint8_t* p11 = src + row1 + col1;

    const float v =
        static_cast<float>(p00[c]) * w00 +
        static_cast<float>(p01[c]) * w01 +
        static_cast<float>(p10[c]) * w10 +
        static_cast<float>(p11[c]) * w11;
    const int rounded = static_cast<int>(v + 0.5f);
    return static_cast<uint8_t>(std::min(std::max(rounded, 0), 255));
}
// Fills the tightly packed dst_w x dst_h 3-channel image `dst` by mapping each
// destination pixel through `inv` into the source image and sampling it
// bilinearly. Samples outside the source come back as 0 from BilinearAt.
// When `swap_rb` is set, channels 0 and 2 are exchanged on output.
void WarpFace(const uint8_t* src, int w, int h, int stride,
              const InvTransform& inv, uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
    for (int out_y = 0; out_y < dst_h; ++out_y) {
        uint8_t* px = dst + static_cast<size_t>(out_y) * static_cast<size_t>(dst_w) * 3;
        for (int out_x = 0; out_x < dst_w; ++out_x, px += 3) {
            const float src_x = inv.m00 * static_cast<float>(out_x) +
                                inv.m01 * static_cast<float>(out_y) + inv.m02;
            const float src_y = inv.m10 * static_cast<float>(out_x) +
                                inv.m11 * static_cast<float>(out_y) + inv.m12;

            uint8_t sample[3];
            for (int ch = 0; ch < 3; ++ch) {
                sample[ch] = BilinearAt(src, w, h, stride, src_x, src_y, ch);
            }
            if (swap_rb) {
                std::swap(sample[0], sample[2]);
            }
            px[0] = sample[0];
            px[1] = sample[1];
            px[2] = sample[2];
        }
    }
}
// Crops `bbox` from the source image and resizes it bilinearly into the
// tightly packed dst_w x dst_h 3-channel image `dst`, using pixel-centre
// sampling. The box width and height are raised to at least 1. Parts of the
// box outside the source come back as 0 from BilinearAt. When `swap_rb` is
// set, channels 0 and 2 are exchanged on output.
void CropResize(const uint8_t* src, int w, int h, int stride,
                const Rect& bbox, uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
    const float left = bbox.x;
    const float top = bbox.y;
    const float box_w = std::max(1.0f, bbox.w);
    const float box_h = std::max(1.0f, bbox.h);
    // Source pixels advanced per destination pixel along each axis.
    const float step_x = box_w / static_cast<float>(dst_w);
    const float step_y = box_h / static_cast<float>(dst_h);

    for (int out_y = 0; out_y < dst_h; ++out_y) {
        uint8_t* px = dst + static_cast<size_t>(out_y) * static_cast<size_t>(dst_w) * 3;
        const float src_y = top + (static_cast<float>(out_y) + 0.5f) * step_y - 0.5f;
        for (int out_x = 0; out_x < dst_w; ++out_x, px += 3) {
            const float src_x = left + (static_cast<float>(out_x) + 0.5f) * step_x - 0.5f;

            uint8_t sample[3];
            for (int ch = 0; ch < 3; ++ch) {
                sample[ch] = BilinearAt(src, w, h, stride, src_x, src_y, ch);
            }
            if (swap_rb) {
                std::swap(sample[0], sample[2]);
            }
            px[0] = sample[0];
            px[1] = sample[1];
            px[2] = sample[2];
        }
    }
}
#if defined(RK3588_ENABLE_RKNN)
// Affine dequantization: (q - zero_point) * scale, computed in float.
template <typename T>
inline float Dequant(T q, int32_t zp, float scale) {
    const float centered = static_cast<float>(q) - static_cast<float>(zp);
    return centered * scale;
}
// Converts a borrowed RKNN output tensor into a float embedding.
//
// Supported element types: FLOAT32 (copied), FLOAT16 (widened with
// HalfToFloat), INT8 and UINT8 (dequantized with the tensor's zero point and
// scale). The element count is o.size divided by the element size.
//
// Returns false when the buffer is null or empty, when it holds no complete
// element, or when the tensor type is not one of the four above. Previously
// any other type (e.g. INT16/INT32) was silently decoded as UINT8, which
// produced a meaningless embedding. `emb` is untouched on failure.
bool DecodeEmbedding(const AiScheduler::BorrowedOutput& o, std::vector<float>& emb) {
    if (!o.data || o.size == 0) return false;

    size_t elem_size = 0;
    if (o.type == RKNN_TENSOR_FLOAT32) {
        elem_size = 4;
    } else if (o.type == RKNN_TENSOR_FLOAT16) {
        elem_size = 2;
    } else if (o.type == RKNN_TENSOR_INT8 || o.type == RKNN_TENSOR_UINT8) {
        elem_size = 1;
    } else {
        return false;  // unsupported tensor type
    }

    const size_t elem_cnt = o.size / elem_size;
    if (elem_cnt == 0) return false;
    emb.resize(elem_cnt);

    if (o.type == RKNN_TENSOR_FLOAT32) {
        const float* fp = reinterpret_cast<const float*>(o.data);
        for (size_t i = 0; i < elem_cnt; ++i) emb[i] = fp[i];
    } else if (o.type == RKNN_TENSOR_FLOAT16) {
        const uint16_t* hp = reinterpret_cast<const uint16_t*>(o.data);
        for (size_t i = 0; i < elem_cnt; ++i) emb[i] = HalfToFloat(hp[i]);
    } else if (o.type == RKNN_TENSOR_INT8) {
        const int8_t* p = reinterpret_cast<const int8_t*>(o.data);
        for (size_t i = 0; i < elem_cnt; ++i) emb[i] = Dequant(p[i], o.zp, o.scale);
    } else {
        const uint8_t* p = reinterpret_cast<const uint8_t*>(o.data);
        for (size_t i = 0; i < elem_cnt; ++i) emb[i] = Dequant(p[i], o.zp, o.scale);
    }
    return true;
}
// Scales `v` in place to unit Euclidean length (squared sum accumulated in
// double). A vector whose norm is zero, including an empty one, is left as is.
void L2Normalize(std::vector<float>& v) {
    double sum_sq = 0.0;
    for (const float value : v) {
        const double wide = static_cast<double>(value);
        sum_sq += wide * wide;
    }
    const double length = std::sqrt(sum_sq);
    if (length <= 0.0) {
        return;
    }
    const float scale = static_cast<float>(1.0 / length);
    for (float& value : v) {
        value *= scale;
    }
}
#else
// Stub for builds without RK3588_ENABLE_RKNN: there is no tensor to decode,
// so this always reports failure and leaves the output vector untouched.
bool DecodeEmbedding(const AiScheduler::BorrowedOutput&, std::vector<float>&) {
    return false;
}
// Stub for builds without RK3588_ENABLE_RKNN: deliberately a no-op.
void L2Normalize(std::vector<float>&) {
}
#endif
// Immutable-by-convention bundle of the node's runtime settings. A snapshot is
// built by BuildFaceRecogConfigSnapshot and published through a shared_ptr.
struct FaceRecogConfigSnapshot {
    // --- recognition behaviour ---
    bool align{true};            // use 5-point landmark alignment when landmarks exist and the model is 112x112
    bool emit_embedding{false};  // copy the embedding into each result item
    int max_faces{10};           // upper bound on faces processed per frame
    float thr_accept{0.45f};     // minimum best similarity for a match
    float thr_margin{0.05f};     // minimum (best - second) gap; <= 0 disables the check

    // --- model input ---
    std::string model_input_format{"rgb"};  // "rgb" or "bgr"; compared with the frame format to decide channel swap
    std::string input_dtype{"uint8"};       // "float"/"f32"/"float32" select the float input path
    float norm_scale{1.0f};                 // float path: x * scale + bias ...
    float norm_bias{0.0f};
    bool norm_use_mean_std{false};          // ... or (x - mean) / std when true
    std::array<float, 3> norm_mean = {{0.0f, 0.0f, 0.0f}};
    std::array<float, 3> norm_std = {{1.0f, 1.0f, 1.0f}};

    // --- gallery ---
    std::string gallery_backend{"file"};  // "file" or "sqlite"
    std::string gallery_path{};           // empty disables gallery loading
    bool gallery_load_on_start{true};
    int gallery_expected_dim{512};
    std::string gallery_dtype{"auto"};
};
// Builds a new config snapshot from `config`, starting from `base` (or from
// the struct defaults when `base` is null). Keys absent from `config` keep
// their starting value.
//
// Recognised keys: "align", "emit_embedding", "max_faces" (raised to >= 1),
// "threshold" {accept, margin}, "input_format", "input_dtype",
// "normalize" {mean[3], std[3], scale, bias} and
// "gallery" {backend, path, load_on_start, expected_dim (raised to >= 0), dtype}.
// Format, dtype and backend strings are lowercased. When a "normalize" object
// is present, mean/std mode is enabled only if it supplies a mean or std array.
//
// Always returns true; the result is stored in `out`.
static bool BuildFaceRecogConfigSnapshot(const SimpleJson& config,
                                         const std::shared_ptr<const FaceRecogConfigSnapshot>& base,
                                         std::shared_ptr<const FaceRecogConfigSnapshot>& out) {
    const auto lower_in_place = [](std::string& text) {
        for (char& ch : text) {
            ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
        }
    };
    // Copies the first three numbers of a JSON array into `dest`; returns
    // whether the array was present and long enough.
    const auto read_triplet = [](const SimpleJson* arr, std::array<float, 3>& dest) -> bool {
        if (!arr || !arr->IsArray() || arr->AsArray().size() < 3) {
            return false;
        }
        for (size_t k = 0; k < 3; ++k) {
            dest[k] = static_cast<float>(arr->AsArray()[k].AsNumber(dest[k]));
        }
        return true;
    };

    auto snapshot = base ? std::make_shared<FaceRecogConfigSnapshot>(*base)
                         : std::make_shared<FaceRecogConfigSnapshot>();

    snapshot->align = config.ValueOr<bool>("align", snapshot->align);
    snapshot->emit_embedding = config.ValueOr<bool>("emit_embedding", snapshot->emit_embedding);
    snapshot->max_faces = std::max(1, config.ValueOr<int>("max_faces", snapshot->max_faces));

    const SimpleJson* threshold = config.Find("threshold");
    if (threshold && threshold->IsObject()) {
        snapshot->thr_accept = threshold->ValueOr<float>("accept", snapshot->thr_accept);
        snapshot->thr_margin = threshold->ValueOr<float>("margin", snapshot->thr_margin);
    }

    std::string input_format = config.ValueOr<std::string>("input_format", snapshot->model_input_format);
    lower_in_place(input_format);
    snapshot->model_input_format = std::move(input_format);

    std::string input_dtype = config.ValueOr<std::string>("input_dtype", snapshot->input_dtype);
    lower_in_place(input_dtype);
    snapshot->input_dtype = std::move(input_dtype);

    const SimpleJson* normalize = config.Find("normalize");
    if (normalize && normalize->IsObject()) {
        const bool has_mean = read_triplet(normalize->Find("mean"), snapshot->norm_mean);
        const bool has_std = read_triplet(normalize->Find("std"), snapshot->norm_std);
        snapshot->norm_use_mean_std = has_mean || has_std;
        snapshot->norm_scale = normalize->ValueOr<float>("scale", snapshot->norm_scale);
        snapshot->norm_bias = normalize->ValueOr<float>("bias", snapshot->norm_bias);
    }

    const SimpleJson* gallery = config.Find("gallery");
    if (gallery && gallery->IsObject()) {
        snapshot->gallery_backend = gallery->ValueOr<std::string>("backend", snapshot->gallery_backend);
        snapshot->gallery_path = gallery->ValueOr<std::string>("path", snapshot->gallery_path);
        snapshot->gallery_load_on_start =
            gallery->ValueOr<bool>("load_on_start", snapshot->gallery_load_on_start);
        snapshot->gallery_expected_dim =
            std::max(0, gallery->ValueOr<int>("expected_dim", snapshot->gallery_expected_dim));
        snapshot->gallery_dtype = gallery->ValueOr<std::string>("dtype", snapshot->gallery_dtype);
    }
    lower_in_place(snapshot->gallery_backend);
    lower_in_place(snapshot->gallery_dtype);

    out = std::move(snapshot);
    return true;
}
} // namespace
class AiFaceRecogNode : public INode {
public:
// Node identifier as configured in Init().
std::string Id() const override {
    return id_;
}
// Fixed type name of this node.
std::string Type() const override {
    return "ai_face_recog";
}
// Reads the node configuration, publishes the initial config snapshot, drops
// any previously loaded gallery, wires the queues and (in RKNN builds) loads
// the model.
//
// Returns false when the input queue or all output queues are missing, or,
// in RKNN builds, when "model_path" is empty or the model fails to load.
// If the scheduler cannot report model info, the 112x112 default input size
// is kept. Without RKNN the node only forwards frames.
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
    id_ = config.ValueOr<std::string>("id", "face_recog");
    model_path_ = config.ValueOr<std::string>("model_path", "");

    std::shared_ptr<const FaceRecogConfigSnapshot> initial_cfg;
    BuildFaceRecogConfigSnapshot(config, nullptr, initial_cfg);
    {
        std::lock_guard<std::mutex> guard(mu_);
        cfg_ = std::move(initial_cfg);
        gallery_.reset();
    }

    input_queue_ = ctx.input_queue;
    output_queues_ = ctx.output_queues;
    if (!input_queue_) {
        std::cerr << "[ai_face_recog] no input queue for node " << id_ << "\n";
        return false;
    }
    if (output_queues_.empty()) {
        std::cerr << "[ai_face_recog] no output queue for node " << id_ << "\n";
        return false;
    }

#if defined(RK3588_ENABLE_RKNN)
    if (model_path_.empty()) {
        std::cerr << "[ai_face_recog] model_path is required\n";
        return false;
    }

    std::string load_err;
    model_handle_ = AiScheduler::Instance().LoadModel(model_path_, load_err);
    if (model_handle_ == kInvalidModelHandle) {
        std::cerr << "[ai_face_recog] failed to load model: " << load_err << "\n";
        return false;
    }

    ModelInfo model_info;
    const bool have_info = AiScheduler::Instance().GetModelInfo(model_handle_, model_info);
    if (have_info) {
        model_w_ = model_info.input_width;
        model_h_ = model_info.input_height;
    }

    const std::string dims = std::to_string(model_w_) + "x" + std::to_string(model_h_);
    LogInfo("[ai_face_recog] model loaded: " + model_path_ + " (" + dims + ")");
#else
    LogWarn("[ai_face_recog] RKNN disabled, will passthrough frames");
#endif
    return true;
}
bool Start() override {
std::shared_ptr<const FaceRecogConfigSnapshot> cfg;
{
std::lock_guard<std::mutex> lock(mu_);
cfg = cfg_;
}
if (cfg && cfg->gallery_load_on_start) {
ReloadGallery(*cfg);
}
const bool align = cfg ? cfg->align : false;
const float thr_accept = cfg ? cfg->thr_accept : 0.0f;
const float thr_margin = cfg ? cfg->thr_margin : 0.0f;
LogInfo("[ai_face_recog] start id=" + id_ + " align=" + std::string(align ? "true" : "false") +
" thr_accept=" + std::to_string(thr_accept) + " thr_margin=" + std::to_string(thr_margin));
// ========== TEST: Load aligned image and run inference ==========
#if defined(RK3588_ENABLE_RKNN)
{
const char* test_img_path = "./003_aligned.png";
int img_w = 0, img_h = 0, img_c = 0;
unsigned char* img_data = stbi_load(test_img_path, &img_w, &img_h, &img_c, 3);
if (img_data && img_w == 112 && img_h == 112) {
std::cerr << "[TEST] Loaded " << test_img_path << " (" << img_w << "x" << img_h << "x" << img_c << ")\n";
InferInput in;
in.width = 112;
in.height = 112;
in.is_nhwc = true;
in.data = img_data;
in.size = 112 * 112 * 3;
in.type = RKNN_TENSOR_UINT8;
auto r = AiScheduler::Instance().InferBorrowed(model_handle_, in);
if (r.success && !r.outputs.empty()) {
std::vector<float> emb;
if (DecodeEmbedding(r.outputs[0], emb)) {
L2Normalize(emb);
std::cerr << "[TEST] RKNN embedding[0:8]: ";
for (int i = 0; i < 8 && i < static_cast<int>(emb.size()); ++i) {
std::cerr << emb[i] << " ";
}
std::cerr << "\n";
} else {
std::cerr << "[TEST] DecodeEmbedding failed\n";
}
} else {
std::cerr << "[TEST] Inference failed: " << r.error << "\n";
}
stbi_image_free(img_data);
} else {
if (img_data) stbi_image_free(img_data);
std::cerr << "[TEST] Skip: " << test_img_path << " not found or wrong size\n";
}
}
#endif
// ========== END TEST ==========
return true;
}
// Applies a configuration update without rebuilding the node.
// Rejected (returns false) when the update changes the node id or the model
// path. Otherwise a new snapshot derived from the current one is published,
// and the gallery is reloaded when its backend, path, expected dimension or
// dtype changed.
bool UpdateConfig(const SimpleJson& new_config) override {
    const std::string requested_id = new_config.ValueOr<std::string>("id", id_);
    if (!requested_id.empty() && requested_id != id_) {
        return false;
    }

    const std::string requested_model = new_config.ValueOr<std::string>("model_path", model_path_);
    if (requested_model != model_path_) {
        // Changing model requires graph rebuild.
        return false;
    }

    std::shared_ptr<const FaceRecogConfigSnapshot> previous;
    {
        std::lock_guard<std::mutex> guard(mu_);
        previous = cfg_;
    }

    std::shared_ptr<const FaceRecogConfigSnapshot> updated;
    BuildFaceRecogConfigSnapshot(new_config, previous, updated);

    bool gallery_changed = false;
    if (previous && updated) {
        const bool same_source = updated->gallery_backend == previous->gallery_backend &&
                                 updated->gallery_path == previous->gallery_path &&
                                 updated->gallery_expected_dim == previous->gallery_expected_dim &&
                                 updated->gallery_dtype == previous->gallery_dtype;
        gallery_changed = !same_source;
    }

    {
        std::lock_guard<std::mutex> guard(mu_);
        cfg_ = updated;
    }

    if (gallery_changed && updated) {
        ReloadGallery(*updated);
    }
    return true;
}
// Unloads the model (RKNN builds only, and only if one is loaded) and logs
// the stop event.
void Stop() override {
#if defined(RK3588_ENABLE_RKNN)
    const bool model_loaded = (model_handle_ != kInvalidModelHandle);
    if (model_loaded) {
        AiScheduler::Instance().UnloadModel(model_handle_);
        model_handle_ = kInvalidModelHandle;
    }
#endif
    LogInfo("[ai_face_recog] stop id=" + id_);
}
// Handles one frame: runs recognition on it (RKNN builds only) and forwards
// it to every output queue. A null frame is dropped.
NodeStatus Process(FramePtr frame) override {
    if (!frame) {
        return NodeStatus::DROP;
    }

#if defined(RK3588_ENABLE_RKNN)
    Run(frame);
#endif

    Push(frame);
    return NodeStatus::OK;
}
private:
// Forwards `frame` to every output queue.
void Push(FramePtr frame) {
    for (const auto& queue : output_queues_) {
        queue->Push(frame);
    }
}
void ReloadGallery(const FaceRecogConfigSnapshot& cfg) {
if (cfg.gallery_path.empty()) return;
std::string err;
FaceGallery g;
g.SetExpectedDim(cfg.gallery_expected_dim);
g.SetPreferredDtype(cfg.gallery_dtype);
bool ok = false;
if (cfg.gallery_backend == "sqlite") {
ok = g.LoadSqliteBackend(cfg.gallery_path, err);
} else if (cfg.gallery_backend == "file") {
ok = g.LoadFileBackend(cfg.gallery_path, err);
} else {
err = "unknown gallery backend: " + cfg.gallery_backend;
}
if (!ok) {
if (!err.empty()) LogWarn("[ai_face_recog] gallery load failed: " + err);
return;
}
auto sp = std::make_shared<FaceGallery>(std::move(g));
{
std::lock_guard<std::mutex> lock(mu_);
gallery_ = sp;
}
LogInfo("[ai_face_recog] gallery loaded: n=" + std::to_string(sp->Size()) +
" dim=" + std::to_string(sp->Dim()));
}
#if defined(RK3588_ENABLE_RKNN)
// Runs face recognition on every detected face of `frame` (up to max_faces)
// and attaches a FaceRecogResult to frame->face_recog.
//
// Per face: build the model input (landmark-aligned warp or bbox crop),
// optionally convert it to normalized float, run inference, decode and
// L2-normalize the embedding, search the gallery, and apply the accept and
// margin thresholds. Faces whose inference or decoding fails are skipped, so
// the result may hold fewer items than detected faces.
//
// Returns without touching the frame when there are no detections, no pixel
// data, the pixel format is not RGB/BGR, or no config snapshot is published.
void Run(FramePtr frame) {
if (!frame->face_det || frame->face_det->faces.empty()) return;
if (!frame->data || frame->data_size == 0) return;
if (frame->format != PixelFormat::RGB && frame->format != PixelFormat::BGR) {
std::cerr << "[ai_face_recog] input must be RGB/BGR\n";
return;
}
// Prefer plane 0 when it is populated; otherwise fall back to the flat buffer.
const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
const int w = frame->width;
const int h = frame->height;
// Row stride in bytes: plane stride, then frame stride, then tightly packed w * 3.
const int stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
: (frame->stride > 0 ? frame->stride : w * 3);
if (!src || stride <= 0) return;
// Take config and gallery references under the lock so a concurrent
// UpdateConfig/ReloadGallery cannot swap them mid-frame.
std::shared_ptr<const FaceRecogConfigSnapshot> cfg;
std::shared_ptr<const FaceGallery> gallery;
{
std::lock_guard<std::mutex> lock(mu_);
cfg = cfg_;
gallery = gallery_;
}
if (!cfg) return;
// Swap channels 0 and 2 when the frame's channel order differs from what the model expects.
const bool need_swap = (frame->format == PixelFormat::BGR && cfg->model_input_format == "rgb") ||
(frame->format == PixelFormat::RGB && cfg->model_input_format == "bgr");
FaceRecogResult rr;
rr.img_w = w;
rr.img_h = h;
rr.model_name = "arcface";
const int limit = std::min<int>(cfg->max_faces, static_cast<int>(frame->face_det->faces.size()));
rr.items.reserve(static_cast<size_t>(limit));
for (int i = 0; i < limit; ++i) {
const FaceDetItem& face = frame->face_det->faces[static_cast<size_t>(i)];
// face_buf_ is a reused member buffer holding the model_w_ x model_h_ x 3 uint8 crop.
face_buf_.resize(static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_) * 3);
// Alignment is only attempted for 112x112 models, matching the landmark template below.
if (cfg->align && face.has_landmarks && model_w_ == 112 && model_h_ == 112) {
// Target landmark positions in the 112x112 aligned image.
// NOTE(review): presumably the standard ArcFace 5-point template — confirm.
const std::array<Point2f, 5> dst = {
Point2f{38.2946f, 51.6963f},
Point2f{73.5318f, 51.5014f},
Point2f{56.0252f, 71.7366f},
Point2f{41.5493f, 92.3655f},
Point2f{70.7299f, 92.2041f},
};
SimilarityTransform t;
InvTransform inv;
if (ComputeSimilarity(face.landmarks, dst, t) && InvertSimilarity(t, inv)) {
WarpFace(src, w, h, stride, inv, face_buf_.data(), model_w_, model_h_, need_swap);
} else {
// Degenerate landmarks: fall back to a plain bbox crop.
CropResize(src, w, h, stride, face.bbox, face_buf_.data(), model_w_, model_h_, need_swap);
}
} else {
CropResize(src, w, h, stride, face.bbox, face_buf_.data(), model_w_, model_h_, need_swap);
}
InferInput in;
in.width = model_w_;
in.height = model_h_;
in.is_nhwc = true;
if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") {
// Float input path: convert the uint8 crop to float with the configured normalization.
float_input_buf_.resize(static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_) * 3);
const size_t pix = static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_);
const uint8_t* p = face_buf_.data();
for (size_t ii = 0; ii < pix; ++ii) {
for (int c = 0; c < 3; ++c) {
float x = static_cast<float>(p[ii * 3 + static_cast<size_t>(c)]);
if (cfg->norm_use_mean_std) {
// A near-zero std is replaced by 1 to avoid dividing by zero.
const float st = std::fabs(cfg->norm_std[static_cast<size_t>(c)]) < 1e-6f ? 1.0f
: cfg->norm_std[static_cast<size_t>(c)];
x = (x - cfg->norm_mean[static_cast<size_t>(c)]) / st;
} else {
x = x * cfg->norm_scale + cfg->norm_bias;
}
float_input_buf_[ii * 3 + static_cast<size_t>(c)] = x;
}
}
in.data = float_input_buf_.data();
in.size = float_input_buf_.size() * sizeof(float);
in.type = RKNN_TENSOR_FLOAT32;
} else {
// Default path: feed the uint8 crop directly.
in.data = face_buf_.data();
in.size = face_buf_.size();
in.type = RKNN_TENSOR_UINT8;
}
auto r = AiScheduler::Instance().InferBorrowed(model_handle_, in);
if (!r.success || r.outputs.empty()) {
std::cerr << "[ai_face_recog] inference failed: " << (r.error.empty() ? "unknown" : r.error) << "\n";
continue;
}
// Only the first output tensor is treated as the embedding.
std::vector<float> emb;
if (!DecodeEmbedding(r.outputs[0], emb)) {
continue;
}
L2Normalize(emb);
// DEBUG: print the first 8 values of the inferred embedding.
{
std::string dbg = "[ai_face_recog] infer emb[0:8]: ";
for (int di = 0; di < 8 && di < static_cast<int>(emb.size()); ++di) {
dbg += std::to_string(emb[di]) + " ";
}
LogInfo(dbg);
}
// With no gallery (or an empty one) sr keeps its defaults and the face is reported as unknown.
FaceGallery::SearchResult sr;
if (gallery && gallery->Size() > 0) {
sr = gallery->SearchTop2(emb);
}
// Accept only when a match exists, it clears thr_accept, and (if thr_margin > 0)
// it beats the runner-up by at least thr_margin.
const bool accept = (sr.best_person_id >= 0) &&
(sr.best_sim >= cfg->thr_accept) &&
((cfg->thr_margin <= 0.0f) || ((sr.best_sim - sr.second_sim) >= cfg->thr_margin));
FaceRecogItem item;
item.bbox = face.bbox;
item.has_landmarks = face.has_landmarks;
item.landmarks = face.landmarks;
item.best_person_id = accept ? sr.best_person_id : -1;
item.best_name = accept ? sr.best_name : "unknown";
// Similarities are reported even for rejected matches.
item.best_sim = sr.best_sim;
item.second_sim = sr.second_sim;
item.unknown = !accept;
if (cfg->emit_embedding) item.embedding = emb;
rr.items.push_back(std::move(item));
}
frame->face_recog = std::make_shared<FaceRecogResult>(std::move(rr));
}
#endif
// Node id and model path, both set in Init().
std::string id_;
std::string model_path_;
// Guards cfg_ and gallery_; both are replaced wholesale and read via shared_ptr copies.
mutable std::mutex mu_;
std::shared_ptr<const FaceRecogConfigSnapshot> cfg_;
std::shared_ptr<const FaceGallery> gallery_;
// Queues taken from the NodeContext in Init().
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
// Scratch buffers reused by Run(): the uint8 face crop and its float-converted copy.
std::vector<uint8_t> face_buf_;
std::vector<float> float_input_buf_;
// Loaded model handle; kInvalidModelHandle when no model is loaded.
ModelHandle model_handle_ = kInvalidModelHandle;
// Model input size; overwritten in Init() when the scheduler reports model info.
int model_w_ = 112;
int model_h_ = 112;
};
// Associates AiFaceRecogNode with the type name "ai_face_recog".
// NOTE(review): REGISTER_NODE is defined outside this file; presumably it
// registers the class with the node factory — confirm.
REGISTER_NODE(AiFaceRecogNode, "ai_face_recog");
} // namespace rk3588