// ai_face_recog: face alignment, embedding inference, and gallery matching node.
#include <algorithm>
#include <array>
#include <cctype>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <limits>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
#include "hw/i_infer_backend.h"
|
|
#include "face/face_recog_debug.h"
|
|
#include "face/face_result.h"
|
|
#include "node.h"
|
|
#include "utils/dma_alloc.h"
|
|
#include "utils/logger.h"
|
|
#include "utils/shared_state.h"
|
|
|
|
#if defined(RK3588_ENABLE_SQLITE3)
|
|
#include <sqlite3.h>
|
|
#endif
|
|
|
|
namespace rk3588 {
|
|
|
|
namespace {
|
|
|
|
// Clamp v to the inclusive range [lo, hi].
inline int ClampInt(int v, int lo, int hi) {
  if (v < lo) return lo;
  if (v > hi) return hi;
  return v;
}
|
|
|
|
// Read the entire file at `path` (binary mode) into `out`.
// Returns false on open/seek/read failure; `out` may be resized even on
// failure. An empty file yields true with an empty `out`.
bool ReadFileToString(const std::string& path, std::string& out) {
  std::ifstream stream(path, std::ios::binary);
  if (!stream) return false;

  stream.seekg(0, std::ios::end);
  const std::streamsize file_size = stream.tellg();
  if (file_size < 0) return false;
  stream.seekg(0, std::ios::beg);

  out.resize(static_cast<size_t>(file_size));
  if (file_size == 0) return true;

  stream.read(&out[0], file_size);
  return stream.good();
}
|
|
|
|
inline float IntersectionArea(const Rect& a, const Rect& b) {
|
|
const float left = std::max(a.x, b.x);
|
|
const float top = std::max(a.y, b.y);
|
|
const float right = std::min(a.x + a.w, b.x + b.w);
|
|
const float bottom = std::min(a.y + a.h, b.y + b.h);
|
|
const float w = right - left;
|
|
const float h = bottom - top;
|
|
if (w <= 0.0f || h <= 0.0f) return 0.0f;
|
|
return w * h;
|
|
}
|
|
|
|
inline bool ContainsPoint(const Rect& r, float x, float y) {
|
|
return x >= r.x && y >= r.y && x <= (r.x + r.w) && y <= (r.y + r.h);
|
|
}
|
|
|
|
// True when the detection's class id matches the configured person class.
inline bool IsPersonDetection(const Detection& det, int person_class_id) {
  return det.cls_id == person_class_id;
}
|
|
|
|
// Counters describing one face->track association attempt; used only for
// diagnostic logging (see BuildFaceTrackAssociationDiagLine).
struct FaceTrackAssociationDiag {
  std::string source = "frame_det";  // "frame_det" or "shared_state"
  int total_dets = 0;                // all candidate objects examined
  int person_dets = 0;               // objects with the person class id
  int tracked_person_dets = 0;       // ...that also carry a valid track id
  int containing_tracked_person_dets = 0;  // ...whose bbox contains the face center
  int best_track_id = -1;            // winning track id (-1 = no match)
  float best_overlap = -1.0f;        // intersection area of the winner
};
|
|
|
|
template <typename ObjT, typename ClassFn, typename TrackFn, typename BboxFn>
|
|
int AssociateFaceToPersonTrackImpl(const Rect& face_bbox, const std::vector<ObjT>& objs, int person_class_id,
|
|
FaceTrackAssociationDiag* diag, ClassFn class_fn, TrackFn track_fn,
|
|
BboxFn bbox_fn) {
|
|
const float center_x = face_bbox.x + face_bbox.w * 0.5f;
|
|
const float center_y = face_bbox.y + face_bbox.h * 0.5f;
|
|
|
|
if (diag) *diag = FaceTrackAssociationDiag{};
|
|
if (diag) diag->total_dets = static_cast<int>(objs.size());
|
|
|
|
int best_track_id = -1;
|
|
float best_overlap = -1.0f;
|
|
for (const auto& obj : objs) {
|
|
if (diag && class_fn(obj) == person_class_id) ++diag->person_dets;
|
|
if (class_fn(obj) != person_class_id) continue;
|
|
if (track_fn(obj) < 0) continue;
|
|
if (diag) ++diag->tracked_person_dets;
|
|
const Rect bbox = bbox_fn(obj);
|
|
if (!ContainsPoint(bbox, center_x, center_y)) continue;
|
|
if (diag) ++diag->containing_tracked_person_dets;
|
|
|
|
const float overlap = IntersectionArea(face_bbox, bbox);
|
|
if (overlap > best_overlap) {
|
|
best_overlap = overlap;
|
|
best_track_id = track_fn(obj);
|
|
}
|
|
}
|
|
|
|
if (diag) {
|
|
diag->best_track_id = best_track_id;
|
|
diag->best_overlap = best_overlap;
|
|
}
|
|
return best_track_id;
|
|
}
|
|
|
|
int AssociateFaceToPersonTrack(const Rect& face_bbox, const std::vector<Detection>& dets, int person_class_id,
|
|
FaceTrackAssociationDiag* diag = nullptr) {
|
|
return AssociateFaceToPersonTrackImpl(
|
|
face_bbox, dets, person_class_id, diag, [](const Detection& det) { return det.cls_id; },
|
|
[](const Detection& det) { return det.track_id; }, [](const Detection& det) { return det.bbox; });
|
|
}
|
|
|
|
int AssociateFaceToTrackedObjects(const Rect& face_bbox, const std::vector<TrackedObject>& objs, int person_class_id,
|
|
FaceTrackAssociationDiag* diag = nullptr) {
|
|
return AssociateFaceToPersonTrackImpl(
|
|
face_bbox, objs, person_class_id, diag, [](const TrackedObject& obj) { return obj.cls_id; },
|
|
[](const TrackedObject& obj) { return obj.track_id; }, [](const TrackedObject& obj) { return obj.bbox; });
|
|
}
|
|
|
|
// Resolve the person track id for a face box.
//
// Primary source: the current frame's detections (det_result). If that
// yields no track, fall back to the tracker snapshot published in
// SharedState under `track_state_key` — unless the snapshot is older than
// `track_state_max_age_ms` (0 disables the age check).
// `diag`, when non-null, receives the counters from whichever source was
// consulted last, with `source` set to "frame_det" or "shared_state".
int AssociateFaceToPersonTrackWithFallback(const Rect& face_bbox, const DetectionResult* det_result,
                                           int person_class_id, const std::string& track_state_key,
                                           int64_t track_state_max_age_ms,
                                           FaceTrackAssociationDiag* diag = nullptr) {
  // 1) Prefer the current frame's detections.
  if (det_result) {
    FaceTrackAssociationDiag det_diag;
    const int track_id = AssociateFaceToPersonTrack(face_bbox, det_result->items, person_class_id,
                                                    diag ? &det_diag : nullptr);
    if (diag) {
      *diag = det_diag;
      diag->source = "frame_det";
    }
    if (track_id >= 0) return track_id;
  }

  // 2) Fallback: tracker state shared across nodes (disabled if no key).
  if (track_state_key.empty()) return -1;

  auto snap = SharedState::Instance().GetTargets(track_state_key);
  if (!snap) return -1;

  // Reject snapshots older than the configured limit.
  if (track_state_max_age_ms > 0) {
    const uint64_t now_us = NowSteadyUs();
    const uint64_t age_us = (now_us > snap->update_steady_us) ? (now_us - snap->update_steady_us) : 0;
    if (age_us > static_cast<uint64_t>(track_state_max_age_ms) * 1000ULL) return -1;
  }

  FaceTrackAssociationDiag state_diag;
  const int track_id = AssociateFaceToTrackedObjects(face_bbox, snap->objects, person_class_id,
                                                     diag ? &state_diag : nullptr);
  if (diag) {
    *diag = state_diag;
    diag->source = "shared_state";
  }
  return track_id;
}
|
|
|
|
// Render an association diagnostic as a single grep-friendly log line.
// Face bbox coordinates are rounded to integers; best_overlap is printed
// with one decimal place (fixed notation).
std::string BuildFaceTrackAssociationDiagLine(const std::string& node_id, uint64_t frame_id, const Rect& face_bbox,
                                              int person_class_id, const FaceTrackAssociationDiag& diag) {
  std::ostringstream oss;
  oss << "[ai_face_recog] track_assoc"
      << " id=" << node_id
      << " frame=" << frame_id
      << " source=" << diag.source
      << " person_class_id=" << person_class_id
      << " face_bbox=(" << static_cast<int>(std::lround(face_bbox.x))
      << "," << static_cast<int>(std::lround(face_bbox.y))
      << "," << static_cast<int>(std::lround(face_bbox.w))
      << "," << static_cast<int>(std::lround(face_bbox.h))
      << ")"
      << " dets=" << diag.total_dets
      << " person_dets=" << diag.person_dets
      << " tracked_person_dets=" << diag.tracked_person_dets
      << " containing_tracked_person_dets=" << diag.containing_tracked_person_dets
      << " best_track_id=" << diag.best_track_id
      << " best_overlap=" << std::fixed << std::setprecision(1) << diag.best_overlap;
  return oss.str();
}
|
|
|
|
// One gallery row: a person identity plus one of their embeddings.
struct GalleryEntry {
  int person_id = -1;      // stable id from the gallery backend
  std::string name;        // display name (may be empty)
  std::vector<float> emb;  // L2 normalized
};
|
|
|
|
// Convert an IEEE 754 binary16 value to binary32.
// Handles signed zeros, subnormals, infinities and NaNs (the NaN payload
// is preserved in the widened mantissa). The final bit cast uses memcpy
// to avoid undefined type punning.
inline float HalfToFloat(uint16_t h) {
  const uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
  uint32_t exponent = (h & 0x7C00u) >> 10;
  uint32_t mantissa = h & 0x03FFu;

  uint32_t bits;
  if (exponent == 31u) {
    // Inf / NaN: exponent saturates in binary32 as well.
    bits = sign | 0x7F800000u | (mantissa << 13);
  } else if (exponent != 0u) {
    // Normal number: re-bias exponent from 15 to 127 (i.e. add 112).
    bits = sign | ((exponent + 112u) << 23) | (mantissa << 13);
  } else if (mantissa == 0u) {
    // Signed zero.
    bits = sign;
  } else {
    // Subnormal half: renormalize the mantissa, tracking the exponent.
    // The unsigned wrap-around of `exponent` cancels in the re-bias sum.
    exponent = 1u;
    while ((mantissa & 0x0400u) == 0u) {
      mantissa <<= 1;
      --exponent;
    }
    mantissa &= 0x03FFu;
    bits = sign | ((exponent + 112u) << 23) | (mantissa << 13);
  }

  float out;
  memcpy(&out, &bits, sizeof(out));
  return out;
}
|
|
|
|
// In-memory gallery of known-face embeddings, loaded from a sqlite
// database or from a paired <base>.json/<base>.bin file. Embeddings are
// L2-normalized on load so a dot product equals cosine similarity.
// Not internally synchronized: the owning node publishes instances via a
// shared_ptr swap.
class FaceGallery {
 public:
  // Expected embedding dimension. > 0 lets the sqlite loader infer the
  // blob element type from its byte size; 0 disables that check.
  void SetExpectedDim(int dim) { expected_dim_ = dim; }
  // Preferred element type ("f16"/"f64"), consulted only for ambiguous
  // blobs when no expected dimension is set. Stored lower-cased.
  void SetPreferredDtype(std::string dtype) {
    for (auto& c : dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    preferred_dtype_ = std::move(dtype);
  }

  // Load all embeddings from a sqlite database with tables `person(id,
  // name)` and `embedding(person_id, emb BLOB)`. Rows with malformed or
  // dimension-mismatched blobs are silently skipped; the first accepted
  // row fixes dim_. Returns false (with err set) only on open/prepare
  // failure or when sqlite support is compiled out.
  bool LoadSqliteBackend(const std::string& db_path, std::string& err) {
#if defined(RK3588_ENABLE_SQLITE3)
    entries_.clear();
    dim_ = 0;

    sqlite3* db = nullptr;
    if (sqlite3_open_v2(db_path.c_str(), &db, SQLITE_OPEN_READONLY, nullptr) != SQLITE_OK || !db) {
      err = "failed to open sqlite db: " + db_path;
      if (db) sqlite3_close(db);
      return false;
    }

    const char* sql =
        "SELECT p.id, p.name, e.emb "
        "FROM embedding e JOIN person p ON e.person_id = p.id";

    sqlite3_stmt* stmt = nullptr;
    if (sqlite3_prepare_v2(db, sql, -1, &stmt, nullptr) != SQLITE_OK || !stmt) {
      err = "sqlite prepare failed";
      if (stmt) sqlite3_finalize(stmt);
      sqlite3_close(db);
      return false;
    }

    while (sqlite3_step(stmt) == SQLITE_ROW) {
      const int person_id = sqlite3_column_int(stmt, 0);
      const unsigned char* name_u8 = sqlite3_column_text(stmt, 1);
      const void* blob = sqlite3_column_blob(stmt, 2);
      const int blob_sz = sqlite3_column_bytes(stmt, 2);
      if (!blob || blob_sz <= 0) continue;

      // Infer element type from byte size when the dimension is known.
      const int expected_dim = expected_dim_;
      int dim = 0;
      enum class BlobType { F16, F32, F64 } blob_type = BlobType::F32;

      if (expected_dim > 0) {
        if (blob_sz == expected_dim * 4) {
          dim = expected_dim;
          blob_type = BlobType::F32;
        } else if (blob_sz == expected_dim * 2) {
          dim = expected_dim;
          blob_type = BlobType::F16;
        } else if (blob_sz == expected_dim * 8) {
          dim = expected_dim;
          blob_type = BlobType::F64;
        } else {
          continue;  // size matches no supported element type: skip row
        }
      } else {
        if ((blob_sz % 4) != 0) continue;
        dim = blob_sz / 4;
        blob_type = BlobType::F32;
      }

      // Optional dtype preference (only affects ambiguous cases when expected_dim==0).
      if (expected_dim <= 0) {
        if (preferred_dtype_ == "f16" && (blob_sz % 2) == 0) {
          blob_type = BlobType::F16;
          dim = blob_sz / 2;
        } else if (preferred_dtype_ == "f64" && (blob_sz % 8) == 0) {
          blob_type = BlobType::F64;
          dim = blob_sz / 8;
        }
      }

      // First accepted row fixes the gallery dimension; later rows with
      // a different dimension are dropped.
      if (dim_ == 0) dim_ = dim;
      if (dim != dim_) continue;

      GalleryEntry e;
      e.person_id = person_id;
      e.name = name_u8 ? reinterpret_cast<const char*>(name_u8) : std::string{};
      e.emb.resize(static_cast<size_t>(dim_));
      if (blob_type == BlobType::F32) {
        memcpy(e.emb.data(), blob, static_cast<size_t>(dim_) * sizeof(float));
      } else if (blob_type == BlobType::F16) {
        const uint16_t* hp = reinterpret_cast<const uint16_t*>(blob);
        for (int i = 0; i < dim_; ++i) e.emb[static_cast<size_t>(i)] = HalfToFloat(hp[i]);
      } else {
        // NOTE(review): reading the blob through a double* assumes the
        // sqlite blob pointer is 8-byte aligned — confirm, or switch to
        // a memcpy-based element read.
        const double* dp = reinterpret_cast<const double*>(blob);
        for (int i = 0; i < dim_; ++i) e.emb[static_cast<size_t>(i)] = static_cast<float>(dp[i]);
      }
      L2Normalize(e.emb);
      entries_.push_back(std::move(e));
    }

    sqlite3_finalize(stmt);
    sqlite3_close(db);
    return true;
#else
    (void)db_path;
    err = "sqlite3 support not enabled at build time";
    return false;
#endif
  }

  // Load the gallery from <base_path>.json (metadata: "dim" plus a
  // "persons" array with per-entry "id"/"name") and <base_path>.bin
  // (packed float32 embeddings, one row of `dim` floats per person, in
  // the same order as the json array). Returns false with err set on any
  // structural problem; an empty persons array is a valid empty gallery.
  bool LoadFileBackend(const std::string& base_path, std::string& err) {
    entries_.clear();
    dim_ = 0;

    const std::string json_path = base_path + ".json";
    const std::string bin_path = base_path + ".bin";

    std::string json_text;
    if (!ReadFileToString(json_path, json_text)) {
      err = "failed to read " + json_path;
      return false;
    }

    SimpleJson root;
    std::string jerr;
    if (!ParseSimpleJson(json_text, root, jerr) || !root.IsObject()) {
      err = "invalid json: " + jerr;
      return false;
    }

    dim_ = root.ValueOr<int>("dim", 0);
    if (dim_ <= 0) {
      err = "gallery dim missing";
      return false;
    }

    const SimpleJson* persons = root.Find("persons");
    if (!persons || !persons->IsArray()) {
      err = "gallery persons missing";
      return false;
    }

    const size_t n = persons->AsArray().size();
    if (n == 0) {
      // Empty gallery is valid.
      return true;
    }

    std::ifstream ifs(bin_path, std::ios::binary);
    if (!ifs) {
      err = "failed to open " + bin_path;
      return false;
    }

    // Read all n*dim floats in one shot; short reads are an error.
    const size_t total_floats = n * static_cast<size_t>(dim_);
    std::vector<float> buf(total_floats);
    ifs.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(total_floats * sizeof(float)));
    if (!ifs.good()) {
      err = "failed to read embeddings from " + bin_path;
      return false;
    }

    entries_.reserve(n);
    for (size_t i = 0; i < n; ++i) {
      const SimpleJson& p = persons->AsArray()[i];
      GalleryEntry e;
      e.person_id = p.ValueOr<int>("id", -1);
      e.name = p.ValueOr<std::string>("name", "");
      e.emb.resize(static_cast<size_t>(dim_));
      memcpy(e.emb.data(), buf.data() + i * static_cast<size_t>(dim_), static_cast<size_t>(dim_) * sizeof(float));
      L2Normalize(e.emb);
      entries_.push_back(std::move(e));
    }

    return true;
  }

  // Embedding dimension (0 until a gallery is loaded).
  int Dim() const { return dim_; }
  // Number of loaded entries (embeddings, not unique persons).
  size_t Size() const { return entries_.size(); }

  // Best and runner-up similarity for one probe embedding.
  struct SearchResult {
    int best_person_id = -1;  // -1 when the gallery is empty / mismatched
    std::string best_name;
    float best_sim = 0.0f;    // cosine similarity of the best entry
    float second_sim = 0.0f;  // runner-up similarity (0 when only one entry)
  };

  // Linear scan for the two most similar entries. `emb_normed` must be
  // L2-normalized and match Dim(); otherwise a default (no-match) result
  // is returned.
  SearchResult SearchTop2(const std::vector<float>& emb_normed) const {
    SearchResult r;
    if (entries_.empty() || dim_ <= 0) return r;
    if (static_cast<int>(emb_normed.size()) != dim_) return r;

    float best = -std::numeric_limits<float>::infinity();
    float second = -std::numeric_limits<float>::infinity();
    int best_idx = -1;

    for (size_t i = 0; i < entries_.size(); ++i) {
      const float sim = Dot(emb_normed, entries_[i].emb);
      if (sim > best) {
        second = best;
        best = sim;
        best_idx = static_cast<int>(i);
      } else if (sim > second) {
        second = sim;
      }
    }

    if (best_idx >= 0) {
      r.best_person_id = entries_[static_cast<size_t>(best_idx)].person_id;
      r.best_name = entries_[static_cast<size_t>(best_idx)].name;
      r.best_sim = best;
      // With a single entry `second` stays -inf; report 0 instead.
      r.second_sim = std::isfinite(second) ? second : 0.0f;
    }
    return r;
  }

 private:
  // Plain dot product; assumes a.size() <= b.size().
  static float Dot(const std::vector<float>& a, const std::vector<float>& b) {
    float s = 0.0f;
    for (size_t i = 0; i < a.size(); ++i) s += a[i] * b[i];
    return s;
  }

  // Scale v to unit L2 norm in place; a zero vector is left unchanged.
  static void L2Normalize(std::vector<float>& v) {
    double ss = 0.0;
    for (float x : v) ss += static_cast<double>(x) * static_cast<double>(x);
    const double norm = std::sqrt(ss);
    if (norm <= 0.0) return;
    const float inv = static_cast<float>(1.0 / norm);
    for (float& x : v) x *= inv;
  }

  int dim_ = 0;                          // embedding dimension of loaded data
  int expected_dim_ = 512;               // see SetExpectedDim
  std::string preferred_dtype_ = "auto"; // see SetPreferredDtype
  std::vector<GalleryEntry> entries_;
};
|
|
|
|
// 2D similarity transform (uniform scale + rotation + translation),
// parameterized as:
//   x' = a*x - b*y + c
//   y' = b*x + a*y + d
// Defaults to the identity.
struct SimilarityTransform {
  float a = 1.0f;
  float b = 0.0f;
  float c = 0.0f;
  float d = 0.0f;
};
|
|
|
|
// Solve the 4x4 linear system A*x = b via in-place Gauss-Jordan
// elimination with partial pivoting. A and b are destroyed in the
// process. Returns false when the matrix is numerically singular
// (best available pivot below 1e-8).
bool Solve4x4(float A[4][4], float b[4], float x[4]) {
  for (int col = 0; col < 4; ++col) {
    // Choose the row with the largest magnitude in this column as pivot.
    int pivot_row = col;
    float pivot_mag = std::fabs(A[col][col]);
    for (int row = col + 1; row < 4; ++row) {
      const float mag = std::fabs(A[row][col]);
      if (mag > pivot_mag) {
        pivot_mag = mag;
        pivot_row = row;
      }
    }
    if (pivot_mag < 1e-8f) return false;

    if (pivot_row != col) {
      for (int k = col; k < 4; ++k) std::swap(A[col][k], A[pivot_row][k]);
      std::swap(b[col], b[pivot_row]);
    }

    // Scale the pivot row so the pivot element becomes 1.
    const float pivot = A[col][col];
    for (int k = col; k < 4; ++k) A[col][k] /= pivot;
    b[col] /= pivot;

    // Eliminate this column from every other row (Jordan step).
    for (int row = 0; row < 4; ++row) {
      if (row == col) continue;
      const float factor = A[row][col];
      if (std::fabs(factor) < 1e-8f) continue;
      for (int k = col; k < 4; ++k) A[row][k] -= factor * A[col][k];
      b[row] -= factor * b[col];
    }
  }

  // A is now the identity, so b holds the solution.
  for (int i = 0; i < 4; ++i) x[i] = b[i];
  return true;
}
|
|
|
|
// Fit a 2D similarity transform mapping `src` landmarks onto `dst`
// landmarks in the least-squares sense (model: u = a*x - b*y + c,
// v = b*x + a*y + d; see SimilarityTransform). Builds the 4x4 normal
// equations A^T A x = A^T b from the five point pairs and solves them.
// Returns false when the system is (numerically) singular, e.g. for
// degenerate landmark configurations.
bool ComputeSimilarity(const std::array<Point2f, 5>& src,
                       const std::array<Point2f, 5>& dst,
                       SimilarityTransform& out) {
  // Least squares on similarity model.
  float ATA[4][4] = {};
  float ATb[4] = {};

  // Accumulate one observation row into the normal equations.
  auto Acc = [&](const float row[4], float rhs) {
    for (int i = 0; i < 4; ++i) {
      ATb[i] += row[i] * rhs;
      for (int j = 0; j < 4; ++j) {
        ATA[i][j] += row[i] * row[j];
      }
    }
  };

  for (int i = 0; i < 5; ++i) {
    const float x = src[static_cast<size_t>(i)].x;
    const float y = src[static_cast<size_t>(i)].y;
    const float u = dst[static_cast<size_t>(i)].x;
    const float v = dst[static_cast<size_t>(i)].y;

    // Two rows per correspondence: one constrains u, one constrains v.
    const float r1[4] = {x, -y, 1.0f, 0.0f};
    const float r2[4] = {y, x, 0.0f, 1.0f};
    Acc(r1, u);
    Acc(r2, v);
  }

  float A[4][4];
  float b[4];
  for (int i = 0; i < 4; ++i) {
    b[i] = ATb[i];
    for (int j = 0; j < 4; ++j) A[i][j] = ATA[i][j];
  }
  float x[4];
  if (!Solve4x4(A, b, x)) return false;
  out.a = x[0];
  out.b = x[1];
  out.c = x[2];
  out.d = x[3];
  return true;
}
|
|
|
|
// Row-major 2x3 affine matrix used as an inverse mapping for warping:
// (x_src, y_src) = M * (x_dst, y_dst, 1). Defaults to the identity.
struct InvTransform {
  float m00 = 1.0f, m01 = 0.0f, m02 = 0.0f;
  float m10 = 0.0f, m11 = 1.0f, m12 = 0.0f;
};
|
|
|
|
// Invert the similarity transform t (x' = a*x - b*y + c, y' = b*x + a*y + d).
// Its linear part is [[a, -b], [b, a]] with determinant a^2 + b^2; fails
// (returns false) when that determinant is numerically zero.
bool InvertSimilarity(const SimilarityTransform& t, InvTransform& inv) {
  const float det = t.a * t.a + t.b * t.b;
  if (det < 1e-12f) return false;

  const float s = 1.0f / det;
  // Inverse of the rotation+scale block.
  inv.m00 = t.a * s;
  inv.m01 = t.b * s;
  inv.m10 = -t.b * s;
  inv.m11 = t.a * s;
  // Inverse translation: -M^{-1} * (c, d).
  inv.m02 = -(t.a * t.c + t.b * t.d) * s;
  inv.m12 = (t.b * t.c - t.a * t.d) * s;
  return true;
}
|
|
|
|
inline uint8_t BilinearAt(const uint8_t* src, int w, int h, int stride, float x, float y, int c) {
|
|
if (x < 0.0f || y < 0.0f || x > static_cast<float>(w - 1) || y > static_cast<float>(h - 1)) return 0;
|
|
const int x0 = ClampInt(static_cast<int>(std::floor(x)), 0, w - 1);
|
|
const int y0 = ClampInt(static_cast<int>(std::floor(y)), 0, h - 1);
|
|
const int x1 = ClampInt(x0 + 1, 0, w - 1);
|
|
const int y1 = ClampInt(y0 + 1, 0, h - 1);
|
|
const float wx = x - static_cast<float>(x0);
|
|
const float wy = y - static_cast<float>(y0);
|
|
const float w00 = (1.0f - wx) * (1.0f - wy);
|
|
const float w01 = wx * (1.0f - wy);
|
|
const float w10 = (1.0f - wx) * wy;
|
|
const float w11 = wx * wy;
|
|
|
|
const uint8_t* p00 = src + y0 * stride + x0 * 3;
|
|
const uint8_t* p01 = src + y0 * stride + x1 * 3;
|
|
const uint8_t* p10 = src + y1 * stride + x0 * 3;
|
|
const uint8_t* p11 = src + y1 * stride + x1 * 3;
|
|
|
|
const float v =
|
|
static_cast<float>(p00[c]) * w00 +
|
|
static_cast<float>(p01[c]) * w01 +
|
|
static_cast<float>(p10[c]) * w10 +
|
|
static_cast<float>(p11[c]) * w11;
|
|
return static_cast<uint8_t>(ClampInt(static_cast<int>(v + 0.5f), 0, 255));
|
|
}
|
|
|
|
void WarpFace(const uint8_t* src, int w, int h, int stride,
|
|
const InvTransform& inv, uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
|
|
for (int y = 0; y < dst_h; ++y) {
|
|
uint8_t* row = dst + static_cast<size_t>(y) * static_cast<size_t>(dst_w) * 3;
|
|
for (int x = 0; x < dst_w; ++x) {
|
|
const float xs = inv.m00 * static_cast<float>(x) + inv.m01 * static_cast<float>(y) + inv.m02;
|
|
const float ys = inv.m10 * static_cast<float>(x) + inv.m11 * static_cast<float>(y) + inv.m12;
|
|
uint8_t r = BilinearAt(src, w, h, stride, xs, ys, 0);
|
|
uint8_t g = BilinearAt(src, w, h, stride, xs, ys, 1);
|
|
uint8_t b = BilinearAt(src, w, h, stride, xs, ys, 2);
|
|
if (swap_rb) std::swap(r, b);
|
|
row[0] = r;
|
|
row[1] = g;
|
|
row[2] = b;
|
|
row += 3;
|
|
}
|
|
}
|
|
}
|
|
|
|
void CropResize(const uint8_t* src, int w, int h, int stride,
|
|
const Rect& bbox, uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
|
|
const float x0 = bbox.x;
|
|
const float y0 = bbox.y;
|
|
const float bw = std::max(1.0f, bbox.w);
|
|
const float bh = std::max(1.0f, bbox.h);
|
|
for (int y = 0; y < dst_h; ++y) {
|
|
uint8_t* row = dst + static_cast<size_t>(y) * static_cast<size_t>(dst_w) * 3;
|
|
const float sy = y0 + (static_cast<float>(y) + 0.5f) * (bh / static_cast<float>(dst_h)) - 0.5f;
|
|
for (int x = 0; x < dst_w; ++x) {
|
|
const float sx = x0 + (static_cast<float>(x) + 0.5f) * (bw / static_cast<float>(dst_w)) - 0.5f;
|
|
uint8_t r = BilinearAt(src, w, h, stride, sx, sy, 0);
|
|
uint8_t g = BilinearAt(src, w, h, stride, sx, sy, 1);
|
|
uint8_t b = BilinearAt(src, w, h, stride, sx, sy, 2);
|
|
if (swap_rb) std::swap(r, b);
|
|
row[0] = r;
|
|
row[1] = g;
|
|
row[2] = b;
|
|
row += 3;
|
|
}
|
|
}
|
|
}
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
// Dequantize one tensor element using its zero point and scale.
template <typename T>
inline float Dequant(T q, int32_t zp, float scale) {
  return (static_cast<float>(q) - static_cast<float>(zp)) * scale;
}

// Convert a raw RKNN output tensor into a float embedding vector.
// Supports FLOAT32, FLOAT16 and INT8 explicitly; any other type is
// treated as unsigned 8-bit quantized data. Quantized values are
// dequantized with the tensor's zp/scale. Returns false for an empty
// buffer. The result is NOT normalized; callers run L2Normalize.
bool DecodeEmbedding(const AiScheduler::BorrowedOutput& o, std::vector<float>& emb) {
  if (!o.data || o.size == 0) return false;

  size_t elem_size = 1;
  bool is_float = false;
  bool is_float16 = false;
  if (o.type == RKNN_TENSOR_FLOAT16) {
    elem_size = 2;
    is_float16 = true;
  }
  if (o.type == RKNN_TENSOR_FLOAT32) {
    elem_size = 4;
    is_float = true;
  }
  const size_t elem_cnt = elem_size > 0 ? (o.size / elem_size) : 0;
  if (elem_cnt == 0) return false;

  emb.resize(elem_cnt);
  if (is_float) {
    const float* fp = reinterpret_cast<const float*>(o.data);
    for (size_t i = 0; i < elem_cnt; ++i) emb[i] = fp[i];
    return true;
  }

  if (is_float16) {
    const uint16_t* hp = reinterpret_cast<const uint16_t*>(o.data);
    for (size_t i = 0; i < elem_cnt; ++i) emb[i] = HalfToFloat(hp[i]);
    return true;
  }

  if (o.type == RKNN_TENSOR_INT8) {
    const int8_t* p = reinterpret_cast<const int8_t*>(o.data);
    for (size_t i = 0; i < elem_cnt; ++i) emb[i] = Dequant(p[i], o.zp, o.scale);
    return true;
  }

  // Fallback: interpret as unsigned 8-bit quantized data.
  const uint8_t* p = reinterpret_cast<const uint8_t*>(o.data);
  for (size_t i = 0; i < elem_cnt; ++i) emb[i] = Dequant(p[i], o.zp, o.scale);
  return true;
}

// Scale v to unit L2 norm in place; a zero vector is left unchanged.
void L2Normalize(std::vector<float>& v) {
  double ss = 0.0;
  for (float x : v) ss += static_cast<double>(x) * static_cast<double>(x);
  const double norm = std::sqrt(ss);
  if (norm <= 0.0) return;
  const float inv = static_cast<float>(1.0 / norm);
  for (float& x : v) x *= inv;
}
#else
// RKNN disabled at build time: decoding always fails and normalization
// is a no-op, so callers compile unchanged.
bool DecodeEmbedding(const AiScheduler::BorrowedOutput& /*o*/, std::vector<float>& /*emb*/) {
  return false;
}

void L2Normalize(std::vector<float>& /*v*/) {}
#endif
|
|
|
|
// Immutable snapshot of the node's runtime configuration. A fresh
// snapshot is built on every config update and swapped in atomically
// behind a shared_ptr, so worker code never observes a partial update.
struct FaceRecogConfigSnapshot {
  bool align = true;            // landmark-align faces before embedding
  bool emit_embedding = false;  // attach raw embeddings to results
  int max_faces = 10;           // per-frame cap on processed faces

  float thr_accept = 0.45f;  // minimum similarity to accept a match
  float thr_margin = 0.05f;  // required best-vs-second similarity gap
  int person_class_id = 0;   // detector class id treated as "person"
  std::string track_state_key;            // SharedState key for tracker fallback ("" disables)
  int64_t track_state_max_age_ms = 1000;  // max tracker-snapshot age (0 = unlimited)

  std::string model_input_format = "rgb";  // channel order the model expects
  std::string input_dtype = "uint8";       // model input element type

  // Input normalization: scale/bias, or per-channel mean/std when
  // norm_use_mean_std is set.
  float norm_scale = 1.0f;
  float norm_bias = 0.0f;
  bool norm_use_mean_std = false;
  std::array<float, 3> norm_mean{{0.0f, 0.0f, 0.0f}};
  std::array<float, 3> norm_std{{1.0f, 1.0f, 1.0f}};

  // Gallery source: "sqlite" database or "file" (<path>.json + <path>.bin).
  std::string gallery_backend = "sqlite";
  std::string gallery_path = "./models/face_gallery.db";
  bool gallery_load_on_start = true;
  int gallery_expected_dim = 512;
  std::string gallery_dtype = "auto";
  int gallery_reload_seq = 0;  // bump via UpdateConfig to force a reload

  FaceRecogDebugConfig debug;  // debug-logging options (parsed separately)
};
|
|
|
|
// Build a config snapshot from JSON, layered on top of `base` (when
// non-null) so that keys absent from `config` keep their previous
// values. String options are lower-cased for case-insensitive matching.
// Always succeeds (returns true); malformed sub-objects simply keep the
// base/default values.
static bool BuildFaceRecogConfigSnapshot(const SimpleJson& config,
                                         const std::shared_ptr<const FaceRecogConfigSnapshot>& base,
                                         std::shared_ptr<const FaceRecogConfigSnapshot>& out) {
  auto snap = std::make_shared<FaceRecogConfigSnapshot>();
  if (base) *snap = *base;  // start from the previous snapshot's values

  snap->align = config.ValueOr<bool>("align", snap->align);
  snap->emit_embedding = config.ValueOr<bool>("emit_embedding", snap->emit_embedding);
  snap->max_faces = std::max(1, config.ValueOr<int>("max_faces", snap->max_faces));
  snap->person_class_id = config.ValueOr<int>("person_class_id", snap->person_class_id);
  snap->track_state_key = config.ValueOr<std::string>("track_state_key", snap->track_state_key);
  snap->track_state_max_age_ms =
      std::max<int64_t>(0, static_cast<int64_t>(config.ValueOr<int>("track_state_max_age_ms",
                                                                    static_cast<int>(snap->track_state_max_age_ms))));

  // Match thresholds: {"threshold": {"accept": ..., "margin": ...}}.
  if (const SimpleJson* th = config.Find("threshold"); th && th->IsObject()) {
    snap->thr_accept = th->ValueOr<float>("accept", snap->thr_accept);
    snap->thr_margin = th->ValueOr<float>("margin", snap->thr_margin);
  }

  {
    std::string fmt = config.ValueOr<std::string>("input_format", snap->model_input_format);
    for (auto& c : fmt) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    snap->model_input_format = std::move(fmt);
  }
  {
    std::string dtype = config.ValueOr<std::string>("input_dtype", snap->input_dtype);
    for (auto& c : dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    snap->input_dtype = std::move(dtype);
  }

  // Normalization: presence of a 3-element "mean" or "std" array switches
  // the node to mean/std normalization; otherwise scale/bias is used.
  if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) {
    bool use_ms = false;
    if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) {
      for (int i = 0; i < 3; ++i) {
        snap->norm_mean[static_cast<size_t>(i)] =
            static_cast<float>(mean->AsArray()[static_cast<size_t>(i)].AsNumber(snap->norm_mean[static_cast<size_t>(i)]));
      }
      use_ms = true;
    }
    if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) {
      for (int i = 0; i < 3; ++i) {
        snap->norm_std[static_cast<size_t>(i)] =
            static_cast<float>(st->AsArray()[static_cast<size_t>(i)].AsNumber(snap->norm_std[static_cast<size_t>(i)]));
      }
      use_ms = true;
    }
    snap->norm_use_mean_std = use_ms;
    snap->norm_scale = norm->ValueOr<float>("scale", snap->norm_scale);
    snap->norm_bias = norm->ValueOr<float>("bias", snap->norm_bias);
  }

  // Gallery settings; reload_seq is a caller-driven change marker that
  // forces a reload even when the other fields are unchanged.
  if (const SimpleJson* g = config.Find("gallery"); g && g->IsObject()) {
    snap->gallery_backend = g->ValueOr<std::string>("backend", snap->gallery_backend);
    snap->gallery_path = g->ValueOr<std::string>("path", snap->gallery_path);
    snap->gallery_load_on_start = g->ValueOr<bool>("load_on_start", snap->gallery_load_on_start);
    snap->gallery_expected_dim = std::max(0, g->ValueOr<int>("expected_dim", snap->gallery_expected_dim));
    snap->gallery_dtype = g->ValueOr<std::string>("dtype", snap->gallery_dtype);
    snap->gallery_reload_seq = g->ValueOr<int>("reload_seq", snap->gallery_reload_seq);
  }
  snap->debug = ParseFaceRecogDebugConfig(config, snap->debug);
  for (auto& c : snap->gallery_backend) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
  for (auto& c : snap->gallery_dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));

  out = std::move(snap);
  return true;
}
|
|
|
|
} // namespace
|
|
|
|
class AiFaceRecogNode : public INode {
|
|
public:
|
|
// Node identifier from config "id" (defaults to "face_recog").
std::string Id() const override { return id_; }

// Fixed node type string used for graph wiring.
std::string Type() const override { return "ai_face_recog"; }
|
|
|
|
// One-time node setup from the graph config: parses scheduling options
// (infer_interval_ms / infer_fps / infer_phase_ms), builds the initial
// config snapshot, wires queues and the inference backend, and — when
// built with RKNN — loads the recognition model. Returns false on any
// missing dependency, which aborts graph construction.
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
  id_ = config.ValueOr<std::string>("id", "face_recog");
  model_path_ = config.ValueOr<std::string>("model_path", "");
  infer_interval_ms_ = std::max<int64_t>(
      0, static_cast<int64_t>(config.ValueOr<int>("infer_interval_ms", 0)));
  if (infer_interval_ms_ <= 0) {
    // Interval may alternatively be given as a rate; minimum 1 ms.
    const double infer_fps = config.ValueOr<double>("infer_fps", 0.0);
    if (infer_fps > 0.0) {
      infer_interval_ms_ = static_cast<int64_t>(1000.0 / infer_fps);
      if (infer_interval_ms_ < 1) infer_interval_ms_ = 1;
    }
  }
  infer_phase_ms_ = std::max<int64_t>(
      0, static_cast<int64_t>(config.ValueOr<int>("infer_phase_ms", 0)));
  // The phase offset is only meaningful modulo the interval.
  if (infer_interval_ms_ > 0 && infer_phase_ms_ >= infer_interval_ms_) {
    infer_phase_ms_ %= infer_interval_ms_;
  }
  std::shared_ptr<const FaceRecogConfigSnapshot> snap;
  BuildFaceRecogConfigSnapshot(config, nullptr, snap);
  {
    std::lock_guard<std::mutex> lock(mu_);
    cfg_ = std::move(snap);
    gallery_.reset();  // stale gallery must not survive a re-Init
  }

  input_queue_ = ctx.input_queue;
  output_queues_ = ctx.output_queues;
  if (!input_queue_) {
    LogError("[ai_face_recog] no input queue for node " + id_);
    return false;
  }
  if (output_queues_.empty()) {
    LogError("[ai_face_recog] no output queue for node " + id_);
    return false;
  }

  infer_backend_ = ctx.infer_backend;
  if (!infer_backend_) {
    LogError("[ai_face_recog] no infer backend for node " + id_);
    return false;
  }

#if defined(RK3588_ENABLE_RKNN)
  if (model_path_.empty()) {
    LogError("[ai_face_recog] model_path is required");
    return false;
  }
  std::string err;
  model_handle_ = infer_backend_->LoadModel(model_path_, err);
  if (model_handle_ == kInvalidModelHandle) {
    LogError("[ai_face_recog] failed to load model: " + err);
    return false;
  }
  ModelInfo info;
  if (infer_backend_->GetModelInfo(model_handle_, info)) {
    model_w_ = info.input_width;
    model_h_ = info.input_height;
  }
  LogInfo("[ai_face_recog] model loaded: " + model_path_ +
          " (" + std::to_string(model_w_) + "x" + std::to_string(model_h_) + ")");
#else
  // Without RKNN the node is a pure passthrough (see Process()).
  LogWarn("[ai_face_recog] RKNN disabled, will passthrough frames");
#endif
  return true;
}
|
|
|
|
bool Start() override {
|
|
std::shared_ptr<const FaceRecogConfigSnapshot> cfg;
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
cfg = cfg_;
|
|
}
|
|
if (cfg && cfg->gallery_load_on_start) {
|
|
ReloadGallery(*cfg);
|
|
}
|
|
const bool align = cfg ? cfg->align : false;
|
|
const float thr_accept = cfg ? cfg->thr_accept : 0.0f;
|
|
const float thr_margin = cfg ? cfg->thr_margin : 0.0f;
|
|
const bool debug_enabled = cfg ? cfg->debug.enabled : false;
|
|
const bool debug_log_matches = cfg ? cfg->debug.log_matches : false;
|
|
const int debug_interval_ms = cfg ? cfg->debug.min_log_interval_ms : 0;
|
|
LogInfo("[ai_face_recog] start id=" + id_ + " align=" + std::string(align ? "true" : "false") +
|
|
" thr_accept=" + std::to_string(thr_accept) +
|
|
" thr_margin=" + std::to_string(thr_margin) +
|
|
" infer_interval_ms=" + std::to_string(infer_interval_ms_) +
|
|
" debug=" + std::string(debug_enabled ? "true" : "false") +
|
|
" debug_log_matches=" + std::string(debug_log_matches ? "true" : "false") +
|
|
" debug_min_log_interval_ms=" + std::to_string(debug_interval_ms));
|
|
return true;
|
|
}
|
|
|
|
bool UpdateConfig(const SimpleJson& new_config) override {
|
|
const std::string new_id = new_config.ValueOr<std::string>("id", id_);
|
|
if (!new_id.empty() && new_id != id_) return false;
|
|
|
|
const std::string new_model = new_config.ValueOr<std::string>("model_path", model_path_);
|
|
if (new_model != model_path_) {
|
|
// Changing model requires graph rebuild.
|
|
return false;
|
|
}
|
|
|
|
std::shared_ptr<const FaceRecogConfigSnapshot> base;
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
base = cfg_;
|
|
}
|
|
|
|
std::shared_ptr<const FaceRecogConfigSnapshot> snap;
|
|
BuildFaceRecogConfigSnapshot(new_config, base, snap);
|
|
|
|
bool reload = false;
|
|
if (base && snap) {
|
|
reload = (snap->gallery_backend != base->gallery_backend ||
|
|
snap->gallery_path != base->gallery_path ||
|
|
snap->gallery_expected_dim != base->gallery_expected_dim ||
|
|
snap->gallery_dtype != base->gallery_dtype ||
|
|
snap->gallery_reload_seq != base->gallery_reload_seq);
|
|
}
|
|
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
cfg_ = snap;
|
|
}
|
|
|
|
if (reload && snap) {
|
|
ReloadGallery(*snap);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Release the RKNN model (when compiled in) and log shutdown.
void Stop() override {
#if defined(RK3588_ENABLE_RKNN)
  if (model_handle_ != kInvalidModelHandle) {
    infer_backend_->UnloadModel(model_handle_);
    model_handle_ = kInvalidModelHandle;
  }
#endif
  LogInfo("[ai_face_recog] stop id=" + id_);
}
|
|
|
|
// Per-frame entry point. When RKNN is enabled, optionally rate-limits
// inference using the frame PTS (interval + phase offset) and runs face
// recognition; the frame is always forwarded downstream either way.
NodeStatus Process(FramePtr frame) override {
  if (!frame) return NodeStatus::DROP;

#if defined(RK3588_ENABLE_RKNN)
  if (infer_interval_ms_ > 0 && frame->pts > 0) {
    // NOTE(review): pts appears to be in microseconds (divided by 1000
    // to obtain ms) — confirm against the frame producer.
    const int64_t pts_ms = static_cast<int64_t>(frame->pts / 1000ULL);
    const int64_t effective_pts_ms = pts_ms + infer_phase_ms_;
    const int64_t delta_ms = effective_pts_ms - last_infer_pts_ms_;
    // Skip inference (but still forward the frame) if the last run was
    // less than one interval ago. A non-positive delta (PTS jumped
    // backwards) falls through and re-runs inference.
    if (last_infer_pts_ms_ > 0 && delta_ms > 0 && delta_ms < infer_interval_ms_) {
      Push(frame);
      return NodeStatus::OK;
    }
    last_infer_pts_ms_ = effective_pts_ms;
  }
  Run(frame);
#endif
  Push(frame);
  return NodeStatus::OK;
}
|
|
|
|
private:
|
|
void Push(FramePtr frame) {
|
|
for (auto& q : output_queues_) q->Push(frame);
|
|
}
|
|
|
|
void ReloadGallery(const FaceRecogConfigSnapshot& cfg) {
|
|
if (cfg.gallery_path.empty()) return;
|
|
|
|
std::string err;
|
|
FaceGallery g;
|
|
g.SetExpectedDim(cfg.gallery_expected_dim);
|
|
g.SetPreferredDtype(cfg.gallery_dtype);
|
|
bool ok = false;
|
|
|
|
if (cfg.gallery_backend == "sqlite") {
|
|
ok = g.LoadSqliteBackend(cfg.gallery_path, err);
|
|
} else if (cfg.gallery_backend == "file") {
|
|
ok = g.LoadFileBackend(cfg.gallery_path, err);
|
|
} else {
|
|
err = "unknown gallery backend: " + cfg.gallery_backend;
|
|
}
|
|
|
|
if (!ok) {
|
|
if (!err.empty()) LogWarn("[ai_face_recog] gallery load failed: " + err);
|
|
return;
|
|
}
|
|
|
|
auto sp = std::make_shared<FaceGallery>(std::move(g));
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
gallery_ = sp;
|
|
}
|
|
LogInfo("[ai_face_recog] gallery loaded: n=" + std::to_string(sp->Size()) +
|
|
" dim=" + std::to_string(sp->Dim()));
|
|
}
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
// Runs face recognition on one frame: for each upstream face detection,
// crop/align the face, run the embedding model, match against the gallery,
// and attach a FaceRecogResult to the frame. Early-returns (without
// attaching a result) when there are no detections, no pixel data, or the
// frame is not packed RGB/BGR.
void Run(FramePtr frame) {
  if (!frame->face_det || frame->face_det->faces.empty()) return;
  if (!frame->data || frame->data_size == 0) return;
  if (frame->format != PixelFormat::RGB && frame->format != PixelFormat::BGR) {
    LogWarn("[ai_face_recog] input must be RGB/BGR");
    return;
  }

  // Prefer the plane pointer/stride when set; fall back to the flat buffer
  // and a packed stride of w*3 (3 bytes per pixel for RGB/BGR).
  const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
  const int w = frame->width;
  const int h = frame->height;
  const int stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
                                                 : (frame->stride > 0 ? frame->stride : w * 3);
  if (!src || stride <= 0) return;

  // Take shared_ptr snapshots of config and gallery under the lock so the
  // rest of this function runs lock-free against a consistent view.
  std::shared_ptr<const FaceRecogConfigSnapshot> cfg;
  std::shared_ptr<const FaceGallery> gallery;
  {
    std::lock_guard<std::mutex> lock(mu_);
    cfg = cfg_;
    gallery = gallery_;
  }
  if (!cfg) return;

  // Swap channels during crop/warp when the frame's channel order differs
  // from what the model expects.
  const bool need_swap = (frame->format == PixelFormat::BGR && cfg->model_input_format == "rgb") ||
                         (frame->format == PixelFormat::RGB && cfg->model_input_format == "bgr");

  // Bracket CPU reads of DMA-backed frames with SyncStart/SyncEnd.
  const bool sync_src = (frame->DmaFd() >= 0);
  if (sync_src) frame->SyncStart();

  FaceRecogResult rr;
  rr.img_w = w;
  rr.img_h = h;
  rr.model_name = "arcface";

  // Process at most max_faces detections per frame.
  const int limit = std::min<int>(cfg->max_faces, static_cast<int>(frame->face_det->faces.size()));
  rr.items.reserve(static_cast<size_t>(limit));

  for (int i = 0; i < limit; ++i) {
    const FaceDetItem& face = frame->face_det->faces[static_cast<size_t>(i)];
    FaceTrackAssociationDiag assoc_diag;

    // Reuse the member buffer (resize is a no-op after the first iteration).
    face_buf_.resize(static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_) * 3);
    if (cfg->align && face.has_landmarks && model_w_ == 112 && model_h_ == 112) {
      // Five-point reference landmark template for a 112x112 aligned face
      // (ArcFace-style alignment; see rr.model_name above).
      const std::array<Point2f, 5> dst = {
          Point2f{38.2946f, 51.6963f},
          Point2f{73.5318f, 51.5014f},
          Point2f{56.0252f, 71.7366f},
          Point2f{41.5493f, 92.3655f},
          Point2f{70.7299f, 92.2041f},
      };
      SimilarityTransform t;
      InvTransform inv;
      // Warp via the inverse similarity transform; if the transform is
      // degenerate, fall back to a plain bbox crop+resize.
      if (ComputeSimilarity(face.landmarks, dst, t) && InvertSimilarity(t, inv)) {
        WarpFace(src, w, h, stride, inv, face_buf_.data(), model_w_, model_h_, need_swap);
      } else {
        CropResize(src, w, h, stride, face.bbox, face_buf_.data(), model_w_, model_h_, need_swap);
      }
    } else {
      CropResize(src, w, h, stride, face.bbox, face_buf_.data(), model_w_, model_h_, need_swap);
    }

    InferInput in;
    in.width = model_w_;
    in.height = model_h_;
    in.is_nhwc = true;

    if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") {
      // Float input path: normalize per channel into float_input_buf_,
      // either (x - mean) / std or x * scale + bias.
      float_input_buf_.resize(static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_) * 3);
      const size_t pix = static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_);
      const uint8_t* p = face_buf_.data();
      for (size_t ii = 0; ii < pix; ++ii) {
        for (int c = 0; c < 3; ++c) {
          float x = static_cast<float>(p[ii * 3 + static_cast<size_t>(c)]);
          if (cfg->norm_use_mean_std) {
            // Guard against division by a (near-)zero std.
            const float st = std::fabs(cfg->norm_std[static_cast<size_t>(c)]) < 1e-6f ? 1.0f
                : cfg->norm_std[static_cast<size_t>(c)];
            x = (x - cfg->norm_mean[static_cast<size_t>(c)]) / st;
          } else {
            x = x * cfg->norm_scale + cfg->norm_bias;
          }
          float_input_buf_[ii * 3 + static_cast<size_t>(c)] = x;
        }
      }

      in.data = float_input_buf_.data();
      in.size = float_input_buf_.size() * sizeof(float);
      in.type = RKNN_TENSOR_FLOAT32;
    } else {
      // Quantized/uint8 input path: feed the crop buffer directly.
      in.data = face_buf_.data();
      in.size = face_buf_.size();
      in.type = RKNN_TENSOR_UINT8;
    }

    // A failed inference skips this face but continues with the rest.
    auto r = infer_backend_->InferBorrowed(model_handle_, in);
    if (!r.success || r.outputs.empty()) {
      LogWarn(std::string("[ai_face_recog] inference failed: ") + (r.error.empty() ? "unknown" : r.error));
      continue;
    }

    std::vector<float> emb;
    if (!DecodeEmbedding(r.outputs[0], emb)) {
      continue;
    }
    // L2-normalize so gallery similarity is a cosine similarity.
    L2Normalize(emb);

    FaceGallery::SearchResult sr;
    if (gallery && gallery->Size() > 0) {
      sr = gallery->SearchTop2(emb);
    }

    // Accept a match when the best score clears thr_accept and (when a
    // margin is configured) the gap to the runner-up clears thr_margin.
    const bool accept = (sr.best_person_id >= 0) &&
                        (sr.best_sim >= cfg->thr_accept) &&
                        ((cfg->thr_margin <= 0.0f) || ((sr.best_sim - sr.second_sim) >= cfg->thr_margin));

    FaceRecogItem item;
    item.bbox = face.bbox;
    // Associate this face with a person track from the frame detections
    // (with a fallback path, plus diagnostics for debug logging).
    item.person_track_id = AssociateFaceToPersonTrackWithFallback(
        face.bbox, frame->det.get(), cfg->person_class_id, cfg->track_state_key,
        cfg->track_state_max_age_ms, &assoc_diag);
    item.has_landmarks = face.has_landmarks;
    item.landmarks = face.landmarks;

    // candidate_* always carry the best match; best_* are cleared unless
    // the match was accepted.
    item.candidate_person_id = sr.best_person_id;
    item.candidate_name = sr.best_name;
    item.best_person_id = accept ? sr.best_person_id : -1;
    item.best_name = accept ? sr.best_name : "";
    item.best_sim = sr.best_sim;
    item.second_sim = sr.second_sim;
    item.state = accept ? FaceRecogState::Known : FaceRecogState::Uncertain;

    if (cfg->emit_embedding) item.embedding = emb;
    rr.items.push_back(std::move(item));

    if (cfg->debug.enabled && cfg->debug.log_matches) {
      LogInfo(BuildFaceTrackAssociationDiagLine(id_, frame->frame_id, face.bbox,
                                                cfg->person_class_id, assoc_diag));
    }
  }

  if (sync_src) frame->SyncEnd();

  // Publish results on the frame, then emit rate-limited debug summaries.
  frame->face_recog = std::make_shared<FaceRecogResult>(std::move(rr));
  MaybeLogDebugFrame(*cfg, frame->frame_id, frame->pts,
                     frame->face_det ? frame->face_det->faces.size() : 0,
                     frame->face_recog ? frame->face_recog->items.size() : 0);
  if (cfg->debug.enabled && cfg->debug.log_matches && frame->face_recog) {
    for (const auto& item : frame->face_recog->items) {
      LogInfo(BuildFaceRecogDebugSummaryLine(id_, frame->frame_id, item));
    }
  }
}
|
|
|
|
void MaybeLogDebugFrame(const FaceRecogConfigSnapshot& cfg, uint64_t frame_id, uint64_t pts_us,
|
|
size_t faces_in, size_t recog_items) {
|
|
if (!cfg.debug.enabled) return;
|
|
if (!ShouldEmitDebugLog(cfg, frame_id, pts_us)) return;
|
|
LogInfo(BuildFaceRecogDebugFrameLine(id_, frame_id, faces_in, recog_items));
|
|
}
|
|
|
|
bool ShouldEmitDebugLog(const FaceRecogConfigSnapshot& cfg, uint64_t frame_id, uint64_t pts_us) {
|
|
if (cfg.debug.min_log_interval_ms <= 0) return true;
|
|
const int64_t now_key_ms = pts_us > 0
|
|
? static_cast<int64_t>(pts_us / 1000ULL)
|
|
: static_cast<int64_t>(frame_id);
|
|
if (now_key_ms <= 0) return true;
|
|
if (last_debug_log_pts_ms_ > 0 &&
|
|
(now_key_ms - last_debug_log_pts_ms_) < cfg.debug.min_log_interval_ms) {
|
|
return false;
|
|
}
|
|
last_debug_log_pts_ms_ = now_key_ms;
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
std::string id_;          // node instance id; compared against config updates
std::string model_path_;  // embedding model path; changing it rejects UpdateConfig

mutable std::mutex mu_;   // guards cfg_ and gallery_ (snapshotted per frame)
std::shared_ptr<const FaceRecogConfigSnapshot> cfg_;   // current immutable config snapshot
std::shared_ptr<const FaceGallery> gallery_;           // current immutable gallery snapshot

std::shared_ptr<SpscQueue<FramePtr>> input_queue_;                  // upstream frame source
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;   // downstream fan-out targets
std::shared_ptr<IInferBackend> infer_backend_;                      // NPU inference backend

std::vector<uint8_t> face_buf_;        // reusable model_w_*model_h_*3 crop buffer
std::vector<float> float_input_buf_;   // reusable normalized float input buffer

ModelHandle model_handle_ = kInvalidModelHandle;  // loaded model, or invalid when unloaded
int model_w_ = 112;   // model input width (112x112 enables landmark alignment)
int model_h_ = 112;   // model input height
int64_t infer_interval_ms_ = 0;   // min ms between inferences; 0 = every frame
int64_t infer_phase_ms_ = 0;      // phase offset added to PTS for throttle staggering
int64_t last_infer_pts_ms_ = 0;   // phase-adjusted PTS (ms) of the last inference
int64_t last_debug_log_pts_ms_ = std::numeric_limits<int64_t>::min();  // rate-limit state for debug logs
|
|
};
|
|
|
|
REGISTER_NODE(AiFaceRecogNode, "ai_face_recog");
|
|
|
|
} // namespace rk3588
|