优化程序,下一步测试人脸识别和检测模型rknn
This commit is contained in:
parent
8bb5b51574
commit
5d8c0255ca
@ -7,6 +7,7 @@
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -31,6 +32,104 @@ struct Prior {
|
||||
float h = 0.0f;
|
||||
};
|
||||
|
||||
// Value bundle holding every tunable setting of the face-detection node.
// Each member's initializer is the value used when nothing overrides it.
struct FaceDetConfigSnapshot {
    // Post-processing thresholds and limits.
    float conf_thresh{0.6f};
    float nms_thresh{0.4f};
    int max_faces{10};
    bool output_landmarks{true};

    // Channel order and element type expected by the model input.
    std::string input_format{"rgb"};
    std::string input_dtype{"uint8"};

    // Normalization: either x*scale + bias, or (x - mean[c]) / std[c]
    // when norm_use_mean_std is set.
    float norm_scale{1.0f};
    float norm_bias{0.0f};
    bool norm_use_mean_std{false};
    std::array<float, 3> norm_mean{{0.0f, 0.0f, 0.0f}};
    std::array<float, 3> norm_std{{1.0f, 1.0f, 1.0f}};

    // RetinaFace priors defaults for 320 input (MobileNet0.25).
    std::vector<int> steps{8, 16, 32};
    std::vector<std::vector<int>> min_sizes{{16, 32}, {64, 128}, {256, 512}};
};
|
||||
|
||||
static bool BuildFaceDetConfigSnapshot(const SimpleJson& config,
|
||||
const std::shared_ptr<const FaceDetConfigSnapshot>& base,
|
||||
std::shared_ptr<const FaceDetConfigSnapshot>& out) {
|
||||
auto snap = std::make_shared<FaceDetConfigSnapshot>();
|
||||
if (base) *snap = *base;
|
||||
|
||||
snap->conf_thresh = config.ValueOr<float>("conf", snap->conf_thresh);
|
||||
snap->nms_thresh = config.ValueOr<float>("nms", snap->nms_thresh);
|
||||
snap->max_faces = std::max(1, config.ValueOr<int>("max_faces", snap->max_faces));
|
||||
snap->output_landmarks = config.ValueOr<bool>("output_landmarks", snap->output_landmarks);
|
||||
|
||||
{
|
||||
std::string fmt = config.ValueOr<std::string>("input_format", snap->input_format);
|
||||
for (auto& c : fmt) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
snap->input_format = std::move(fmt);
|
||||
}
|
||||
{
|
||||
std::string dtype = config.ValueOr<std::string>("input_dtype", snap->input_dtype);
|
||||
for (auto& c : dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
snap->input_dtype = std::move(dtype);
|
||||
}
|
||||
|
||||
if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) {
|
||||
bool use_ms = false;
|
||||
if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
snap->norm_mean[static_cast<size_t>(i)] =
|
||||
static_cast<float>(mean->AsArray()[static_cast<size_t>(i)].AsNumber(snap->norm_mean[static_cast<size_t>(i)]));
|
||||
}
|
||||
use_ms = true;
|
||||
}
|
||||
if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
snap->norm_std[static_cast<size_t>(i)] =
|
||||
static_cast<float>(st->AsArray()[static_cast<size_t>(i)].AsNumber(snap->norm_std[static_cast<size_t>(i)]));
|
||||
}
|
||||
use_ms = true;
|
||||
}
|
||||
snap->norm_use_mean_std = use_ms;
|
||||
snap->norm_scale = norm->ValueOr<float>("scale", snap->norm_scale);
|
||||
snap->norm_bias = norm->ValueOr<float>("bias", snap->norm_bias);
|
||||
}
|
||||
|
||||
if (const SimpleJson* pri = config.Find("prior"); pri && pri->IsObject()) {
|
||||
std::vector<int> new_steps = snap->steps;
|
||||
std::vector<std::vector<int>> new_mins = snap->min_sizes;
|
||||
|
||||
if (const SimpleJson* steps = pri->Find("steps"); steps && steps->IsArray()) {
|
||||
std::vector<int> tmp;
|
||||
for (const auto& v : steps->AsArray()) tmp.push_back(std::max(1, v.AsInt(1)));
|
||||
if (!tmp.empty()) new_steps = std::move(tmp);
|
||||
}
|
||||
if (const SimpleJson* mins = pri->Find("min_sizes"); mins && mins->IsArray()) {
|
||||
std::vector<std::vector<int>> tmp;
|
||||
for (const auto& grp : mins->AsArray()) {
|
||||
std::vector<int> g;
|
||||
for (const auto& v : grp.AsArray()) g.push_back(std::max(1, v.AsInt(1)));
|
||||
if (!g.empty()) tmp.push_back(std::move(g));
|
||||
}
|
||||
if (!tmp.empty()) new_mins = std::move(tmp);
|
||||
}
|
||||
|
||||
if (!new_steps.empty() && !new_mins.empty() && new_steps.size() == new_mins.size()) {
|
||||
snap->steps = std::move(new_steps);
|
||||
snap->min_sizes = std::move(new_mins);
|
||||
} else {
|
||||
// Best-effort: keep previous priors to avoid per-frame mismatch.
|
||||
if (base) {
|
||||
snap->steps = base->steps;
|
||||
snap->min_sizes = base->min_sizes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out = std::move(snap);
|
||||
return true;
|
||||
}
|
||||
|
||||
float IoU(const Rect& a, const Rect& b) {
|
||||
const float ax1 = a.x;
|
||||
const float ay1 = a.y;
|
||||
@ -320,58 +419,12 @@ public:
|
||||
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
|
||||
id_ = config.ValueOr<std::string>("id", "face_det");
|
||||
model_path_ = config.ValueOr<std::string>("model_path", "");
|
||||
conf_thresh_ = config.ValueOr<float>("conf", 0.6f);
|
||||
nms_thresh_ = config.ValueOr<float>("nms", 0.4f);
|
||||
max_faces_ = std::max(1, config.ValueOr<int>("max_faces", 10));
|
||||
output_landmarks_ = config.ValueOr<bool>("output_landmarks", true);
|
||||
|
||||
const std::string fmt = config.ValueOr<std::string>("input_format", "rgb");
|
||||
input_format_ = fmt;
|
||||
for (auto& c : input_format_) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
|
||||
input_dtype_ = config.ValueOr<std::string>("input_dtype", input_dtype_);
|
||||
for (auto& c : input_dtype_) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
|
||||
// Optional normalization when input_dtype is float.
|
||||
// - scale/bias: x = x*scale + bias
|
||||
// - mean/std: x = (x - mean[c]) / std[c]
|
||||
if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) {
|
||||
if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
norm_mean_[static_cast<size_t>(i)] = static_cast<float>(mean->AsArray()[static_cast<size_t>(i)].AsNumber(norm_mean_[static_cast<size_t>(i)]));
|
||||
}
|
||||
norm_use_mean_std_ = true;
|
||||
}
|
||||
if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
norm_std_[static_cast<size_t>(i)] = static_cast<float>(st->AsArray()[static_cast<size_t>(i)].AsNumber(norm_std_[static_cast<size_t>(i)]));
|
||||
}
|
||||
norm_use_mean_std_ = true;
|
||||
}
|
||||
norm_scale_ = norm->ValueOr<float>("scale", norm_scale_);
|
||||
norm_bias_ = norm->ValueOr<float>("bias", norm_bias_);
|
||||
}
|
||||
|
||||
// RetinaFace priors defaults for 320 input (MobileNet0.25).
|
||||
steps_ = {8, 16, 32};
|
||||
min_sizes_ = {{16, 32}, {64, 128}, {256, 512}};
|
||||
if (const SimpleJson* pri = config.Find("prior"); pri && pri->IsObject()) {
|
||||
if (const SimpleJson* steps = pri->Find("steps"); steps && steps->IsArray()) {
|
||||
steps_.clear();
|
||||
for (const auto& v : steps->AsArray()) {
|
||||
steps_.push_back(std::max(1, v.AsInt(1)));
|
||||
}
|
||||
}
|
||||
if (const SimpleJson* mins = pri->Find("min_sizes"); mins && mins->IsArray()) {
|
||||
min_sizes_.clear();
|
||||
for (const auto& grp : mins->AsArray()) {
|
||||
std::vector<int> g;
|
||||
for (const auto& v : grp.AsArray()) {
|
||||
g.push_back(std::max(1, v.AsInt(1)));
|
||||
}
|
||||
if (!g.empty()) min_sizes_.push_back(std::move(g));
|
||||
}
|
||||
}
|
||||
std::shared_ptr<const FaceDetConfigSnapshot> snap;
|
||||
BuildFaceDetConfigSnapshot(config, nullptr, snap);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
cfg_ = std::move(snap);
|
||||
priors_cache_ = {};
|
||||
}
|
||||
|
||||
input_queue_ = ctx.input_queue;
|
||||
@ -412,8 +465,16 @@ public:
|
||||
}
|
||||
|
||||
bool Start() override {
|
||||
LogInfo("[ai_face_det] start id=" + id_ + " conf=" + std::to_string(conf_thresh_) +
|
||||
" nms=" + std::to_string(nms_thresh_) + " max_faces=" + std::to_string(max_faces_));
|
||||
std::shared_ptr<const FaceDetConfigSnapshot> cfg;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
cfg = cfg_;
|
||||
}
|
||||
const float conf = cfg ? cfg->conf_thresh : 0.0f;
|
||||
const float nms = cfg ? cfg->nms_thresh : 0.0f;
|
||||
const int max_faces = cfg ? cfg->max_faces : 0;
|
||||
LogInfo("[ai_face_det] start id=" + id_ + " conf=" + std::to_string(conf) +
|
||||
" nms=" + std::to_string(nms) + " max_faces=" + std::to_string(max_faces));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -426,52 +487,19 @@ public:
|
||||
// Changing model requires graph rebuild.
|
||||
return false;
|
||||
}
|
||||
|
||||
conf_thresh_ = new_config.ValueOr<float>("conf", conf_thresh_);
|
||||
nms_thresh_ = new_config.ValueOr<float>("nms", nms_thresh_);
|
||||
max_faces_ = std::max(1, new_config.ValueOr<int>("max_faces", max_faces_));
|
||||
output_landmarks_ = new_config.ValueOr<bool>("output_landmarks", output_landmarks_);
|
||||
|
||||
std::string dtype = new_config.ValueOr<std::string>("input_dtype", input_dtype_);
|
||||
for (auto& c : dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
input_dtype_ = std::move(dtype);
|
||||
|
||||
if (const SimpleJson* norm = new_config.Find("normalize"); norm && norm->IsObject()) {
|
||||
bool use_ms = false;
|
||||
if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
norm_mean_[static_cast<size_t>(i)] = static_cast<float>(mean->AsArray()[static_cast<size_t>(i)].AsNumber(norm_mean_[static_cast<size_t>(i)]));
|
||||
}
|
||||
use_ms = true;
|
||||
}
|
||||
if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
norm_std_[static_cast<size_t>(i)] = static_cast<float>(st->AsArray()[static_cast<size_t>(i)].AsNumber(norm_std_[static_cast<size_t>(i)]));
|
||||
}
|
||||
use_ms = true;
|
||||
}
|
||||
norm_use_mean_std_ = use_ms;
|
||||
norm_scale_ = norm->ValueOr<float>("scale", norm_scale_);
|
||||
norm_bias_ = norm->ValueOr<float>("bias", norm_bias_);
|
||||
std::shared_ptr<const FaceDetConfigSnapshot> base;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
base = cfg_;
|
||||
}
|
||||
|
||||
if (const SimpleJson* pri = new_config.Find("prior"); pri && pri->IsObject()) {
|
||||
if (const SimpleJson* steps = pri->Find("steps"); steps && steps->IsArray()) {
|
||||
std::vector<int> new_steps;
|
||||
for (const auto& v : steps->AsArray()) new_steps.push_back(std::max(1, v.AsInt(1)));
|
||||
if (!new_steps.empty()) steps_ = std::move(new_steps);
|
||||
}
|
||||
if (const SimpleJson* mins = pri->Find("min_sizes"); mins && mins->IsArray()) {
|
||||
std::vector<std::vector<int>> new_mins;
|
||||
for (const auto& grp : mins->AsArray()) {
|
||||
std::vector<int> g;
|
||||
for (const auto& v : grp.AsArray()) g.push_back(std::max(1, v.AsInt(1)));
|
||||
if (!g.empty()) new_mins.push_back(std::move(g));
|
||||
}
|
||||
if (!new_mins.empty()) min_sizes_ = std::move(new_mins);
|
||||
}
|
||||
std::shared_ptr<const FaceDetConfigSnapshot> snap;
|
||||
BuildFaceDetConfigSnapshot(new_config, base, snap);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
cfg_ = std::move(snap);
|
||||
priors_cache_ = {};
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -516,8 +544,15 @@ private:
|
||||
: (frame->stride > 0 ? frame->stride : static_cast<int>(src_row));
|
||||
if (!src || src_stride <= 0) return;
|
||||
|
||||
const bool need_swap = (frame->format == PixelFormat::BGR && input_format_ == "rgb") ||
|
||||
(frame->format == PixelFormat::RGB && input_format_ == "bgr");
|
||||
std::shared_ptr<const FaceDetConfigSnapshot> cfg;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
cfg = cfg_;
|
||||
}
|
||||
if (!cfg) return;
|
||||
|
||||
const bool need_swap = (frame->format == PixelFormat::BGR && cfg->input_format == "rgb") ||
|
||||
(frame->format == PixelFormat::RGB && cfg->input_format == "bgr");
|
||||
|
||||
const int in_w = model_w_ > 0 ? model_w_ : src_w;
|
||||
const int in_h = model_h_ > 0 ? model_h_ : src_h;
|
||||
@ -550,18 +585,19 @@ private:
|
||||
input.is_nhwc = true;
|
||||
|
||||
// Default: keep existing UINT8 behavior.
|
||||
if (input_dtype_ == "float" || input_dtype_ == "f32" || input_dtype_ == "float32") {
|
||||
if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") {
|
||||
float_input_buf_.resize(static_cast<size_t>(in_w) * static_cast<size_t>(in_h) * 3);
|
||||
const size_t pix = static_cast<size_t>(in_w) * static_cast<size_t>(in_h);
|
||||
const uint8_t* p = reinterpret_cast<const uint8_t*>(input_ptr);
|
||||
for (size_t i = 0; i < pix; ++i) {
|
||||
for (int c = 0; c < 3; ++c) {
|
||||
float x = static_cast<float>(p[i * 3 + static_cast<size_t>(c)]);
|
||||
if (norm_use_mean_std_) {
|
||||
const float st = std::fabs(norm_std_[static_cast<size_t>(c)]) < 1e-6f ? 1.0f : norm_std_[static_cast<size_t>(c)];
|
||||
x = (x - norm_mean_[static_cast<size_t>(c)]) / st;
|
||||
if (cfg->norm_use_mean_std) {
|
||||
const float st = std::fabs(cfg->norm_std[static_cast<size_t>(c)]) < 1e-6f ? 1.0f
|
||||
: cfg->norm_std[static_cast<size_t>(c)];
|
||||
x = (x - cfg->norm_mean[static_cast<size_t>(c)]) / st;
|
||||
} else {
|
||||
x = x * norm_scale_ + norm_bias_;
|
||||
x = x * cfg->norm_scale + cfg->norm_bias;
|
||||
}
|
||||
float_input_buf_[i * 3 + static_cast<size_t>(c)] = x;
|
||||
}
|
||||
@ -600,13 +636,47 @@ private:
|
||||
det.img_h = src_h;
|
||||
det.model_name = "retinaface";
|
||||
|
||||
DecodeRetinaFace(tensors, src_w, src_h, in_w, in_h, det);
|
||||
std::shared_ptr<const std::vector<Prior>> priors = GetRetinaFacePriors(cfg, in_w, in_h);
|
||||
DecodeRetinaFace(tensors, src_w, src_h, in_w, in_h, *cfg, priors.get(), det);
|
||||
frame->face_det = std::make_shared<FaceDetResult>(std::move(det));
|
||||
}
|
||||
|
||||
struct PriorsCache {
|
||||
int in_w = 0;
|
||||
int in_h = 0;
|
||||
const FaceDetConfigSnapshot* cfg_ptr = nullptr;
|
||||
std::shared_ptr<const std::vector<Prior>> priors;
|
||||
};
|
||||
|
||||
// Returns the RetinaFace priors for the given snapshot and model input size,
// generating and caching them on a miss.
//
// cfg:        configuration snapshot providing `steps` and `min_sizes`.
// in_w, in_h: model input dimensions; must be positive.
// Returns nullptr when `cfg` is null or a dimension is not positive,
// otherwise a shared, read-only prior list.
std::shared_ptr<const std::vector<Prior>> GetRetinaFacePriors(const std::shared_ptr<const FaceDetConfigSnapshot>& cfg,
                                                              int in_w, int in_h) {
    if (!cfg || in_w <= 0 || in_h <= 0) return nullptr;

    {
        std::lock_guard<std::mutex> lock(mu_);
        const bool hit = priors_cache_.priors && priors_cache_.cfg_ptr == cfg.get() &&
                         priors_cache_.in_w == in_w && priors_cache_.in_h == in_h;
        if (hit) return priors_cache_.priors;
    }

    // Generate without holding mu_, then move the result into the shared
    // vector; the previous code built a const local and copied it.
    std::vector<Prior> built = GenerateRetinaFacePriors(in_w, in_h, cfg->steps, cfg->min_sizes);
    auto sp = std::make_shared<std::vector<Prior>>(std::move(built));

    std::lock_guard<std::mutex> lock(mu_);
    priors_cache_.cfg_ptr = cfg.get();
    priors_cache_.in_w = in_w;
    priors_cache_.in_h = in_h;
    priors_cache_.priors = std::move(sp);
    return priors_cache_.priors;
}
|
||||
|
||||
void DecodeRetinaFace(const std::vector<Tensor>& outs,
|
||||
int orig_w, int orig_h,
|
||||
int in_w, int in_h,
|
||||
const FaceDetConfigSnapshot& cfg,
|
||||
const std::vector<Prior>* priors_ptr,
|
||||
FaceDetResult& out) {
|
||||
// Find loc/conf/landms tensors.
|
||||
std::vector<NcTensor> locs;
|
||||
@ -654,12 +724,13 @@ private:
|
||||
NcTensor loc = Concat(locs);
|
||||
NcTensor conf = Concat(confs);
|
||||
NcTensor lmk;
|
||||
if (output_landmarks_ && !landms.empty()) lmk = Concat(landms);
|
||||
if (cfg.output_landmarks && !landms.empty()) lmk = Concat(landms);
|
||||
|
||||
if (loc.n <= 0 || conf.n != loc.n) return;
|
||||
const int n = loc.n;
|
||||
|
||||
const std::vector<Prior> priors = GenerateRetinaFacePriors(in_w, in_h, steps_, min_sizes_);
|
||||
const std::vector<Prior> empty_priors;
|
||||
const std::vector<Prior>& priors = priors_ptr ? *priors_ptr : empty_priors;
|
||||
if (!priors.empty() && static_cast<int>(priors.size()) != n) {
|
||||
// Mismatch: can't reliably decode.
|
||||
std::cerr << "[ai_face_det] prior mismatch: priors=" << priors.size() << " n=" << n << "\n";
|
||||
@ -674,7 +745,7 @@ private:
|
||||
std::vector<std::array<Point2f, 5>> lmks;
|
||||
boxes.reserve(static_cast<size_t>(n));
|
||||
scores.reserve(static_cast<size_t>(n));
|
||||
if (output_landmarks_) lmks.reserve(static_cast<size_t>(n));
|
||||
if (cfg.output_landmarks) lmks.reserve(static_cast<size_t>(n));
|
||||
|
||||
constexpr float var0 = 0.1f;
|
||||
constexpr float var1 = 0.2f;
|
||||
@ -688,7 +759,7 @@ private:
|
||||
} else {
|
||||
score = Softmax2(s0, s1);
|
||||
}
|
||||
if (score < conf_thresh_) continue;
|
||||
if (score < cfg.conf_thresh) continue;
|
||||
|
||||
const Prior p = priors.empty() ? Prior{0, 0, 0, 0} : priors[static_cast<size_t>(i)];
|
||||
|
||||
@ -724,7 +795,7 @@ private:
|
||||
boxes.push_back(bb);
|
||||
scores.push_back(score);
|
||||
|
||||
if (output_landmarks_ && !lmk.data.empty() && lmk.n == n) {
|
||||
if (cfg.output_landmarks && !lmk.data.empty() && lmk.n == n) {
|
||||
std::array<Point2f, 5> pts{};
|
||||
for (int k = 0; k < 5; ++k) {
|
||||
const float lx = lmk.data[static_cast<size_t>(i) * 10 + k * 2 + 0];
|
||||
@ -741,10 +812,10 @@ private:
|
||||
if (boxes.empty()) return;
|
||||
|
||||
std::vector<int> keep;
|
||||
NmsSorted(boxes, scores, nms_thresh_, keep);
|
||||
NmsSorted(boxes, scores, cfg.nms_thresh, keep);
|
||||
if (keep.empty()) return;
|
||||
|
||||
const int out_n = std::min<int>(max_faces_, static_cast<int>(keep.size()));
|
||||
const int out_n = std::min<int>(cfg.max_faces, static_cast<int>(keep.size()));
|
||||
out.faces.reserve(static_cast<size_t>(out_n));
|
||||
for (int i = 0; i < out_n; ++i) {
|
||||
const int k = keep[static_cast<size_t>(i)];
|
||||
@ -752,7 +823,7 @@ private:
|
||||
item.bbox = boxes[static_cast<size_t>(k)];
|
||||
item.score = scores[static_cast<size_t>(k)];
|
||||
item.track_id = -1;
|
||||
if (output_landmarks_ && k < static_cast<int>(lmks.size())) {
|
||||
if (cfg.output_landmarks && k < static_cast<int>(lmks.size())) {
|
||||
item.has_landmarks = true;
|
||||
item.landmarks = lmks[static_cast<size_t>(k)];
|
||||
}
|
||||
@ -765,23 +836,9 @@ private:
|
||||
std::string id_;
|
||||
std::string model_path_;
|
||||
|
||||
float conf_thresh_ = 0.6f;
|
||||
float nms_thresh_ = 0.4f;
|
||||
int max_faces_ = 10;
|
||||
bool output_landmarks_ = true;
|
||||
|
||||
std::string input_format_ = "rgb";
|
||||
|
||||
// Model input dtype: "uint8" (default) or "float32".
|
||||
std::string input_dtype_ = "uint8";
|
||||
float norm_scale_ = 1.0f;
|
||||
float norm_bias_ = 0.0f;
|
||||
bool norm_use_mean_std_ = false;
|
||||
std::array<float, 3> norm_mean_{{0.0f, 0.0f, 0.0f}};
|
||||
std::array<float, 3> norm_std_{{1.0f, 1.0f, 1.0f}};
|
||||
|
||||
std::vector<int> steps_;
|
||||
std::vector<std::vector<int>> min_sizes_;
|
||||
mutable std::mutex mu_;
|
||||
std::shared_ptr<const FaceDetConfigSnapshot> cfg_;
|
||||
PriorsCache priors_cache_;
|
||||
|
||||
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
|
||||
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@ -546,6 +547,91 @@ bool DecodeEmbedding(const AiScheduler::BorrowedOutput& /*o*/, std::vector<float
|
||||
void L2Normalize(std::vector<float>& /*v*/) {}
|
||||
#endif
|
||||
|
||||
// Value bundle holding every tunable setting of the face-recognition node.
// Each member's initializer is the value used when nothing overrides it.
struct FaceRecogConfigSnapshot {
    // General behavior switches.
    bool align{true};
    bool emit_embedding{false};
    int max_faces{10};

    // Matching thresholds (JSON "threshold" object: accept / margin).
    float thr_accept{0.45f};
    float thr_margin{0.05f};

    // Channel order and element type expected by the model input.
    std::string model_input_format{"rgb"};
    std::string input_dtype{"uint8"};

    // Normalization parameters (JSON "normalize" object).
    float norm_scale{1.0f};
    float norm_bias{0.0f};
    bool norm_use_mean_std{false};
    std::array<float, 3> norm_mean{{0.0f, 0.0f, 0.0f}};
    std::array<float, 3> norm_std{{1.0f, 1.0f, 1.0f}};

    // Gallery source description (JSON "gallery" object).
    std::string gallery_backend{"file"};
    std::string gallery_path{};
    bool gallery_load_on_start{true};
    int gallery_expected_dim{512};
    std::string gallery_dtype{"auto"};
};
|
||||
|
||||
static bool BuildFaceRecogConfigSnapshot(const SimpleJson& config,
|
||||
const std::shared_ptr<const FaceRecogConfigSnapshot>& base,
|
||||
std::shared_ptr<const FaceRecogConfigSnapshot>& out) {
|
||||
auto snap = std::make_shared<FaceRecogConfigSnapshot>();
|
||||
if (base) *snap = *base;
|
||||
|
||||
snap->align = config.ValueOr<bool>("align", snap->align);
|
||||
snap->emit_embedding = config.ValueOr<bool>("emit_embedding", snap->emit_embedding);
|
||||
snap->max_faces = std::max(1, config.ValueOr<int>("max_faces", snap->max_faces));
|
||||
|
||||
if (const SimpleJson* th = config.Find("threshold"); th && th->IsObject()) {
|
||||
snap->thr_accept = th->ValueOr<float>("accept", snap->thr_accept);
|
||||
snap->thr_margin = th->ValueOr<float>("margin", snap->thr_margin);
|
||||
}
|
||||
|
||||
{
|
||||
std::string fmt = config.ValueOr<std::string>("input_format", snap->model_input_format);
|
||||
for (auto& c : fmt) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
snap->model_input_format = std::move(fmt);
|
||||
}
|
||||
{
|
||||
std::string dtype = config.ValueOr<std::string>("input_dtype", snap->input_dtype);
|
||||
for (auto& c : dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
snap->input_dtype = std::move(dtype);
|
||||
}
|
||||
|
||||
if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) {
|
||||
bool use_ms = false;
|
||||
if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
snap->norm_mean[static_cast<size_t>(i)] =
|
||||
static_cast<float>(mean->AsArray()[static_cast<size_t>(i)].AsNumber(snap->norm_mean[static_cast<size_t>(i)]));
|
||||
}
|
||||
use_ms = true;
|
||||
}
|
||||
if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
snap->norm_std[static_cast<size_t>(i)] =
|
||||
static_cast<float>(st->AsArray()[static_cast<size_t>(i)].AsNumber(snap->norm_std[static_cast<size_t>(i)]));
|
||||
}
|
||||
use_ms = true;
|
||||
}
|
||||
snap->norm_use_mean_std = use_ms;
|
||||
snap->norm_scale = norm->ValueOr<float>("scale", snap->norm_scale);
|
||||
snap->norm_bias = norm->ValueOr<float>("bias", snap->norm_bias);
|
||||
}
|
||||
|
||||
if (const SimpleJson* g = config.Find("gallery"); g && g->IsObject()) {
|
||||
snap->gallery_backend = g->ValueOr<std::string>("backend", snap->gallery_backend);
|
||||
snap->gallery_path = g->ValueOr<std::string>("path", snap->gallery_path);
|
||||
snap->gallery_load_on_start = g->ValueOr<bool>("load_on_start", snap->gallery_load_on_start);
|
||||
snap->gallery_expected_dim = std::max(0, g->ValueOr<int>("expected_dim", snap->gallery_expected_dim));
|
||||
snap->gallery_dtype = g->ValueOr<std::string>("dtype", snap->gallery_dtype);
|
||||
}
|
||||
for (auto& c : snap->gallery_backend) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
for (auto& c : snap->gallery_dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
|
||||
out = std::move(snap);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
class AiFaceRecogNode : public INode {
|
||||
@ -556,50 +642,14 @@ public:
|
||||
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
|
||||
id_ = config.ValueOr<std::string>("id", "face_recog");
|
||||
model_path_ = config.ValueOr<std::string>("model_path", "");
|
||||
align_ = config.ValueOr<bool>("align", true);
|
||||
emit_embedding_ = config.ValueOr<bool>("emit_embedding", false);
|
||||
max_faces_ = std::max(1, config.ValueOr<int>("max_faces", 10));
|
||||
|
||||
if (const SimpleJson* th = config.Find("threshold"); th && th->IsObject()) {
|
||||
thr_accept_ = th->ValueOr<float>("accept", thr_accept_);
|
||||
thr_margin_ = th->ValueOr<float>("margin", thr_margin_);
|
||||
std::shared_ptr<const FaceRecogConfigSnapshot> snap;
|
||||
BuildFaceRecogConfigSnapshot(config, nullptr, snap);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
cfg_ = std::move(snap);
|
||||
gallery_.reset();
|
||||
}
|
||||
|
||||
const std::string fmt = config.ValueOr<std::string>("input_format", "rgb");
|
||||
model_input_format_ = fmt;
|
||||
for (auto& c : model_input_format_) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
|
||||
input_dtype_ = config.ValueOr<std::string>("input_dtype", input_dtype_);
|
||||
for (auto& c : input_dtype_) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
|
||||
if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) {
|
||||
if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
norm_mean_[static_cast<size_t>(i)] = static_cast<float>(mean->AsArray()[static_cast<size_t>(i)].AsNumber(norm_mean_[static_cast<size_t>(i)]));
|
||||
}
|
||||
norm_use_mean_std_ = true;
|
||||
}
|
||||
if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
norm_std_[static_cast<size_t>(i)] = static_cast<float>(st->AsArray()[static_cast<size_t>(i)].AsNumber(norm_std_[static_cast<size_t>(i)]));
|
||||
}
|
||||
norm_use_mean_std_ = true;
|
||||
}
|
||||
norm_scale_ = norm->ValueOr<float>("scale", norm_scale_);
|
||||
norm_bias_ = norm->ValueOr<float>("bias", norm_bias_);
|
||||
}
|
||||
|
||||
// Gallery
|
||||
if (const SimpleJson* g = config.Find("gallery"); g && g->IsObject()) {
|
||||
gallery_backend_ = g->ValueOr<std::string>("backend", gallery_backend_);
|
||||
gallery_path_ = g->ValueOr<std::string>("path", gallery_path_);
|
||||
gallery_load_on_start_ = g->ValueOr<bool>("load_on_start", gallery_load_on_start_);
|
||||
gallery_expected_dim_ = std::max(0, g->ValueOr<int>("expected_dim", gallery_expected_dim_));
|
||||
gallery_dtype_ = g->ValueOr<std::string>("dtype", gallery_dtype_);
|
||||
}
|
||||
for (auto& c : gallery_backend_) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
for (auto& c : gallery_dtype_) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
|
||||
input_queue_ = ctx.input_queue;
|
||||
output_queues_ = ctx.output_queues;
|
||||
if (!input_queue_) {
|
||||
@ -636,11 +686,19 @@ public:
|
||||
}
|
||||
|
||||
bool Start() override {
|
||||
if (gallery_load_on_start_) {
|
||||
ReloadGallery();
|
||||
std::shared_ptr<const FaceRecogConfigSnapshot> cfg;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
cfg = cfg_;
|
||||
}
|
||||
LogInfo("[ai_face_recog] start id=" + id_ + " align=" + std::string(align_ ? "true" : "false") +
|
||||
" thr_accept=" + std::to_string(thr_accept_) + " thr_margin=" + std::to_string(thr_margin_));
|
||||
if (cfg && cfg->gallery_load_on_start) {
|
||||
ReloadGallery(*cfg);
|
||||
}
|
||||
const bool align = cfg ? cfg->align : false;
|
||||
const float thr_accept = cfg ? cfg->thr_accept : 0.0f;
|
||||
const float thr_margin = cfg ? cfg->thr_margin : 0.0f;
|
||||
LogInfo("[ai_face_recog] start id=" + id_ + " align=" + std::string(align ? "true" : "false") +
|
||||
" thr_accept=" + std::to_string(thr_accept) + " thr_margin=" + std::to_string(thr_margin));
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -654,62 +712,31 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
align_ = new_config.ValueOr<bool>("align", align_);
|
||||
emit_embedding_ = new_config.ValueOr<bool>("emit_embedding", emit_embedding_);
|
||||
max_faces_ = std::max(1, new_config.ValueOr<int>("max_faces", max_faces_));
|
||||
|
||||
if (const SimpleJson* th = new_config.Find("threshold"); th && th->IsObject()) {
|
||||
thr_accept_ = th->ValueOr<float>("accept", thr_accept_);
|
||||
thr_margin_ = th->ValueOr<float>("margin", thr_margin_);
|
||||
std::shared_ptr<const FaceRecogConfigSnapshot> base;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
base = cfg_;
|
||||
}
|
||||
|
||||
std::string dtype = new_config.ValueOr<std::string>("input_dtype", input_dtype_);
|
||||
for (auto& c : dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
input_dtype_ = std::move(dtype);
|
||||
std::shared_ptr<const FaceRecogConfigSnapshot> snap;
|
||||
BuildFaceRecogConfigSnapshot(new_config, base, snap);
|
||||
|
||||
if (const SimpleJson* norm = new_config.Find("normalize"); norm && norm->IsObject()) {
|
||||
bool use_ms = false;
|
||||
if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
norm_mean_[static_cast<size_t>(i)] = static_cast<float>(mean->AsArray()[static_cast<size_t>(i)].AsNumber(norm_mean_[static_cast<size_t>(i)]));
|
||||
}
|
||||
use_ms = true;
|
||||
}
|
||||
if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
norm_std_[static_cast<size_t>(i)] = static_cast<float>(st->AsArray()[static_cast<size_t>(i)].AsNumber(norm_std_[static_cast<size_t>(i)]));
|
||||
}
|
||||
use_ms = true;
|
||||
}
|
||||
norm_use_mean_std_ = use_ms;
|
||||
norm_scale_ = norm->ValueOr<float>("scale", norm_scale_);
|
||||
norm_bias_ = norm->ValueOr<float>("bias", norm_bias_);
|
||||
}
|
||||
|
||||
// Gallery updates
|
||||
bool reload = false;
|
||||
if (const SimpleJson* g = new_config.Find("gallery"); g && g->IsObject()) {
|
||||
std::string backend = g->ValueOr<std::string>("backend", gallery_backend_);
|
||||
std::string path = g->ValueOr<std::string>("path", gallery_path_);
|
||||
bool los = g->ValueOr<bool>("load_on_start", gallery_load_on_start_);
|
||||
int expected_dim = std::max(0, g->ValueOr<int>("expected_dim", gallery_expected_dim_));
|
||||
std::string dtype = g->ValueOr<std::string>("dtype", gallery_dtype_);
|
||||
|
||||
for (auto& c : backend) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
for (auto& c : dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
|
||||
if (backend != gallery_backend_ || path != gallery_path_ || expected_dim != gallery_expected_dim_ || dtype != gallery_dtype_) {
|
||||
reload = true;
|
||||
}
|
||||
gallery_backend_ = std::move(backend);
|
||||
gallery_path_ = std::move(path);
|
||||
gallery_load_on_start_ = los;
|
||||
gallery_expected_dim_ = expected_dim;
|
||||
gallery_dtype_ = std::move(dtype);
|
||||
}
|
||||
if (reload) {
|
||||
ReloadGallery();
|
||||
if (base && snap) {
|
||||
reload = (snap->gallery_backend != base->gallery_backend ||
|
||||
snap->gallery_path != base->gallery_path ||
|
||||
snap->gallery_expected_dim != base->gallery_expected_dim ||
|
||||
snap->gallery_dtype != base->gallery_dtype);
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
cfg_ = snap;
|
||||
}
|
||||
|
||||
if (reload && snap) {
|
||||
ReloadGallery(*snap);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -738,21 +765,21 @@ private:
|
||||
for (auto& q : output_queues_) q->Push(frame);
|
||||
}
|
||||
|
||||
void ReloadGallery() {
|
||||
if (gallery_path_.empty()) return;
|
||||
void ReloadGallery(const FaceRecogConfigSnapshot& cfg) {
|
||||
if (cfg.gallery_path.empty()) return;
|
||||
|
||||
std::string err;
|
||||
FaceGallery g;
|
||||
g.SetExpectedDim(gallery_expected_dim_);
|
||||
g.SetPreferredDtype(gallery_dtype_);
|
||||
g.SetExpectedDim(cfg.gallery_expected_dim);
|
||||
g.SetPreferredDtype(cfg.gallery_dtype);
|
||||
bool ok = false;
|
||||
|
||||
if (gallery_backend_ == "sqlite") {
|
||||
ok = g.LoadSqliteBackend(gallery_path_, err);
|
||||
} else if (gallery_backend_ == "file") {
|
||||
ok = g.LoadFileBackend(gallery_path_, err);
|
||||
if (cfg.gallery_backend == "sqlite") {
|
||||
ok = g.LoadSqliteBackend(cfg.gallery_path, err);
|
||||
} else if (cfg.gallery_backend == "file") {
|
||||
ok = g.LoadFileBackend(cfg.gallery_path, err);
|
||||
} else {
|
||||
err = "unknown gallery backend: " + gallery_backend_;
|
||||
err = "unknown gallery backend: " + cfg.gallery_backend;
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
@ -760,9 +787,13 @@ private:
|
||||
return;
|
||||
}
|
||||
|
||||
gallery_ = std::move(g);
|
||||
LogInfo("[ai_face_recog] gallery loaded: n=" + std::to_string(gallery_.Size()) +
|
||||
" dim=" + std::to_string(gallery_.Dim()));
|
||||
auto sp = std::make_shared<FaceGallery>(std::move(g));
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
gallery_ = sp;
|
||||
}
|
||||
LogInfo("[ai_face_recog] gallery loaded: n=" + std::to_string(sp->Size()) +
|
||||
" dim=" + std::to_string(sp->Dim()));
|
||||
}
|
||||
|
||||
#if defined(RK3588_ENABLE_RKNN)
|
||||
@ -781,22 +812,31 @@ private:
|
||||
: (frame->stride > 0 ? frame->stride : w * 3);
|
||||
if (!src || stride <= 0) return;
|
||||
|
||||
const bool need_swap = (frame->format == PixelFormat::BGR && model_input_format_ == "rgb") ||
|
||||
(frame->format == PixelFormat::RGB && model_input_format_ == "bgr");
|
||||
std::shared_ptr<const FaceRecogConfigSnapshot> cfg;
|
||||
std::shared_ptr<const FaceGallery> gallery;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
cfg = cfg_;
|
||||
gallery = gallery_;
|
||||
}
|
||||
if (!cfg) return;
|
||||
|
||||
const bool need_swap = (frame->format == PixelFormat::BGR && cfg->model_input_format == "rgb") ||
|
||||
(frame->format == PixelFormat::RGB && cfg->model_input_format == "bgr");
|
||||
|
||||
FaceRecogResult rr;
|
||||
rr.img_w = w;
|
||||
rr.img_h = h;
|
||||
rr.model_name = "arcface";
|
||||
|
||||
const int limit = std::min<int>(max_faces_, static_cast<int>(frame->face_det->faces.size()));
|
||||
const int limit = std::min<int>(cfg->max_faces, static_cast<int>(frame->face_det->faces.size()));
|
||||
rr.items.reserve(static_cast<size_t>(limit));
|
||||
|
||||
for (int i = 0; i < limit; ++i) {
|
||||
const FaceDetItem& face = frame->face_det->faces[static_cast<size_t>(i)];
|
||||
|
||||
face_buf_.resize(static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_) * 3);
|
||||
if (align_ && face.has_landmarks && model_w_ == 112 && model_h_ == 112) {
|
||||
if (cfg->align && face.has_landmarks && model_w_ == 112 && model_h_ == 112) {
|
||||
const std::array<Point2f, 5> dst = {
|
||||
Point2f{38.2946f, 51.6963f},
|
||||
Point2f{73.5318f, 51.5014f},
|
||||
@ -820,18 +860,19 @@ private:
|
||||
in.height = model_h_;
|
||||
in.is_nhwc = true;
|
||||
|
||||
if (input_dtype_ == "float" || input_dtype_ == "f32" || input_dtype_ == "float32") {
|
||||
if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") {
|
||||
float_input_buf_.resize(static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_) * 3);
|
||||
const size_t pix = static_cast<size_t>(model_w_) * static_cast<size_t>(model_h_);
|
||||
const uint8_t* p = face_buf_.data();
|
||||
for (size_t ii = 0; ii < pix; ++ii) {
|
||||
for (int c = 0; c < 3; ++c) {
|
||||
float x = static_cast<float>(p[ii * 3 + static_cast<size_t>(c)]);
|
||||
if (norm_use_mean_std_) {
|
||||
const float st = std::fabs(norm_std_[static_cast<size_t>(c)]) < 1e-6f ? 1.0f : norm_std_[static_cast<size_t>(c)];
|
||||
x = (x - norm_mean_[static_cast<size_t>(c)]) / st;
|
||||
if (cfg->norm_use_mean_std) {
|
||||
const float st = std::fabs(cfg->norm_std[static_cast<size_t>(c)]) < 1e-6f ? 1.0f
|
||||
: cfg->norm_std[static_cast<size_t>(c)];
|
||||
x = (x - cfg->norm_mean[static_cast<size_t>(c)]) / st;
|
||||
} else {
|
||||
x = x * norm_scale_ + norm_bias_;
|
||||
x = x * cfg->norm_scale + cfg->norm_bias;
|
||||
}
|
||||
float_input_buf_[ii * 3 + static_cast<size_t>(c)] = x;
|
||||
}
|
||||
@ -859,13 +900,13 @@ private:
|
||||
L2Normalize(emb);
|
||||
|
||||
FaceGallery::SearchResult sr;
|
||||
if (gallery_.Size() > 0) {
|
||||
sr = gallery_.SearchTop2(emb);
|
||||
if (gallery && gallery->Size() > 0) {
|
||||
sr = gallery->SearchTop2(emb);
|
||||
}
|
||||
|
||||
const bool accept = (sr.best_person_id >= 0) &&
|
||||
(sr.best_sim >= thr_accept_) &&
|
||||
((thr_margin_ <= 0.0f) || ((sr.best_sim - sr.second_sim) >= thr_margin_));
|
||||
(sr.best_sim >= cfg->thr_accept) &&
|
||||
((cfg->thr_margin <= 0.0f) || ((sr.best_sim - sr.second_sim) >= cfg->thr_margin));
|
||||
|
||||
FaceRecogItem item;
|
||||
item.bbox = face.bbox;
|
||||
@ -878,7 +919,7 @@ private:
|
||||
item.second_sim = sr.second_sim;
|
||||
item.unknown = !accept;
|
||||
|
||||
if (emit_embedding_) item.embedding = emb;
|
||||
if (cfg->emit_embedding) item.embedding = emb;
|
||||
rr.items.push_back(std::move(item));
|
||||
}
|
||||
|
||||
@ -889,28 +930,9 @@ private:
|
||||
std::string id_;
|
||||
std::string model_path_;
|
||||
|
||||
bool align_ = true;
|
||||
bool emit_embedding_ = false;
|
||||
int max_faces_ = 10;
|
||||
|
||||
float thr_accept_ = 0.45f;
|
||||
float thr_margin_ = 0.05f;
|
||||
|
||||
std::string model_input_format_ = "rgb";
|
||||
|
||||
std::string input_dtype_ = "uint8";
|
||||
float norm_scale_ = 1.0f;
|
||||
float norm_bias_ = 0.0f;
|
||||
bool norm_use_mean_std_ = false;
|
||||
std::array<float, 3> norm_mean_{{0.0f, 0.0f, 0.0f}};
|
||||
std::array<float, 3> norm_std_{{1.0f, 1.0f, 1.0f}};
|
||||
|
||||
std::string gallery_backend_ = "file";
|
||||
std::string gallery_path_; // base path without extension: <path>.json / <path>.bin
|
||||
bool gallery_load_on_start_ = true;
|
||||
int gallery_expected_dim_ = 512;
|
||||
std::string gallery_dtype_ = "auto";
|
||||
FaceGallery gallery_;
|
||||
mutable std::mutex mu_;
|
||||
std::shared_ptr<const FaceRecogConfigSnapshot> cfg_;
|
||||
std::shared_ptr<const FaceGallery> gallery_;
|
||||
|
||||
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
|
||||
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
||||
|
||||
@ -334,6 +334,15 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a single warning when a frame that carries detection metadata is about
// to be resized to out_w x out_h.
//
// frame  Frame being processed; a null frame is ignored.
// out_w  Target output width that frame->width is compared against.
// out_h  Target output height that frame->height is compared against.
//
// The call is a no-op when the warning has already been issued, when the frame
// already has the target size, or when none of det / face_det / face_recog is
// set on the frame.
// NOTE(review): warned_meta_resize_ is declared as a plain bool member, so this
// presumably runs on a single thread only — confirm against the caller.
void WarnMetaResizeOnce(const FramePtr& frame, int out_w, int out_h) {
|
||||
if (warned_meta_resize_) return;
|
||||
if (!frame) return;
|
||||
if (frame->width == out_w && frame->height == out_h) return;
|
||||
if (!frame->det && !frame->face_det && !frame->face_recog) return;
|
||||
// Latch the flag before logging so every later call returns at the first check.
warned_meta_resize_ = true;
|
||||
// The message states that the metadata is forwarded without coordinate scaling.
LogWarn("[preprocess] resized frame but forwarded det/face meta without coordinate scaling; ensure det/recog/osd use same resolution (id=" + id_ + ")");
|
||||
}
|
||||
|
||||
void ProcessPassthrough(FramePtr frame) {
|
||||
PushToDownstream(frame);
|
||||
++processed_;
|
||||
@ -364,6 +373,10 @@ private:
|
||||
bool need_cvt = (src_fmt_rga != dst_fmt_rga);
|
||||
bool need_resize = (frame->width != out_w || frame->height != out_h);
|
||||
|
||||
if (need_resize) {
|
||||
WarnMetaResizeOnce(frame, out_w, out_h);
|
||||
}
|
||||
|
||||
// If no processing needed, passthrough directly
|
||||
if (!need_cvt && !need_resize) {
|
||||
PushToDownstream(frame);
|
||||
@ -607,6 +620,10 @@ private:
|
||||
out_h = (out_h + 1) & ~1;
|
||||
}
|
||||
|
||||
if (frame->width != out_w || frame->height != out_h) {
|
||||
WarnMetaResizeOnce(frame, out_w, out_h);
|
||||
}
|
||||
|
||||
AVPixelFormat src_av_fmt = ToAvFormat(frame->format);
|
||||
AVPixelFormat dst_av_fmt = ToAvFormat(out_fmt);
|
||||
|
||||
@ -790,6 +807,8 @@ private:
|
||||
bool stats_log_ = false;
|
||||
uint64_t stats_interval_ = 100;
|
||||
|
||||
bool warned_meta_resize_ = false;
|
||||
|
||||
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
|
||||
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
||||
uint64_t processed_ = 0;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user