// ai_face_det_node.cpp — RetinaFace-style face-detection node for RK3588 (RKNN).
#include <algorithm>
|
|
#include <array>
|
|
#include <cctype>
|
|
#include <cmath>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <iostream>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <numeric>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
// For test image loading (implementation in ai_face_recog_node.cpp)
|
|
#include "../../third_party/rknpu2/examples/3rdparty/stb/stb_image.h"
|
|
|
|
#include "ai_scheduler.h"
|
|
#include "face/face_result.h"
|
|
#include "node.h"
|
|
#include "utils/logger.h"
|
|
|
|
namespace rk3588 {
|
|
|
|
namespace {
|
|
|
|
// Clamps `v` into the inclusive range [lo, hi].
inline int ClampInt(int v, int lo, int hi) {
  if (v < lo) return lo;
  if (v > hi) return hi;
  return v;
}
|
|
|
|
// One RetinaFace anchor (prior) box in coordinates normalized to the
// model input size: (cx, cy) is the box center, (w, h) its extent.
struct Prior {
  float cx = 0.0f;
  float cy = 0.0f;
  float w = 0.0f;
  float h = 0.0f;
};
|
|
|
|
// Immutable snapshot of the face-detector configuration. A fresh snapshot
// is built on every (re)configure and published behind a shared_ptr, so
// the inference path reads one consistent set of values without holding a
// lock during processing.
struct FaceDetConfigSnapshot {
  float conf_thresh = 0.6f;      // minimum score for a detection to be kept
  float nms_thresh = 0.4f;       // IoU threshold for greedy NMS
  int max_faces = 10;            // cap on faces reported per frame
  bool output_landmarks = true;  // decode 5-point landmarks when the model provides them

  std::string input_format = "rgb";   // channel order the model expects ("rgb"/"bgr"), lower-cased
  std::string input_dtype = "uint8";  // model input dtype ("uint8" or "float"/"f32"/"float32"), lower-cased

  // Normalization for the float input path: either x*scale+bias, or
  // per-channel (x - mean[c]) / std[c] when norm_use_mean_std is set.
  float norm_scale = 1.0f;
  float norm_bias = 0.0f;
  bool norm_use_mean_std = false;
  std::array<float, 3> norm_mean{{0.0f, 0.0f, 0.0f}};
  std::array<float, 3> norm_std{{1.0f, 1.0f, 1.0f}};

  // RetinaFace priors defaults for 320 input (MobileNet0.25).
  // `steps` are feature-map strides; min_sizes[s] are the anchor sizes
  // emitted per cell of the s-th feature map. The two lists must stay
  // the same length.
  std::vector<int> steps{8, 16, 32};
  std::vector<std::vector<int>> min_sizes{{16, 32}, {64, 128}, {256, 512}};
};
|
|
|
|
// Builds a new config snapshot from `config`, layered on `base` when given
// so keys absent from `config` keep their previous values. Publishes the
// result through `out`. Always returns true (parsing is best-effort).
static bool BuildFaceDetConfigSnapshot(const SimpleJson& config,
                                       const std::shared_ptr<const FaceDetConfigSnapshot>& base,
                                       std::shared_ptr<const FaceDetConfigSnapshot>& out) {
  auto snap = std::make_shared<FaceDetConfigSnapshot>();
  if (base) *snap = *base;

  snap->conf_thresh = config.ValueOr<float>("conf", snap->conf_thresh);
  snap->nms_thresh = config.ValueOr<float>("nms", snap->nms_thresh);
  snap->max_faces = std::max(1, config.ValueOr<int>("max_faces", snap->max_faces));
  snap->output_landmarks = config.ValueOr<bool>("output_landmarks", snap->output_landmarks);

  // Lower-case format/dtype strings so later comparisons are case-insensitive.
  {
    std::string fmt = config.ValueOr<std::string>("input_format", snap->input_format);
    for (auto& c : fmt) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    snap->input_format = std::move(fmt);
  }
  {
    std::string dtype = config.ValueOr<std::string>("input_dtype", snap->input_dtype);
    for (auto& c : dtype) c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    snap->input_dtype = std::move(dtype);
  }

  // Optional "normalize" object. A mean or std array with >=3 entries
  // switches on mean/std normalization; scale/bias are read either way.
  if (const SimpleJson* norm = config.Find("normalize"); norm && norm->IsObject()) {
    bool use_ms = false;
    if (const SimpleJson* mean = norm->Find("mean"); mean && mean->IsArray() && mean->AsArray().size() >= 3) {
      for (int i = 0; i < 3; ++i) {
        snap->norm_mean[static_cast<size_t>(i)] =
            static_cast<float>(mean->AsArray()[static_cast<size_t>(i)].AsNumber(snap->norm_mean[static_cast<size_t>(i)]));
      }
      use_ms = true;
    }
    if (const SimpleJson* st = norm->Find("std"); st && st->IsArray() && st->AsArray().size() >= 3) {
      for (int i = 0; i < 3; ++i) {
        snap->norm_std[static_cast<size_t>(i)] =
            static_cast<float>(st->AsArray()[static_cast<size_t>(i)].AsNumber(snap->norm_std[static_cast<size_t>(i)]));
      }
      use_ms = true;
    }
    snap->norm_use_mean_std = use_ms;
    snap->norm_scale = norm->ValueOr<float>("scale", snap->norm_scale);
    snap->norm_bias = norm->ValueOr<float>("bias", snap->norm_bias);
  }

  // Optional "prior" object overriding anchor steps/min_sizes. The two
  // lists are only accepted together and when their lengths match.
  if (const SimpleJson* pri = config.Find("prior"); pri && pri->IsObject()) {
    std::vector<int> new_steps = snap->steps;
    std::vector<std::vector<int>> new_mins = snap->min_sizes;

    if (const SimpleJson* steps = pri->Find("steps"); steps && steps->IsArray()) {
      std::vector<int> tmp;
      // Clamp each step to >=1 to avoid later division by zero.
      for (const auto& v : steps->AsArray()) tmp.push_back(std::max(1, v.AsInt(1)));
      if (!tmp.empty()) new_steps = std::move(tmp);
    }
    if (const SimpleJson* mins = pri->Find("min_sizes"); mins && mins->IsArray()) {
      std::vector<std::vector<int>> tmp;
      for (const auto& grp : mins->AsArray()) {
        std::vector<int> g;
        for (const auto& v : grp.AsArray()) g.push_back(std::max(1, v.AsInt(1)));
        if (!g.empty()) tmp.push_back(std::move(g));
      }
      if (!tmp.empty()) new_mins = std::move(tmp);
    }

    if (!new_steps.empty() && !new_mins.empty() && new_steps.size() == new_mins.size()) {
      snap->steps = std::move(new_steps);
      snap->min_sizes = std::move(new_mins);
    } else {
      // Best-effort: keep previous priors to avoid per-frame mismatch.
      if (base) {
        snap->steps = base->steps;
        snap->min_sizes = base->min_sizes;
      }
    }
  }

  out = std::move(snap);
  return true;
}
|
|
|
|
float IoU(const Rect& a, const Rect& b) {
|
|
const float ax1 = a.x;
|
|
const float ay1 = a.y;
|
|
const float ax2 = a.x + a.w;
|
|
const float ay2 = a.y + a.h;
|
|
const float bx1 = b.x;
|
|
const float by1 = b.y;
|
|
const float bx2 = b.x + b.w;
|
|
const float by2 = b.y + b.h;
|
|
|
|
const float ix1 = std::max(ax1, bx1);
|
|
const float iy1 = std::max(ay1, by1);
|
|
const float ix2 = std::min(ax2, bx2);
|
|
const float iy2 = std::min(ay2, by2);
|
|
|
|
const float iw = std::max(0.0f, ix2 - ix1);
|
|
const float ih = std::max(0.0f, iy2 - iy1);
|
|
const float inter = iw * ih;
|
|
const float ua = a.w * a.h + b.w * b.h - inter;
|
|
return ua <= 0.0f ? 0.0f : (inter / ua);
|
|
}
|
|
|
|
void NmsSorted(const std::vector<Rect>& boxes, const std::vector<float>& scores,
|
|
float nms_thresh, std::vector<int>& keep) {
|
|
keep.clear();
|
|
std::vector<int> order(scores.size());
|
|
std::iota(order.begin(), order.end(), 0);
|
|
std::sort(order.begin(), order.end(), [&](int a, int b) { return scores[a] > scores[b]; });
|
|
|
|
for (int idx : order) {
|
|
bool suppressed = false;
|
|
for (int kept : keep) {
|
|
if (IoU(boxes[idx], boxes[kept]) > nms_thresh) {
|
|
suppressed = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!suppressed) keep.push_back(idx);
|
|
}
|
|
}
|
|
|
|
// Logistic sigmoid: maps any real x into (0, 1).
inline float Sigmoid(float x) {
  const float e = std::exp(-x);
  return 1.0f / (1.0f + e);
}
|
|
|
|
// Two-class softmax: probability of the second class given logits (a, b).
// Subtracting the larger logit first keeps exp() from overflowing.
inline float Softmax2(float a, float b) {
  const float hi = std::max(a, b);
  const float exp_a = std::exp(a - hi);
  const float exp_b = std::exp(b - hi);
  return exp_b / (exp_a + exp_b);
}
|
|
|
|
// Converts an IEEE 754 binary16 value (passed as its raw bit pattern)
// to a binary32 float. Handles signed zero, subnormals, infinities and
// NaN explicitly.
inline float HalfToFloat(uint16_t h) {
  const uint32_t sign_bits = static_cast<uint32_t>(h & 0x8000u) << 16;
  uint32_t exponent = (h & 0x7C00u) >> 10;
  uint32_t mantissa = h & 0x03FFu;

  uint32_t bits;
  if (exponent == 31) {
    // Inf/NaN: all-ones exponent in binary32, mantissa widened.
    bits = sign_bits | 0x7F800000u | (mantissa << 13);
  } else if (exponent != 0) {
    // Normal number: rebias the exponent from 15 to 127.
    bits = sign_bits | ((exponent + (127 - 15)) << 23) | (mantissa << 13);
  } else if (mantissa == 0) {
    // Signed zero.
    bits = sign_bits;
  } else {
    // Subnormal: renormalize by shifting until the implicit leading bit
    // (bit 10) appears, decrementing the exponent per shift. The unsigned
    // wrap-around on `exponent` cancels out when 112 is added back.
    exponent = 1;
    while ((mantissa & 0x0400u) == 0) {
      mantissa <<= 1;
      --exponent;
    }
    mantissa &= 0x03FFu;
    bits = sign_bits | ((exponent + (127 - 15)) << 23) | (mantissa << 13);
  }

  float result;
  memcpy(&result, &bits, sizeof(result));
  return result;
}
|
|
|
|
// Dequantizes a quantized value `q` using zero-point `zp` and `scale`:
// real = (q - zp) * scale.
template <typename T>
inline float Dequant(T q, int32_t zp, float scale) {
  const float centered = static_cast<float>(q) - static_cast<float>(zp);
  return centered * scale;
}
|
|
|
|
// Non-owning view of one raw model-output tensor plus its asymmetric
// quantization parameters (zero-point + scale) and reported shape.
struct Tensor {
  const uint8_t* data = nullptr;  // borrowed; lifetime owned by the inference result
  size_t size = 0;                // byte size of `data`
  int32_t zp = 0;                 // quantization zero-point
  float scale = 1.0f;             // quantization scale
  std::vector<uint32_t> dims;     // tensor shape as reported by the runtime
#if defined(RK3588_ENABLE_RKNN)
  rknn_tensor_type type = RKNN_TENSOR_UINT8;  // element dtype (RKNN builds only)
#endif
};
|
|
|
|
// Dense float matrix of N rows by C columns (row-major), produced by
// ExtractNc after dequantization and layout normalization.
struct NcTensor {
  int n = 0;                // row count (number of anchors)
  int c = 0;                // column count (channels per anchor)
  std::vector<float> data;  // N*C row-major
};
|
|
|
|
// Normalizes one raw output tensor into an N x C float matrix:
//  - determines the element width/dtype (RKNN builds recognize fp16/fp32;
//    everything else is dequantized as 1-byte int8/uint8 via zp/scale),
//  - infers N and the memory layout ([N,C] vs [C,N]) from t.dims,
//  - converts every element, transposing [C,N] input to [N,C] output.
// Returns false when the tensor is empty, a 3-D shape matches `c` on
// neither trailing dim, or the element count is not an exact N*c.
bool ExtractNc(const Tensor& t, int c, NcTensor& out) {
  out = {};
  out.c = c;
  if (!t.data || t.size == 0) return false;

  // Element width defaults to 1 byte (quantized). Without RKNN there is
  // no dtype field, so data is always treated as quantized bytes.
  size_t elem_size = 1;
  bool is_float32 = false;
  bool is_float16 = false;
#if defined(RK3588_ENABLE_RKNN)
  if (t.type == RKNN_TENSOR_FLOAT16) {
    elem_size = 2;
    is_float16 = true;
  }
  if (t.type == RKNN_TENSOR_FLOAT32) {
    elem_size = 4;
    is_float32 = true;
  }
#endif
  const size_t elem_cnt = elem_size > 0 ? (t.size / elem_size) : 0;
  if (elem_cnt == 0) return false;

  // Infer N and whether the buffer is laid out [C, N] (transposed).
  int n = 0;
  bool transposed = false;
  if (t.dims.size() == 3) {
    // Common: [1, C, N] or [1, N, C]
    const uint32_t d1 = t.dims[1];
    const uint32_t d2 = t.dims[2];
    if (static_cast<int>(d2) == c) {
      n = static_cast<int>(d1);
      transposed = false; // NxC
    } else if (static_cast<int>(d1) == c) {
      n = static_cast<int>(d2);
      transposed = true; // CxN
    } else {
      return false; // Explicitly reject: no fallback for 3-D shapes.
    }
  } else if (t.dims.size() == 2) {
    // [N, C] or [C, N]
    const uint32_t d0 = t.dims[0];
    const uint32_t d1 = t.dims[1];
    if (static_cast<int>(d1) == c) {
      n = static_cast<int>(d0);
      transposed = false;
    } else if (static_cast<int>(d0) == c) {
      n = static_cast<int>(d1);
      transposed = true;
    }
  }

  // Fallback for other ranks: assume a tightly packed NxC buffer.
  if (n <= 0) {
    if (elem_cnt % static_cast<size_t>(c) != 0) return false;
    n = static_cast<int>(elem_cnt / static_cast<size_t>(c));
    transposed = false;
  }

  if (static_cast<size_t>(n) * static_cast<size_t>(c) != elem_cnt) {
    return false;
  }

  out.n = n;
  out.data.resize(static_cast<size_t>(n) * static_cast<size_t>(c));

  // Reads element `idx` as float, dequantizing 1-byte inputs via zp/scale.
  auto ReadElem = [&](size_t idx) -> float {
    if (is_float32) {
      const float* fp = reinterpret_cast<const float*>(t.data);
      return fp[idx];
    }
#if defined(RK3588_ENABLE_RKNN)
    if (is_float16) {
      const uint16_t* hp = reinterpret_cast<const uint16_t*>(t.data);
      return HalfToFloat(hp[idx]);
    }
    if (t.type == RKNN_TENSOR_INT8) {
      const int8_t* p = reinterpret_cast<const int8_t*>(t.data);
      return Dequant(p[idx], t.zp, t.scale);
    }
#endif
    // Default: treat data as uint8 quantized values.
    const uint8_t* p = reinterpret_cast<const uint8_t*>(t.data);
    return Dequant(p[idx], t.zp, t.scale);
  };

  if (!transposed) {
    for (size_t i = 0; i < out.data.size(); ++i) {
      out.data[i] = ReadElem(i);
    }
  } else {
    // Input is [C, N] contiguous. Transpose to [N, C].
    for (int ci = 0; ci < c; ++ci) {
      for (int ni = 0; ni < n; ++ni) {
        const size_t src_idx = static_cast<size_t>(ci) * static_cast<size_t>(n) + static_cast<size_t>(ni);
        const size_t dst_idx = static_cast<size_t>(ni) * static_cast<size_t>(c) + static_cast<size_t>(ci);
        out.data[dst_idx] = ReadElem(src_idx);
      }
    }
  }

  return true;
}
|
|
|
|
std::vector<Prior> GenerateRetinaFacePriors(int in_w, int in_h,
|
|
const std::vector<int>& steps,
|
|
const std::vector<std::vector<int>>& min_sizes) {
|
|
std::vector<Prior> priors;
|
|
if (steps.empty() || steps.size() != min_sizes.size()) return priors;
|
|
priors.reserve(5000);
|
|
|
|
for (size_t s = 0; s < steps.size(); ++s) {
|
|
const int step = steps[s];
|
|
const int fm_w = in_w / step;
|
|
const int fm_h = in_h / step;
|
|
for (int i = 0; i < fm_h; ++i) {
|
|
for (int j = 0; j < fm_w; ++j) {
|
|
for (int ms : min_sizes[s]) {
|
|
const float s_kx = static_cast<float>(ms) / static_cast<float>(in_w);
|
|
const float s_ky = static_cast<float>(ms) / static_cast<float>(in_h);
|
|
const float cx = (static_cast<float>(j) + 0.5f) * static_cast<float>(step) / static_cast<float>(in_w);
|
|
const float cy = (static_cast<float>(i) + 0.5f) * static_cast<float>(step) / static_cast<float>(in_h);
|
|
priors.push_back(Prior{cx, cy, s_kx, s_ky});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return priors;
|
|
}
|
|
|
|
// Bilinearly resizes a packed RGB888 image of src_w x src_h pixels (row
// pitch `src_stride` bytes) into a tightly packed dst_w x dst_h buffer.
// Sample centers use the +0.5/-0.5 alignment convention and edge pixels
// are clamped. When `swap_rb` is true the R and B channels are exchanged
// on output.
void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride,
                       uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
  const float ratio_x = static_cast<float>(src_w) / static_cast<float>(dst_w);
  const float ratio_y = static_cast<float>(src_h) / static_cast<float>(dst_h);

  for (int dy = 0; dy < dst_h; ++dy) {
    const float sample_y = (static_cast<float>(dy) + 0.5f) * ratio_y - 0.5f;
    int top = static_cast<int>(std::floor(sample_y));
    int bottom = top + 1;
    const float w_bottom = sample_y - static_cast<float>(top);
    const float w_top = 1.0f - w_bottom;
    top = std::clamp(top, 0, src_h - 1);
    bottom = std::clamp(bottom, 0, src_h - 1);

    const uint8_t* row_t = src + static_cast<size_t>(top) * static_cast<size_t>(src_stride);
    const uint8_t* row_b = src + static_cast<size_t>(bottom) * static_cast<size_t>(src_stride);
    uint8_t* out = dst + static_cast<size_t>(dy) * static_cast<size_t>(dst_w) * 3;

    for (int dx = 0; dx < dst_w; ++dx) {
      const float sample_x = (static_cast<float>(dx) + 0.5f) * ratio_x - 0.5f;
      int left = static_cast<int>(std::floor(sample_x));
      int right = left + 1;
      const float w_right = sample_x - static_cast<float>(left);
      const float w_left = 1.0f - w_right;
      left = std::clamp(left, 0, src_w - 1);
      right = std::clamp(right, 0, src_w - 1);

      const uint8_t* tl = row_t + left * 3;
      const uint8_t* tr = row_t + right * 3;
      const uint8_t* bl = row_b + left * 3;
      const uint8_t* br = row_b + right * 3;

      for (int ch = 0; ch < 3; ++ch) {
        const float blended =
            (static_cast<float>(tl[ch]) * w_left + static_cast<float>(tr[ch]) * w_right) * w_top +
            (static_cast<float>(bl[ch]) * w_left + static_cast<float>(br[ch]) * w_right) * w_bottom;
        // Round to nearest and saturate to the byte range.
        out[ch] = static_cast<uint8_t>(std::clamp(static_cast<int>(blended + 0.5f), 0, 255));
      }

      if (swap_rb) std::swap(out[0], out[2]);
      out += 3;
    }
  }
}
|
|
|
|
} // namespace
|
|
|
|
class AiFaceDetNode : public INode {
|
|
public:
|
|
std::string Id() const override { return id_; }
|
|
std::string Type() const override { return "ai_face_det"; }
|
|
|
|
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
|
|
id_ = config.ValueOr<std::string>("id", "face_det");
|
|
model_path_ = config.ValueOr<std::string>("model_path", "");
|
|
std::shared_ptr<const FaceDetConfigSnapshot> snap;
|
|
BuildFaceDetConfigSnapshot(config, nullptr, snap);
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
cfg_ = std::move(snap);
|
|
priors_cache_ = {};
|
|
}
|
|
|
|
input_queue_ = ctx.input_queue;
|
|
output_queues_ = ctx.output_queues;
|
|
if (!input_queue_) {
|
|
std::cerr << "[ai_face_det] no input queue for node " << id_ << "\n";
|
|
return false;
|
|
}
|
|
if (output_queues_.empty()) {
|
|
std::cerr << "[ai_face_det] no output queue for node " << id_ << "\n";
|
|
return false;
|
|
}
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
if (model_path_.empty()) {
|
|
std::cerr << "[ai_face_det] model_path is required\n";
|
|
return false;
|
|
}
|
|
std::string err;
|
|
model_handle_ = AiScheduler::Instance().LoadModel(model_path_, err);
|
|
if (model_handle_ == kInvalidModelHandle) {
|
|
std::cerr << "[ai_face_det] failed to load model: " << err << "\n";
|
|
return false;
|
|
}
|
|
ModelInfo info;
|
|
if (AiScheduler::Instance().GetModelInfo(model_handle_, info)) {
|
|
model_w_ = info.input_width;
|
|
model_h_ = info.input_height;
|
|
n_output_ = info.n_output;
|
|
}
|
|
LogInfo("[ai_face_det] model loaded: " + model_path_ +
|
|
" (" + std::to_string(model_w_) + "x" + std::to_string(model_h_) +
|
|
", outputs=" + std::to_string(n_output_) + ")");
|
|
#else
|
|
LogWarn("[ai_face_det] RKNN disabled, will passthrough frames");
|
|
#endif
|
|
return true;
|
|
}
|
|
|
|
bool Start() override {
|
|
std::shared_ptr<const FaceDetConfigSnapshot> cfg;
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
cfg = cfg_;
|
|
}
|
|
const float conf = cfg ? cfg->conf_thresh : 0.0f;
|
|
const float nms = cfg ? cfg->nms_thresh : 0.0f;
|
|
const int max_faces = cfg ? cfg->max_faces : 0;
|
|
LogInfo("[ai_face_det] start id=" + id_ + " conf=" + std::to_string(conf) +
|
|
" nms=" + std::to_string(nms) + " max_faces=" + std::to_string(max_faces));
|
|
|
|
// ========== TEST: Load 003.jpg and run detection ==========
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
{
|
|
const char* test_img_path = "./003.jpg";
|
|
int img_w = 0, img_h = 0, img_c = 0;
|
|
unsigned char* img_data = stbi_load(test_img_path, &img_w, &img_h, &img_c, 3);
|
|
if (img_data && img_w > 0 && img_h > 0) {
|
|
std::cerr << "[TEST] Loaded " << test_img_path << " (" << img_w << "x" << img_h << ")\n";
|
|
|
|
// Create a fake frame
|
|
auto frame = std::make_shared<Frame>();
|
|
frame->width = img_w;
|
|
frame->height = img_h;
|
|
frame->format = PixelFormat::RGB;
|
|
frame->data = img_data;
|
|
frame->data_size = static_cast<size_t>(img_w) * static_cast<size_t>(img_h) * 3;
|
|
frame->stride = img_w * 3;
|
|
|
|
// Run detection
|
|
Run(frame);
|
|
|
|
// Print results
|
|
if (frame->face_det && !frame->face_det->faces.empty()) {
|
|
std::cerr << "[TEST] Detected " << frame->face_det->faces.size() << " face(s)\n";
|
|
for (size_t fi = 0; fi < frame->face_det->faces.size(); ++fi) {
|
|
const auto& face = frame->face_det->faces[fi];
|
|
std::cerr << "[TEST] Face " << fi << " bbox: ["
|
|
<< face.bbox.x << "," << face.bbox.y << ","
|
|
<< face.bbox.w << "," << face.bbox.h << "] score=" << face.score << "\n";
|
|
if (face.has_landmarks) {
|
|
std::cerr << "[TEST] Board landmarks: ";
|
|
for (int li = 0; li < 5; ++li) {
|
|
std::cerr << "[" << face.landmarks[li].x << "," << face.landmarks[li].y << "] ";
|
|
}
|
|
std::cerr << "\n";
|
|
}
|
|
}
|
|
} else {
|
|
std::cerr << "[TEST] No faces detected\n";
|
|
}
|
|
|
|
stbi_image_free(img_data);
|
|
} else {
|
|
std::cerr << "[TEST] Skip: " << test_img_path << " not found\n";
|
|
}
|
|
}
|
|
#endif
|
|
// ========== END TEST ==========
|
|
|
|
return true;
|
|
}
|
|
|
|
bool UpdateConfig(const SimpleJson& new_config) override {
|
|
const std::string new_id = new_config.ValueOr<std::string>("id", id_);
|
|
if (!new_id.empty() && new_id != id_) return false;
|
|
|
|
const std::string new_model = new_config.ValueOr<std::string>("model_path", model_path_);
|
|
if (new_model != model_path_) {
|
|
// Changing model requires graph rebuild.
|
|
return false;
|
|
}
|
|
std::shared_ptr<const FaceDetConfigSnapshot> base;
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
base = cfg_;
|
|
}
|
|
|
|
std::shared_ptr<const FaceDetConfigSnapshot> snap;
|
|
BuildFaceDetConfigSnapshot(new_config, base, snap);
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
cfg_ = std::move(snap);
|
|
priors_cache_ = {};
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void Stop() override {
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
if (model_handle_ != kInvalidModelHandle) {
|
|
AiScheduler::Instance().UnloadModel(model_handle_);
|
|
model_handle_ = kInvalidModelHandle;
|
|
}
|
|
#endif
|
|
LogInfo("[ai_face_det] stop id=" + id_);
|
|
}
|
|
|
|
NodeStatus Process(FramePtr frame) override {
|
|
if (!frame) return NodeStatus::DROP;
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
Run(frame);
|
|
#endif
|
|
Push(frame);
|
|
return NodeStatus::OK;
|
|
}
|
|
|
|
private:
|
|
void Push(FramePtr frame) {
|
|
for (auto& q : output_queues_) q->Push(frame);
|
|
}
|
|
|
|
#if defined(RK3588_ENABLE_RKNN)
|
|
void Run(FramePtr frame) {
|
|
if (!frame->data || frame->data_size == 0) return;
|
|
if (frame->format != PixelFormat::RGB && frame->format != PixelFormat::BGR) {
|
|
std::cerr << "[ai_face_det] input must be RGB/BGR\n";
|
|
return;
|
|
}
|
|
|
|
const int src_w = frame->width;
|
|
const int src_h = frame->height;
|
|
const size_t src_row = static_cast<size_t>(src_w) * 3;
|
|
const uint8_t* src = frame->planes[0].data ? frame->planes[0].data : frame->data;
|
|
const int src_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
|
|
: (frame->stride > 0 ? frame->stride : static_cast<int>(src_row));
|
|
if (!src || src_stride <= 0) return;
|
|
|
|
std::shared_ptr<const FaceDetConfigSnapshot> cfg;
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
cfg = cfg_;
|
|
}
|
|
if (!cfg) return;
|
|
|
|
const bool need_swap = (frame->format == PixelFormat::BGR && cfg->input_format == "rgb") ||
|
|
(frame->format == PixelFormat::RGB && cfg->input_format == "bgr");
|
|
|
|
const int in_w = model_w_ > 0 ? model_w_ : src_w;
|
|
const int in_h = model_h_ > 0 ? model_h_ : src_h;
|
|
const size_t in_size = static_cast<size_t>(in_w) * static_cast<size_t>(in_h) * 3;
|
|
|
|
const uint8_t* input_ptr = nullptr;
|
|
|
|
// Fast path: already packed, correct size, no channel swap.
|
|
if (!need_swap && src_w == in_w && src_h == in_h &&
|
|
static_cast<size_t>(src_stride) == src_row && frame->data_size >= src_row * static_cast<size_t>(src_h)) {
|
|
input_ptr = src;
|
|
} else {
|
|
input_buf_.resize(in_size);
|
|
if (src_w == in_w && src_h == in_h && static_cast<size_t>(src_stride) == src_row) {
|
|
memcpy(input_buf_.data(), src, in_size);
|
|
if (need_swap) {
|
|
for (size_t i = 0; i < in_size; i += 3) {
|
|
std::swap(input_buf_[i], input_buf_[i + 2]);
|
|
}
|
|
}
|
|
} else {
|
|
ResizeRgbBilinear(src, src_w, src_h, src_stride, input_buf_.data(), in_w, in_h, need_swap);
|
|
}
|
|
input_ptr = input_buf_.data();
|
|
}
|
|
|
|
InferInput input;
|
|
input.width = in_w;
|
|
input.height = in_h;
|
|
input.is_nhwc = true;
|
|
|
|
// Default: keep existing UINT8 behavior.
|
|
if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") {
|
|
float_input_buf_.resize(static_cast<size_t>(in_w) * static_cast<size_t>(in_h) * 3);
|
|
const size_t pix = static_cast<size_t>(in_w) * static_cast<size_t>(in_h);
|
|
const uint8_t* p = reinterpret_cast<const uint8_t*>(input_ptr);
|
|
for (size_t i = 0; i < pix; ++i) {
|
|
for (int c = 0; c < 3; ++c) {
|
|
float x = static_cast<float>(p[i * 3 + static_cast<size_t>(c)]);
|
|
if (cfg->norm_use_mean_std) {
|
|
const float st = std::fabs(cfg->norm_std[static_cast<size_t>(c)]) < 1e-6f ? 1.0f
|
|
: cfg->norm_std[static_cast<size_t>(c)];
|
|
x = (x - cfg->norm_mean[static_cast<size_t>(c)]) / st;
|
|
} else {
|
|
x = x * cfg->norm_scale + cfg->norm_bias;
|
|
}
|
|
float_input_buf_[i * 3 + static_cast<size_t>(c)] = x;
|
|
}
|
|
}
|
|
|
|
input.data = float_input_buf_.data();
|
|
input.size = float_input_buf_.size() * sizeof(float);
|
|
input.type = RKNN_TENSOR_FLOAT32;
|
|
} else {
|
|
input.data = input_ptr;
|
|
input.size = in_size;
|
|
input.type = RKNN_TENSOR_UINT8;
|
|
}
|
|
|
|
auto r = AiScheduler::Instance().InferBorrowed(model_handle_, input);
|
|
if (!r.success) {
|
|
std::cerr << "[ai_face_det] inference failed: " << r.error << "\n";
|
|
return;
|
|
}
|
|
|
|
std::vector<Tensor> tensors;
|
|
tensors.reserve(r.outputs.size());
|
|
for (const auto& o : r.outputs) {
|
|
Tensor t;
|
|
t.data = o.data;
|
|
t.size = o.size;
|
|
t.zp = o.zp;
|
|
t.scale = o.scale;
|
|
t.dims = o.dims;
|
|
t.type = o.type;
|
|
tensors.push_back(std::move(t));
|
|
}
|
|
|
|
FaceDetResult det;
|
|
det.img_w = src_w;
|
|
det.img_h = src_h;
|
|
det.model_name = "retinaface";
|
|
|
|
std::shared_ptr<const std::vector<Prior>> priors = GetRetinaFacePriors(cfg, in_w, in_h);
|
|
DecodeRetinaFace(tensors, src_w, src_h, in_w, in_h, *cfg, priors.get(), det);
|
|
frame->face_det = std::make_shared<FaceDetResult>(std::move(det));
|
|
}
|
|
|
|
struct PriorsCache {
|
|
int in_w = 0;
|
|
int in_h = 0;
|
|
const FaceDetConfigSnapshot* cfg_ptr = nullptr;
|
|
std::shared_ptr<const std::vector<Prior>> priors;
|
|
};
|
|
|
|
std::shared_ptr<const std::vector<Prior>> GetRetinaFacePriors(const std::shared_ptr<const FaceDetConfigSnapshot>& cfg,
|
|
int in_w, int in_h) {
|
|
if (!cfg || in_w <= 0 || in_h <= 0) return nullptr;
|
|
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
if (priors_cache_.priors && priors_cache_.cfg_ptr == cfg.get() &&
|
|
priors_cache_.in_w == in_w && priors_cache_.in_h == in_h) {
|
|
return priors_cache_.priors;
|
|
}
|
|
}
|
|
|
|
const std::vector<Prior> built = GenerateRetinaFacePriors(in_w, in_h, cfg->steps, cfg->min_sizes);
|
|
auto sp = std::make_shared<std::vector<Prior>>(built);
|
|
{
|
|
std::lock_guard<std::mutex> lock(mu_);
|
|
priors_cache_.cfg_ptr = cfg.get();
|
|
priors_cache_.in_w = in_w;
|
|
priors_cache_.in_h = in_h;
|
|
priors_cache_.priors = sp;
|
|
return priors_cache_.priors;
|
|
}
|
|
}
|
|
|
|
void DecodeRetinaFace(const std::vector<Tensor>& outs,
|
|
int orig_w, int orig_h,
|
|
int in_w, int in_h,
|
|
const FaceDetConfigSnapshot& cfg,
|
|
const std::vector<Prior>* priors_ptr,
|
|
FaceDetResult& out) {
|
|
// Find loc/conf/landms tensors.
|
|
std::vector<NcTensor> locs;
|
|
std::vector<NcTensor> confs;
|
|
std::vector<NcTensor> landms;
|
|
locs.reserve(4);
|
|
confs.reserve(4);
|
|
landms.reserve(4);
|
|
|
|
for (const auto& t : outs) {
|
|
NcTensor tmp;
|
|
if (ExtractNc(t, 4, tmp)) {
|
|
locs.push_back(std::move(tmp));
|
|
continue;
|
|
}
|
|
if (ExtractNc(t, 2, tmp)) {
|
|
confs.push_back(std::move(tmp));
|
|
continue;
|
|
}
|
|
if (ExtractNc(t, 10, tmp)) {
|
|
landms.push_back(std::move(tmp));
|
|
continue;
|
|
}
|
|
}
|
|
if (locs.empty() || confs.empty()) return;
|
|
|
|
// Concatenate along N.
|
|
auto Concat = [](const std::vector<NcTensor>& parts) -> NcTensor {
|
|
NcTensor all;
|
|
if (parts.empty()) return all;
|
|
all.c = parts[0].c;
|
|
int total_n = 0;
|
|
for (const auto& p : parts) total_n += p.n;
|
|
all.n = total_n;
|
|
all.data.resize(static_cast<size_t>(all.n) * static_cast<size_t>(all.c));
|
|
size_t off = 0;
|
|
for (const auto& p : parts) {
|
|
if (p.c != all.c) continue;
|
|
memcpy(all.data.data() + off, p.data.data(), p.data.size() * sizeof(float));
|
|
off += p.data.size();
|
|
}
|
|
return all;
|
|
};
|
|
|
|
NcTensor loc = Concat(locs);
|
|
NcTensor conf = Concat(confs);
|
|
NcTensor lmk;
|
|
if (cfg.output_landmarks && !landms.empty()) lmk = Concat(landms);
|
|
|
|
if (loc.n <= 0 || conf.n != loc.n) return;
|
|
const int n = loc.n;
|
|
|
|
const std::vector<Prior> empty_priors;
|
|
const std::vector<Prior>& priors = priors_ptr ? *priors_ptr : empty_priors;
|
|
if (!priors.empty() && static_cast<int>(priors.size()) != n) {
|
|
// Mismatch: can't reliably decode.
|
|
std::cerr << "[ai_face_det] prior mismatch: priors=" << priors.size() << " n=" << n << "\n";
|
|
return;
|
|
}
|
|
|
|
const float sx = static_cast<float>(orig_w) / static_cast<float>(in_w);
|
|
const float sy = static_cast<float>(orig_h) / static_cast<float>(in_h);
|
|
|
|
std::vector<Rect> boxes;
|
|
std::vector<float> scores;
|
|
std::vector<std::array<Point2f, 5>> lmks;
|
|
boxes.reserve(static_cast<size_t>(n));
|
|
scores.reserve(static_cast<size_t>(n));
|
|
if (cfg.output_landmarks) lmks.reserve(static_cast<size_t>(n));
|
|
|
|
constexpr float var0 = 0.1f;
|
|
constexpr float var1 = 0.2f;
|
|
|
|
for (int i = 0; i < n; ++i) {
|
|
const float s0 = conf.data[static_cast<size_t>(i) * 2 + 0];
|
|
const float s1 = conf.data[static_cast<size_t>(i) * 2 + 1];
|
|
float score;
|
|
if (s0 >= 0.0f && s0 <= 1.0f && s1 >= 0.0f && s1 <= 1.0f && std::fabs((s0 + s1) - 1.0f) < 0.1f) {
|
|
score = s1;
|
|
} else {
|
|
score = Softmax2(s0, s1);
|
|
}
|
|
if (score < cfg.conf_thresh) continue;
|
|
|
|
const Prior p = priors.empty() ? Prior{0, 0, 0, 0} : priors[static_cast<size_t>(i)];
|
|
|
|
const float dx = loc.data[static_cast<size_t>(i) * 4 + 0];
|
|
const float dy = loc.data[static_cast<size_t>(i) * 4 + 1];
|
|
const float dw = loc.data[static_cast<size_t>(i) * 4 + 2];
|
|
const float dh = loc.data[static_cast<size_t>(i) * 4 + 3];
|
|
|
|
const float cx = p.cx + dx * var0 * p.w;
|
|
const float cy = p.cy + dy * var0 * p.h;
|
|
const float ww = p.w * std::exp(dw * var1);
|
|
const float hh = p.h * std::exp(dh * var1);
|
|
|
|
float x1 = (cx - ww * 0.5f) * static_cast<float>(in_w);
|
|
float y1 = (cy - hh * 0.5f) * static_cast<float>(in_h);
|
|
float x2 = (cx + ww * 0.5f) * static_cast<float>(in_w);
|
|
float y2 = (cy + hh * 0.5f) * static_cast<float>(in_h);
|
|
|
|
x1 *= sx;
|
|
x2 *= sx;
|
|
y1 *= sy;
|
|
y2 *= sy;
|
|
|
|
Rect bb;
|
|
bb.x = static_cast<float>(ClampInt(static_cast<int>(x1), 0, orig_w - 1));
|
|
bb.y = static_cast<float>(ClampInt(static_cast<int>(y1), 0, orig_h - 1));
|
|
const float rx2 = static_cast<float>(ClampInt(static_cast<int>(x2), 0, orig_w - 1));
|
|
const float ry2 = static_cast<float>(ClampInt(static_cast<int>(y2), 0, orig_h - 1));
|
|
bb.w = std::max(0.0f, rx2 - bb.x);
|
|
bb.h = std::max(0.0f, ry2 - bb.y);
|
|
if (bb.w <= 1.0f || bb.h <= 1.0f) continue;
|
|
|
|
boxes.push_back(bb);
|
|
scores.push_back(score);
|
|
|
|
if (cfg.output_landmarks && !lmk.data.empty() && lmk.n == n) {
|
|
std::array<Point2f, 5> pts{};
|
|
for (int k = 0; k < 5; ++k) {
|
|
const float lx = lmk.data[static_cast<size_t>(i) * 10 + k * 2 + 0];
|
|
const float ly = lmk.data[static_cast<size_t>(i) * 10 + k * 2 + 1];
|
|
const float px = (p.cx + lx * var0 * p.w) * static_cast<float>(in_w) * sx;
|
|
const float py = (p.cy + ly * var0 * p.h) * static_cast<float>(in_h) * sy;
|
|
pts[k].x = static_cast<float>(ClampInt(static_cast<int>(px), 0, orig_w - 1));
|
|
pts[k].y = static_cast<float>(ClampInt(static_cast<int>(py), 0, orig_h - 1));
|
|
}
|
|
lmks.push_back(pts);
|
|
}
|
|
}
|
|
|
|
if (boxes.empty()) return;
|
|
|
|
std::vector<int> keep;
|
|
NmsSorted(boxes, scores, cfg.nms_thresh, keep);
|
|
if (keep.empty()) return;
|
|
|
|
const int out_n = std::min<int>(cfg.max_faces, static_cast<int>(keep.size()));
|
|
out.faces.reserve(static_cast<size_t>(out_n));
|
|
for (int i = 0; i < out_n; ++i) {
|
|
const int k = keep[static_cast<size_t>(i)];
|
|
FaceDetItem item;
|
|
item.bbox = boxes[static_cast<size_t>(k)];
|
|
item.score = scores[static_cast<size_t>(k)];
|
|
item.track_id = -1;
|
|
if (cfg.output_landmarks && k < static_cast<int>(lmks.size())) {
|
|
item.has_landmarks = true;
|
|
item.landmarks = lmks[static_cast<size_t>(k)];
|
|
}
|
|
out.faces.push_back(std::move(item));
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
std::string id_;
|
|
std::string model_path_;
|
|
|
|
mutable std::mutex mu_;
|
|
std::shared_ptr<const FaceDetConfigSnapshot> cfg_;
|
|
PriorsCache priors_cache_;
|
|
|
|
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
|
|
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
|
|
|
std::vector<uint8_t> input_buf_;
|
|
std::vector<float> float_input_buf_;
|
|
|
|
ModelHandle model_handle_ = kInvalidModelHandle;
|
|
int model_w_ = 320;
|
|
int model_h_ = 320;
|
|
uint32_t n_output_ = 0;
|
|
};
|
|
|
|
REGISTER_NODE(AiFaceDetNode, "ai_face_det");
|
|
|
|
} // namespace rk3588
|