642 lines
22 KiB
C++
642 lines
22 KiB
C++
#pragma once
|
||
|
||
/**
|
||
* 人脸检测公共工具函数
|
||
* 供 ai_face_det 和 ai_face_det_zoned 节点复用
|
||
*/
|
||
|
||
#include <algorithm>
|
||
#include <array>
|
||
#include <cmath>
|
||
#include <cstdint>
|
||
#include <cstring>
|
||
#include <memory>
|
||
#include <numeric>
|
||
#include <vector>
|
||
|
||
#include "face/face_result.h"
|
||
|
||
// RKNN类型前向声明(避免直接依赖rknn_api.h)
|
||
#if defined(RK3588_ENABLE_RKNN)
|
||
#include "rknn_api.h"
|
||
#else
|
||
// Stand-in for the tensor element-type enum that rknn_api.h supplies when
// RK3588_ENABLE_RKNN is defined. Only the four element types referenced in
// this header are declared so the decoding helpers still compile without the
// RKNN SDK.
// NOTE(review): the numeric values below are local to this fallback; confirm
// against the SDK header before persisting or exchanging them.
typedef enum _rknn_tensor_type {
    RKNN_TENSOR_UINT8 = 0,
    RKNN_TENSOR_INT8 = 1,
    RKNN_TENSOR_FLOAT16 = 2,
    RKNN_TENSOR_FLOAT32 = 3,
} rknn_tensor_type;
|
||
#endif
|
||
|
||
namespace rk3588 {
|
||
namespace face_detection {
|
||
|
||
// ============================================================================
|
||
// 基础工具函数
|
||
// ============================================================================
|
||
|
||
// Limits `v` to the range [lo, hi].
// The lower bound is tested first: any v below lo yields lo, otherwise any v
// above hi yields hi, otherwise v is returned unchanged.
inline int ClampInt(int v, int lo, int hi) {
    if (v < lo) {
        return lo;
    }
    if (v > hi) {
        return hi;
    }
    return v;
}
|
||
|
||
// Logistic function: 1 / (1 + e^(-x)).
inline float Sigmoid(float x) {
    const float decay = std::exp(-x);
    return 1.0f / (1.0f + decay);
}
|
||
|
||
// Two-class softmax; returns the probability assigned to the second input `b`.
// The larger input is subtracted from both before exponentiation so that
// neither exponent is positive.
inline float Softmax2(float a, float b) {
    const float shift = std::max(a, b);
    const float weight_a = std::exp(a - shift);
    const float weight_b = std::exp(b - shift);
    const float total = weight_a + weight_b;
    return weight_b / total;
}
|
||
|
||
// ============================================================================
|
||
// 几何计算
|
||
// ============================================================================
|
||
|
||
inline float IoU(const Rect& a, const Rect& b) {
|
||
const float ax1 = a.x;
|
||
const float ay1 = a.y;
|
||
const float ax2 = a.x + a.w;
|
||
const float ay2 = a.y + a.h;
|
||
const float bx1 = b.x;
|
||
const float by1 = b.y;
|
||
const float bx2 = b.x + b.w;
|
||
const float by2 = b.y + b.h;
|
||
|
||
const float ix1 = std::max(ax1, bx1);
|
||
const float iy1 = std::max(ay1, by1);
|
||
const float ix2 = std::min(ax2, bx2);
|
||
const float iy2 = std::min(ay2, by2);
|
||
|
||
const float iw = std::max(0.0f, ix2 - ix1);
|
||
const float ih = std::max(0.0f, iy2 - iy1);
|
||
const float inter = iw * ih;
|
||
const float ua = a.w * a.h + b.w * b.h - inter;
|
||
return ua <= 0.0f ? 0.0f : (inter / ua);
|
||
}
|
||
|
||
inline void NmsSorted(const std::vector<Rect>& boxes,
|
||
const std::vector<float>& scores,
|
||
float nms_thresh,
|
||
std::vector<int>& keep) {
|
||
keep.clear();
|
||
std::vector<int> order(scores.size());
|
||
std::iota(order.begin(), order.end(), 0);
|
||
std::sort(order.begin(), order.end(),
|
||
[&](int a, int b) { return scores[a] > scores[b]; });
|
||
|
||
for (int idx : order) {
|
||
bool suppressed = false;
|
||
for (int kept : keep) {
|
||
if (IoU(boxes[idx], boxes[kept]) > nms_thresh) {
|
||
suppressed = true;
|
||
break;
|
||
}
|
||
}
|
||
if (!suppressed) keep.push_back(idx);
|
||
}
|
||
}
|
||
|
||
// ============================================================================
|
||
// RetinaFace 数据结构
|
||
// ============================================================================
|
||
|
||
// One RetinaFace prior (anchor) box: centre and size.
// GeneratePriors fills these as fractions of the model input width/height.
struct Prior {
    float cx{0.0f};  // centre x
    float cy{0.0f};  // centre y
    float w{0.0f};   // width
    float h{0.0f};   // height
};
|
||
|
||
// Tunable parameters for face-detection post-processing.
struct DetectionConfig {
    float conf_thresh{0.6f};      // candidates scoring below this are dropped
    float nms_thresh{0.4f};       // IoU above which a lower-scored box is suppressed
    int max_faces{10};            // upper bound on the number of faces emitted
    bool output_landmarks{true};  // decode the five landmarks when available

    // RetinaFace default parameters.
    std::vector<int> steps{8, 16, 32};  // stride of each feature-map level
    std::vector<std::vector<int>> min_sizes{{16, 32}, {64, 128}, {256, 512}};  // prior sizes per level
    float variance0{0.1f};  // multiplies centre / landmark offsets during decoding
    float variance1{0.2f};  // multiplies the log-size offsets during decoding
};
|
||
|
||
// 张量结构(与RKNN解耦)
|
||
struct TensorView {
|
||
const uint8_t* data = nullptr;
|
||
size_t size = 0;
|
||
int32_t zp = 0;
|
||
float scale = 1.0f;
|
||
std::vector<uint32_t> dims;
|
||
rknn_tensor_type type = RKNN_TENSOR_UINT8;
|
||
};
|
||
|
||
// Dense float matrix with `n` rows and `c` columns.
struct NcTensor {
    int n{0};                 // number of rows
    int c{0};                 // number of columns
    std::vector<float> data;  // n * c values, row-major
};
|
||
|
||
// ============================================================================
|
||
// 图像预处理
|
||
// ============================================================================
|
||
|
||
inline void ResizeRgbBilinear(const uint8_t* src, int src_w, int src_h, int src_stride,
|
||
uint8_t* dst, int dst_w, int dst_h, bool swap_rb) {
|
||
const float scale_x = static_cast<float>(src_w) / static_cast<float>(dst_w);
|
||
const float scale_y = static_cast<float>(src_h) / static_cast<float>(dst_h);
|
||
|
||
for (int y = 0; y < dst_h; ++y) {
|
||
const float fy = (static_cast<float>(y) + 0.5f) * scale_y - 0.5f;
|
||
int y0 = static_cast<int>(std::floor(fy));
|
||
int y1 = y0 + 1;
|
||
const float wy1 = fy - static_cast<float>(y0);
|
||
const float wy0 = 1.0f - wy1;
|
||
y0 = ClampInt(y0, 0, src_h - 1);
|
||
y1 = ClampInt(y1, 0, src_h - 1);
|
||
|
||
const uint8_t* row0 = src + static_cast<size_t>(y0) * static_cast<size_t>(src_stride);
|
||
const uint8_t* row1 = src + static_cast<size_t>(y1) * static_cast<size_t>(src_stride);
|
||
uint8_t* out = dst + static_cast<size_t>(y) * static_cast<size_t>(dst_w) * 3;
|
||
|
||
for (int x = 0; x < dst_w; ++x) {
|
||
const float fx = (static_cast<float>(x) + 0.5f) * scale_x - 0.5f;
|
||
int x0 = static_cast<int>(std::floor(fx));
|
||
int x1 = x0 + 1;
|
||
const float wx1 = fx - static_cast<float>(x0);
|
||
const float wx0 = 1.0f - wx1;
|
||
x0 = ClampInt(x0, 0, src_w - 1);
|
||
x1 = ClampInt(x1, 0, src_w - 1);
|
||
|
||
const uint8_t* p00 = row0 + x0 * 3;
|
||
const uint8_t* p01 = row0 + x1 * 3;
|
||
const uint8_t* p10 = row1 + x0 * 3;
|
||
const uint8_t* p11 = row1 + x1 * 3;
|
||
|
||
for (int c = 0; c < 3; ++c) {
|
||
const float v =
|
||
(static_cast<float>(p00[c]) * wx0 + static_cast<float>(p01[c]) * wx1) * wy0 +
|
||
(static_cast<float>(p10[c]) * wx0 + static_cast<float>(p11[c]) * wx1) * wy1;
|
||
out[c] = static_cast<uint8_t>(ClampInt(static_cast<int>(v + 0.5f), 0, 255));
|
||
}
|
||
|
||
if (swap_rb) {
|
||
std::swap(out[0], out[2]);
|
||
}
|
||
out += 3;
|
||
}
|
||
}
|
||
}
|
||
|
||
// ============================================================================
|
||
// 张量解析(从RKNN输出提取)
|
||
// ============================================================================
|
||
|
||
// Converts an IEEE 754 binary16 bit pattern to a 32-bit float.
//
// Signed zero, subnormals, normal numbers, infinities and NaNs are all
// handled; for NaNs the 10 mantissa bits are carried into the top of the
// float mantissa.
inline float HalfToFloat(uint16_t h) {
    static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");

    constexpr int32_t kBiasDelta = 127 - 15;  // float exponent bias minus half exponent bias

    const uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
    const uint32_t half_exp = (h & 0x7C00u) >> 10;
    uint32_t mant = h & 0x03FFu;

    uint32_t bits = sign;  // already correct for +/-0
    if (half_exp == 0) {
        if (mant != 0) {
            // Subnormal half: shift the mantissa left until its implicit
            // leading 1 appears at bit 10, lowering the exponent by one for
            // every shift. A signed exponent is used so it may go below zero.
            int32_t exp = 1;
            while ((mant & 0x0400u) == 0) {
                mant <<= 1;
                --exp;
            }
            mant &= 0x03FFu;  // drop the now-implicit leading 1
            bits |= (static_cast<uint32_t>(exp + kBiasDelta) << 23) | (mant << 13);
        }
    } else if (half_exp == 31) {
        // Infinity (mant == 0) or NaN (mant != 0).
        bits |= 0x7F800000u | (mant << 13);
    } else {
        // Normal number: re-bias the exponent and widen the mantissa.
        bits |= ((half_exp + static_cast<uint32_t>(kBiasDelta)) << 23) | (mant << 13);
    }

    float out;
    std::memcpy(&out, &bits, sizeof(out));
    return out;
}
|
||
|
||
// Affine dequantization: (q - zp) * scale, evaluated in float.
template <typename T>
inline float Dequant(T q, int32_t zp, float scale) {
    const float centered = static_cast<float>(q) - static_cast<float>(zp);
    return centered * scale;
}
|
||
|
||
// 从TensorView提取NcTensor
|
||
inline bool ExtractNcTensor(const TensorView& t, int c, NcTensor& out) {
|
||
out = {};
|
||
out.c = c;
|
||
if (!t.data || t.size == 0) return false;
|
||
|
||
size_t elem_size = 1;
|
||
bool is_float32 = false;
|
||
bool is_float16 = false;
|
||
|
||
if (t.type == RKNN_TENSOR_FLOAT16) {
|
||
elem_size = 2;
|
||
is_float16 = true;
|
||
} else if (t.type == RKNN_TENSOR_FLOAT32) {
|
||
elem_size = 4;
|
||
is_float32 = true;
|
||
}
|
||
|
||
const size_t elem_cnt = elem_size > 0 ? (t.size / elem_size) : 0;
|
||
if (elem_cnt == 0) return false;
|
||
|
||
int n = 0;
|
||
bool transposed = false;
|
||
if (t.dims.size() == 3) {
|
||
const uint32_t d1 = t.dims[1];
|
||
const uint32_t d2 = t.dims[2];
|
||
if (static_cast<int>(d2) == c) {
|
||
n = static_cast<int>(d1);
|
||
transposed = false;
|
||
} else if (static_cast<int>(d1) == c) {
|
||
n = static_cast<int>(d2);
|
||
transposed = true;
|
||
} else {
|
||
return false;
|
||
}
|
||
} else if (t.dims.size() == 2) {
|
||
const uint32_t d0 = t.dims[0];
|
||
const uint32_t d1 = t.dims[1];
|
||
if (static_cast<int>(d1) == c) {
|
||
n = static_cast<int>(d0);
|
||
transposed = false;
|
||
} else if (static_cast<int>(d0) == c) {
|
||
n = static_cast<int>(d1);
|
||
transposed = true;
|
||
}
|
||
}
|
||
|
||
if (n <= 0) {
|
||
if (elem_cnt % static_cast<size_t>(c) != 0) return false;
|
||
n = static_cast<int>(elem_cnt / static_cast<size_t>(c));
|
||
transposed = false;
|
||
}
|
||
|
||
if (static_cast<size_t>(n) * static_cast<size_t>(c) != elem_cnt) {
|
||
return false;
|
||
}
|
||
|
||
out.n = n;
|
||
out.data.resize(static_cast<size_t>(n) * static_cast<size_t>(c));
|
||
|
||
auto ReadElem = [&](size_t idx) -> float {
|
||
if (is_float32) {
|
||
const float* fp = reinterpret_cast<const float*>(t.data);
|
||
return fp[idx];
|
||
}
|
||
if (is_float16) {
|
||
const uint16_t* hp = reinterpret_cast<const uint16_t*>(t.data);
|
||
return HalfToFloat(hp[idx]);
|
||
}
|
||
if (t.type == RKNN_TENSOR_INT8) {
|
||
const int8_t* p = reinterpret_cast<const int8_t*>(t.data);
|
||
return Dequant(p[idx], t.zp, t.scale);
|
||
}
|
||
const uint8_t* p = reinterpret_cast<const uint8_t*>(t.data);
|
||
return Dequant(p[idx], t.zp, t.scale);
|
||
};
|
||
|
||
if (!transposed) {
|
||
for (size_t i = 0; i < out.data.size(); ++i) {
|
||
out.data[i] = ReadElem(i);
|
||
}
|
||
} else {
|
||
for (int ci = 0; ci < c; ++ci) {
|
||
for (int ni = 0; ni < n; ++ni) {
|
||
const size_t src_idx = static_cast<size_t>(ci) * static_cast<size_t>(n) + static_cast<size_t>(ni);
|
||
const size_t dst_idx = static_cast<size_t>(ni) * static_cast<size_t>(c) + static_cast<size_t>(ci);
|
||
out.data[dst_idx] = ReadElem(src_idx);
|
||
}
|
||
}
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
// ============================================================================
|
||
// RetinaFace 核心:先验框生成
|
||
// ============================================================================
|
||
|
||
inline std::vector<Prior> GeneratePriors(int in_w, int in_h,
|
||
const std::vector<int>& steps,
|
||
const std::vector<std::vector<int>>& min_sizes) {
|
||
std::vector<Prior> priors;
|
||
if (steps.empty() || steps.size() != min_sizes.size()) return priors;
|
||
priors.reserve(5000);
|
||
|
||
for (size_t s = 0; s < steps.size(); ++s) {
|
||
const int step = steps[s];
|
||
const int fm_w = in_w / step;
|
||
const int fm_h = in_h / step;
|
||
for (int i = 0; i < fm_h; ++i) {
|
||
for (int j = 0; j < fm_w; ++j) {
|
||
for (int ms : min_sizes[s]) {
|
||
const float s_kx = static_cast<float>(ms) / static_cast<float>(in_w);
|
||
const float s_ky = static_cast<float>(ms) / static_cast<float>(in_h);
|
||
const float cx = (static_cast<float>(j) + 0.5f) * static_cast<float>(step) /
|
||
static_cast<float>(in_w);
|
||
const float cy = (static_cast<float>(i) + 0.5f) * static_cast<float>(step) /
|
||
static_cast<float>(in_h);
|
||
priors.push_back(Prior{cx, cy, s_kx, s_ky});
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return priors;
|
||
}
|
||
|
||
// ============================================================================
|
||
// RetinaFace 核心:检测结果解码
|
||
// ============================================================================
|
||
|
||
/**
|
||
* 解码RetinaFace检测结果
|
||
*
|
||
* @param loc_tensor 位置回归张量 [N, 4]
|
||
* @param conf_tensor 置信度张量 [N, 2]
|
||
* @param landm_tensor 关键点张量 [N, 10] (可选,可以为空)
|
||
* @param priors 先验框
|
||
* @param src_w 原始图像宽度
|
||
* @param src_h 原始图像高度
|
||
* @param model_w 模型输入宽度
|
||
* @param model_h 模型输入高度
|
||
* @param cfg 检测配置
|
||
* @param out 输出结果
|
||
*/
|
||
inline void DecodeRetinaFace(const NcTensor& loc_tensor,
|
||
const NcTensor& conf_tensor,
|
||
const NcTensor& landm_tensor,
|
||
const std::vector<Prior>& priors,
|
||
int src_w, int src_h,
|
||
int model_w, int model_h,
|
||
const DetectionConfig& cfg,
|
||
FaceDetResult& out) {
|
||
if (loc_tensor.n <= 0 || conf_tensor.n != loc_tensor.n) return;
|
||
|
||
const int n = loc_tensor.n;
|
||
const bool has_landmarks = cfg.output_landmarks && !landm_tensor.data.empty() && landm_tensor.n == n;
|
||
|
||
if (!priors.empty() && static_cast<int>(priors.size()) != n) {
|
||
return; // prior mismatch
|
||
}
|
||
|
||
const float sx = static_cast<float>(src_w) / static_cast<float>(model_w);
|
||
const float sy = static_cast<float>(src_h) / static_cast<float>(model_h);
|
||
|
||
std::vector<Rect> boxes;
|
||
std::vector<float> scores;
|
||
std::vector<std::array<Point2f, 5>> lmks;
|
||
boxes.reserve(static_cast<size_t>(n));
|
||
scores.reserve(static_cast<size_t>(n));
|
||
if (has_landmarks) lmks.reserve(static_cast<size_t>(n));
|
||
|
||
const float var0 = cfg.variance0;
|
||
const float var1 = cfg.variance1;
|
||
|
||
for (int i = 0; i < n; ++i) {
|
||
// 解析置信度
|
||
const float s0 = conf_tensor.data[static_cast<size_t>(i) * 2 + 0];
|
||
const float s1 = conf_tensor.data[static_cast<size_t>(i) * 2 + 1];
|
||
float score;
|
||
if (s0 >= 0.0f && s0 <= 1.0f && s1 >= 0.0f && s1 <= 1.0f && std::fabs((s0 + s1) - 1.0f) < 0.1f) {
|
||
score = s1;
|
||
} else {
|
||
score = Softmax2(s0, s1);
|
||
}
|
||
if (score < cfg.conf_thresh) continue;
|
||
|
||
// 解析位置
|
||
const Prior p = priors.empty() ? Prior{0, 0, 0, 0} : priors[static_cast<size_t>(i)];
|
||
const float dx = loc_tensor.data[static_cast<size_t>(i) * 4 + 0];
|
||
const float dy = loc_tensor.data[static_cast<size_t>(i) * 4 + 1];
|
||
const float dw = loc_tensor.data[static_cast<size_t>(i) * 4 + 2];
|
||
const float dh = loc_tensor.data[static_cast<size_t>(i) * 4 + 3];
|
||
|
||
const float cx = p.cx + dx * var0 * p.w;
|
||
const float cy = p.cy + dy * var0 * p.h;
|
||
const float ww = p.w * std::exp(dw * var1);
|
||
const float hh = p.h * std::exp(dh * var1);
|
||
|
||
float x1 = (cx - ww * 0.5f) * static_cast<float>(model_w);
|
||
float y1 = (cy - hh * 0.5f) * static_cast<float>(model_h);
|
||
float x2 = (cx + ww * 0.5f) * static_cast<float>(model_w);
|
||
float y2 = (cy + hh * 0.5f) * static_cast<float>(model_h);
|
||
|
||
// 映射到原始图像
|
||
x1 *= sx;
|
||
x2 *= sx;
|
||
y1 *= sy;
|
||
y2 *= sy;
|
||
|
||
Rect bb;
|
||
bb.x = static_cast<float>(ClampInt(static_cast<int>(x1), 0, src_w - 1));
|
||
bb.y = static_cast<float>(ClampInt(static_cast<int>(y1), 0, src_h - 1));
|
||
const float rx2 = static_cast<float>(ClampInt(static_cast<int>(x2), 0, src_w - 1));
|
||
const float ry2 = static_cast<float>(ClampInt(static_cast<int>(y2), 0, src_h - 1));
|
||
bb.w = std::max(0.0f, rx2 - bb.x);
|
||
bb.h = std::max(0.0f, ry2 - bb.y);
|
||
if (bb.w <= 1.0f || bb.h <= 1.0f) continue;
|
||
|
||
boxes.push_back(bb);
|
||
scores.push_back(score);
|
||
|
||
// 解析关键点
|
||
if (has_landmarks) {
|
||
std::array<Point2f, 5> pts{};
|
||
for (int k = 0; k < 5; ++k) {
|
||
const float lx = landm_tensor.data[static_cast<size_t>(i) * 10 + k * 2 + 0];
|
||
const float ly = landm_tensor.data[static_cast<size_t>(i) * 10 + k * 2 + 1];
|
||
const float px = (p.cx + lx * var0 * p.w) * static_cast<float>(model_w) * sx;
|
||
const float py = (p.cy + ly * var0 * p.h) * static_cast<float>(model_h) * sy;
|
||
pts[k].x = static_cast<float>(ClampInt(static_cast<int>(px), 0, src_w - 1));
|
||
pts[k].y = static_cast<float>(ClampInt(static_cast<int>(py), 0, src_h - 1));
|
||
}
|
||
lmks.push_back(pts);
|
||
}
|
||
}
|
||
|
||
if (boxes.empty()) return;
|
||
|
||
// NMS
|
||
std::vector<int> keep;
|
||
NmsSorted(boxes, scores, cfg.nms_thresh, keep);
|
||
if (keep.empty()) return;
|
||
|
||
// 构建输出
|
||
const int out_n = std::min<int>(cfg.max_faces, static_cast<int>(keep.size()));
|
||
out.faces.reserve(static_cast<size_t>(out_n));
|
||
for (int i = 0; i < out_n; ++i) {
|
||
const int k = keep[static_cast<size_t>(i)];
|
||
FaceDetItem item;
|
||
item.bbox = boxes[static_cast<size_t>(k)];
|
||
item.score = scores[static_cast<size_t>(k)];
|
||
item.track_id = -1;
|
||
if (has_landmarks && k < static_cast<int>(lmks.size())) {
|
||
item.has_landmarks = true;
|
||
item.landmarks = lmks[static_cast<size_t>(k)];
|
||
}
|
||
out.faces.push_back(std::move(item));
|
||
}
|
||
}
|
||
|
||
// ============================================================================
|
||
// SCRFD 核心:检测结果解码
|
||
// SCRFD输出格式:9个张量 (3个尺度 × 3种类型:score, bbox, kps)
|
||
// ============================================================================
|
||
|
||
// One SCRFD anchor point: its centre and the stride of the level it belongs to.
struct ScrfdAnchor {
    float cx{0.0f};  // centre x
    float cy{0.0f};  // centre y
    int stride{0};   // stride of the owning feature-map level
};
|
||
|
||
inline std::vector<ScrfdAnchor> GenerateScrfdAnchors(int in_w, int in_h,
|
||
const std::vector<int>& strides) {
|
||
std::vector<ScrfdAnchor> anchors;
|
||
anchors.reserve(20000);
|
||
|
||
for (int stride : strides) {
|
||
int fm_w = in_w / stride;
|
||
int fm_h = in_h / stride;
|
||
for (int y = 0; y < fm_h; ++y) {
|
||
for (int x = 0; x < fm_w; ++x) {
|
||
// SCRFD使用2个anchor per location
|
||
for (int a = 0; a < 2; ++a) {
|
||
anchors.push_back(ScrfdAnchor{
|
||
(x + 0.5f) * stride,
|
||
(y + 0.5f) * stride,
|
||
stride
|
||
});
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return anchors;
|
||
}
|
||
|
||
// 从NCHW格式提取值
|
||
inline float ExtractNCHW(const TensorView& t, int c, int h, int w, int C, int H, int W) {
|
||
if (c < 0 || c >= C || h < 0 || h >= H || w < 0 || w >= W) return 0.0f;
|
||
size_t idx = (static_cast<size_t>(c) * H + h) * W + w;
|
||
|
||
if (t.type == RKNN_TENSOR_FLOAT32) {
|
||
const float* p = reinterpret_cast<const float*>(t.data);
|
||
return p[idx];
|
||
} else if (t.type == RKNN_TENSOR_INT8) {
|
||
const int8_t* p = reinterpret_cast<const int8_t*>(t.data);
|
||
return Dequant(p[idx], t.zp, t.scale);
|
||
} else {
|
||
const uint8_t* p = reinterpret_cast<const uint8_t*>(t.data);
|
||
return Dequant(p[idx], t.zp, t.scale);
|
||
}
|
||
}
|
||
|
||
/**
|
||
* 解码SCRFD检测结果 - 与 ai_scrfd 节点使用相同的逻辑
|
||
*
|
||
* @param outputs 9个输出张量 [score_8, score_16, score_32, bbox_8, bbox_16, bbox_32, kps_8, kps_16, kps_32]
|
||
* @param anchors 预生成的anchor (center_x, center_y, stride)
|
||
* @param src_w 原始图像宽度
|
||
* @param src_h 原始图像高度
|
||
* @param model_w 模型输入宽度
|
||
* @param model_h 模型输入高度
|
||
* @param conf_thresh 置信度阈值
|
||
* @param output_lm 是否输出关键点
|
||
* @param out 输出结果
|
||
*/
|
||
inline void DecodeScrfd(const std::vector<TensorView>& outputs,
|
||
const std::vector<ScrfdAnchor>& anchors,
|
||
int src_w, int src_h,
|
||
int model_w, int model_h,
|
||
float conf_thresh,
|
||
bool output_lm,
|
||
FaceDetResult& out) {
|
||
if (outputs.size() != 9) return;
|
||
|
||
// Output order: score_8, score_16, score_32, bbox_8, bbox_16, bbox_32, kps_8, kps_16, kps_32
|
||
const int anchor_counts[] = {12800, 3200, 800};
|
||
const int strides[] = {8, 16, 32};
|
||
|
||
size_t anchor_idx = 0;
|
||
float scale_x = static_cast<float>(src_w) / model_w;
|
||
float scale_y = static_cast<float>(src_h) / model_h;
|
||
|
||
for (int s = 0; s < 3; ++s) {
|
||
int stride = strides[s];
|
||
int count = anchor_counts[s];
|
||
|
||
// 检查输出数据是否有效
|
||
if (outputs[s].type != RKNN_TENSOR_FLOAT32 ||
|
||
outputs[s + 3].type != RKNN_TENSOR_FLOAT32 ||
|
||
outputs[s + 6].type != RKNN_TENSOR_FLOAT32) {
|
||
continue;
|
||
}
|
||
|
||
const float* scores = reinterpret_cast<const float*>(outputs[s].data);
|
||
const float* bboxes = reinterpret_cast<const float*>(outputs[s + 3].data);
|
||
const float* kps = reinterpret_cast<const float*>(outputs[s + 6].data);
|
||
|
||
if (!scores || !bboxes || !kps) continue;
|
||
|
||
for (int i = 0; i < count; ++i) {
|
||
if (anchor_idx >= anchors.size()) break;
|
||
|
||
float score = scores[i];
|
||
if (score < conf_thresh) {
|
||
anchor_idx++;
|
||
continue;
|
||
}
|
||
|
||
const ScrfdAnchor& pt = anchors[anchor_idx];
|
||
|
||
// BBox: [left, top, right, bottom] - distances from center
|
||
float left = bboxes[i * 4 + 0];
|
||
float top = bboxes[i * 4 + 1];
|
||
float right = bboxes[i * 4 + 2];
|
||
float bottom = bboxes[i * 4 + 3];
|
||
|
||
// Decode to image coordinates (640x640)
|
||
float x1_640 = (pt.cx - left) * stride;
|
||
float y1_640 = (pt.cy - top) * stride;
|
||
float x2_640 = (pt.cx + right) * stride;
|
||
float y2_640 = (pt.cy + bottom) * stride;
|
||
|
||
FaceDetItem det;
|
||
det.bbox.x = x1_640 * scale_x;
|
||
det.bbox.y = y1_640 * scale_y;
|
||
det.bbox.w = (x2_640 - x1_640) * scale_x;
|
||
det.bbox.h = (y2_640 - y1_640) * scale_y;
|
||
det.score = score;
|
||
det.has_landmarks = output_lm;
|
||
|
||
// Keypoints
|
||
if (output_lm) {
|
||
for (int p = 0; p < 5; ++p) {
|
||
float kps_x = kps[i * 10 + p * 2 + 0];
|
||
float kps_y = kps[i * 10 + p * 2 + 1];
|
||
float kx_640 = (pt.cx + kps_x) * stride;
|
||
float ky_640 = (pt.cy + kps_y) * stride;
|
||
det.landmarks[p].x = kx_640 * scale_x;
|
||
det.landmarks[p].y = ky_640 * scale_y;
|
||
}
|
||
}
|
||
|
||
out.faces.push_back(det);
|
||
anchor_idx++;
|
||
}
|
||
}
|
||
}
|
||
|
||
} // namespace face_detection
|
||
} // namespace rk3588
|