607 lines
24 KiB
C++
607 lines
24 KiB
C++
#include <algorithm>
|
|
#include <cmath>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "face/face_result.h"
|
|
#include "hw/i_image_processor.h"
|
|
#include "node.h"
|
|
#include "utils/dma_alloc.h"
|
|
#include "utils/logger.h"
|
|
|
|
namespace rk3588 {
|
|
|
|
namespace {
|
|
|
|
enum class ResizeMode {
|
|
Stretch,
|
|
KeepRatio,
|
|
Letterbox,
|
|
};
|
|
|
|
PixelFormat ParseFormat(const std::string& s) {
|
|
if (s == "nv12" || s == "NV12") return PixelFormat::NV12;
|
|
if (s == "yuv420" || s == "YUV420") return PixelFormat::YUV420;
|
|
if (s == "rgb" || s == "RGB") return PixelFormat::RGB;
|
|
if (s == "bgr" || s == "BGR") return PixelFormat::BGR;
|
|
return PixelFormat::UNKNOWN;
|
|
}
|
|
|
|
ResizeMode ParseResizeMode(const std::string& s, bool keep_ratio) {
|
|
if (s == "stretch") return ResizeMode::Stretch;
|
|
if (s == "keep_ratio" || s == "fit") return ResizeMode::KeepRatio;
|
|
if (s == "letterbox") return ResizeMode::Letterbox;
|
|
return keep_ratio ? ResizeMode::KeepRatio : ResizeMode::Stretch;
|
|
}
|
|
|
|
inline bool IsYuvFormat(PixelFormat fmt) {
|
|
return fmt == PixelFormat::NV12 || fmt == PixelFormat::YUV420;
|
|
}
|
|
|
|
inline float ClampFloat(float v, float lo, float hi) {
|
|
return std::max(lo, std::min(v, hi));
|
|
}
|
|
|
|
inline int MakeEvenFloor(int v) {
|
|
if (v <= 0) return 0;
|
|
return v & ~1;
|
|
}
|
|
|
|
size_t CalcImageSize(int w, int h, PixelFormat fmt) {
|
|
if (w <= 0 || h <= 0) return 0;
|
|
switch (fmt) {
|
|
case PixelFormat::NV12:
|
|
case PixelFormat::YUV420:
|
|
return static_cast<size_t>(w) * static_cast<size_t>(h) * 3 / 2;
|
|
case PixelFormat::RGB:
|
|
case PixelFormat::BGR:
|
|
return static_cast<size_t>(w) * static_cast<size_t>(h) * 3;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
void SetupPlanes(Frame& f) {
|
|
if (!f.data || f.width <= 0 || f.height <= 0) return;
|
|
if (f.format == PixelFormat::NV12) {
|
|
const int y_stride = f.width;
|
|
const int y_size = y_stride * f.height;
|
|
const int uv_size = y_stride * (f.height / 2);
|
|
f.stride = y_stride;
|
|
f.plane_count = 2;
|
|
f.planes[0] = {f.data, y_stride, y_size, 0};
|
|
f.planes[1] = {f.data + y_size, y_stride, uv_size, y_size};
|
|
} else if (f.format == PixelFormat::YUV420) {
|
|
const int y_stride = f.width;
|
|
const int y_size = y_stride * f.height;
|
|
const int uv_stride = f.width / 2;
|
|
const int u_size = uv_stride * (f.height / 2);
|
|
f.stride = y_stride;
|
|
f.plane_count = 3;
|
|
f.planes[0] = {f.data, y_stride, y_size, 0};
|
|
f.planes[1] = {f.data + y_size, uv_stride, u_size, y_size};
|
|
f.planes[2] = {f.data + y_size + u_size, uv_stride, u_size, y_size + u_size};
|
|
} else {
|
|
const int stride = f.width * 3;
|
|
f.stride = stride;
|
|
f.plane_count = 1;
|
|
f.planes[0] = {f.data, stride, static_cast<int>(f.data_size), 0};
|
|
}
|
|
f.SyncBufferFromFrame();
|
|
}
|
|
|
|
bool InitFrameStorage(Frame& f) {
|
|
const size_t need = CalcImageSize(f.width, f.height, f.format);
|
|
if (need == 0) return false;
|
|
|
|
if (auto dma = DmaAlloc(need); dma && dma->valid()) {
|
|
f.SetDmaFd(dma->fd);
|
|
f.data = dma->data();
|
|
f.data_size = dma->size;
|
|
f.SetOwner(dma);
|
|
SetupPlanes(f);
|
|
return true;
|
|
}
|
|
|
|
auto buf = std::make_shared<std::vector<uint8_t>>(need);
|
|
f.SetDmaFd(-1);
|
|
f.data = buf->data();
|
|
f.data_size = buf->size();
|
|
f.SetOwner(buf);
|
|
SetupPlanes(f);
|
|
return true;
|
|
}
|
|
|
|
void FillBlack(Frame& f) {
|
|
if (!f.data || f.data_size == 0) return;
|
|
if (f.DmaFd() >= 0) {
|
|
DmaSyncStartFd(f.DmaFd());
|
|
}
|
|
if (f.format == PixelFormat::NV12) {
|
|
const int y_size = f.width * f.height;
|
|
std::memset(f.data, 0, static_cast<size_t>(y_size));
|
|
std::memset(f.data + y_size, 128, static_cast<size_t>(f.width * f.height / 2));
|
|
} else if (f.format == PixelFormat::YUV420) {
|
|
const int y_size = f.width * f.height;
|
|
const int u_size = (f.width / 2) * (f.height / 2);
|
|
std::memset(f.data, 0, static_cast<size_t>(y_size));
|
|
std::memset(f.data + y_size, 128, static_cast<size_t>(u_size));
|
|
std::memset(f.data + y_size + u_size, 128, static_cast<size_t>(u_size));
|
|
} else {
|
|
std::memset(f.data, 0, f.data_size);
|
|
}
|
|
if (f.DmaFd() >= 0) {
|
|
DmaSyncEndFd(f.DmaFd());
|
|
}
|
|
}
|
|
|
|
bool BlitLetterbox(const Frame& src, Frame& dst, int pad_x, int pad_y) {
|
|
if (!src.data || !dst.data || src.format != dst.format) return false;
|
|
if (pad_x < 0 || pad_y < 0) return false;
|
|
if (src.width + pad_x > dst.width || src.height + pad_y > dst.height) return false;
|
|
|
|
if (src.DmaFd() >= 0) src.SyncStart();
|
|
if (dst.DmaFd() >= 0) DmaSyncStartFd(dst.DmaFd());
|
|
|
|
if (src.format == PixelFormat::RGB || src.format == PixelFormat::BGR) {
|
|
const int src_stride = src.planes[0].stride > 0 ? src.planes[0].stride : src.width * 3;
|
|
const int dst_stride = dst.planes[0].stride > 0 ? dst.planes[0].stride : dst.width * 3;
|
|
const uint8_t* src_ptr = src.planes[0].data ? src.planes[0].data : src.data;
|
|
uint8_t* dst_ptr = dst.planes[0].data ? dst.planes[0].data : dst.data;
|
|
const size_t row_bytes = static_cast<size_t>(src.width) * 3;
|
|
for (int y = 0; y < src.height; ++y) {
|
|
std::memcpy(dst_ptr + static_cast<size_t>(y + pad_y) * dst_stride + static_cast<size_t>(pad_x) * 3,
|
|
src_ptr + static_cast<size_t>(y) * src_stride,
|
|
row_bytes);
|
|
}
|
|
} else if (src.format == PixelFormat::NV12) {
|
|
const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : src.width;
|
|
const int src_uv_stride = src.planes[1].stride > 0 ? src.planes[1].stride : src.width;
|
|
const int dst_y_stride = dst.planes[0].stride > 0 ? dst.planes[0].stride : dst.width;
|
|
const int dst_uv_stride = dst.planes[1].stride > 0 ? dst.planes[1].stride : dst.width;
|
|
const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data;
|
|
const uint8_t* src_uv = src.planes[1].data ? src.planes[1].data : (src.data + src.width * src.height);
|
|
uint8_t* dst_y = dst.planes[0].data ? dst.planes[0].data : dst.data;
|
|
uint8_t* dst_uv = dst.planes[1].data ? dst.planes[1].data : (dst.data + dst.width * dst.height);
|
|
|
|
for (int y = 0; y < src.height; ++y) {
|
|
std::memcpy(dst_y + static_cast<size_t>(y + pad_y) * dst_y_stride + pad_x,
|
|
src_y + static_cast<size_t>(y) * src_y_stride,
|
|
static_cast<size_t>(src.width));
|
|
}
|
|
|
|
const int uv_rows = src.height / 2;
|
|
for (int y = 0; y < uv_rows; ++y) {
|
|
std::memcpy(dst_uv + static_cast<size_t>(y + pad_y / 2) * dst_uv_stride + pad_x,
|
|
src_uv + static_cast<size_t>(y) * src_uv_stride,
|
|
static_cast<size_t>(src.width));
|
|
}
|
|
} else if (src.format == PixelFormat::YUV420) {
|
|
const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : src.width;
|
|
const int src_u_stride = src.planes[1].stride > 0 ? src.planes[1].stride : src.width / 2;
|
|
const int src_v_stride = src.planes[2].stride > 0 ? src.planes[2].stride : src.width / 2;
|
|
const int dst_y_stride = dst.planes[0].stride > 0 ? dst.planes[0].stride : dst.width;
|
|
const int dst_u_stride = dst.planes[1].stride > 0 ? dst.planes[1].stride : dst.width / 2;
|
|
const int dst_v_stride = dst.planes[2].stride > 0 ? dst.planes[2].stride : dst.width / 2;
|
|
const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data;
|
|
const uint8_t* src_u = src.planes[1].data ? src.planes[1].data : (src.data + src.width * src.height);
|
|
const uint8_t* src_v = src.planes[2].data ? src.planes[2].data : (src_u + (src.width / 2) * (src.height / 2));
|
|
uint8_t* dst_y = dst.planes[0].data ? dst.planes[0].data : dst.data;
|
|
uint8_t* dst_u = dst.planes[1].data ? dst.planes[1].data : (dst.data + dst.width * dst.height);
|
|
uint8_t* dst_v = dst.planes[2].data ? dst.planes[2].data : (dst_u + (dst.width / 2) * (dst.height / 2));
|
|
|
|
for (int y = 0; y < src.height; ++y) {
|
|
std::memcpy(dst_y + static_cast<size_t>(y + pad_y) * dst_y_stride + pad_x,
|
|
src_y + static_cast<size_t>(y) * src_y_stride,
|
|
static_cast<size_t>(src.width));
|
|
}
|
|
|
|
const int uv_rows = src.height / 2;
|
|
const int uv_pad_x = pad_x / 2;
|
|
const int uv_pad_y = pad_y / 2;
|
|
const int uv_cols = src.width / 2;
|
|
for (int y = 0; y < uv_rows; ++y) {
|
|
std::memcpy(dst_u + static_cast<size_t>(y + uv_pad_y) * dst_u_stride + uv_pad_x,
|
|
src_u + static_cast<size_t>(y) * src_u_stride,
|
|
static_cast<size_t>(uv_cols));
|
|
std::memcpy(dst_v + static_cast<size_t>(y + uv_pad_y) * dst_v_stride + uv_pad_x,
|
|
src_v + static_cast<size_t>(y) * src_v_stride,
|
|
static_cast<size_t>(uv_cols));
|
|
}
|
|
} else {
|
|
if (src.DmaFd() >= 0) src.SyncEnd();
|
|
if (dst.DmaFd() >= 0) DmaSyncEndFd(dst.DmaFd());
|
|
return false;
|
|
}
|
|
|
|
if (dst.DmaFd() >= 0) DmaSyncEndFd(dst.DmaFd());
|
|
if (src.DmaFd() >= 0) src.SyncEnd();
|
|
return true;
|
|
}
|
|
|
|
void TransformRect(Rect& r, float sx, float sy, float tx, float ty, int out_w, int out_h) {
|
|
if (out_w <= 0 || out_h <= 0) {
|
|
r = Rect{};
|
|
return;
|
|
}
|
|
const float fw = static_cast<float>(out_w);
|
|
const float fh = static_cast<float>(out_h);
|
|
|
|
const float x = ClampFloat(r.x * sx + tx, 0.0f, fw);
|
|
const float y = ClampFloat(r.y * sy + ty, 0.0f, fh);
|
|
float w = std::max(0.0f, r.w * sx);
|
|
float h = std::max(0.0f, r.h * sy);
|
|
|
|
if (x + w > fw) w = std::max(0.0f, fw - x);
|
|
if (y + h > fh) h = std::max(0.0f, fh - y);
|
|
|
|
r.x = x;
|
|
r.y = y;
|
|
r.w = w;
|
|
r.h = h;
|
|
}
|
|
|
|
void TransformPoint(Point2f& p, float sx, float sy, float tx, float ty, int out_w, int out_h) {
|
|
if (out_w <= 0 || out_h <= 0) {
|
|
p = Point2f{};
|
|
return;
|
|
}
|
|
p.x = ClampFloat(p.x * sx + tx, 0.0f, static_cast<float>(out_w));
|
|
p.y = ClampFloat(p.y * sy + ty, 0.0f, static_cast<float>(out_h));
|
|
}
|
|
|
|
} // namespace
|
|
|
|
class PreprocessNode : public INode {
|
|
public:
|
|
std::string Id() const override { return id_; }
|
|
std::string Type() const override { return "preprocess"; }
|
|
|
|
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
|
|
id_ = config.ValueOr<std::string>("id", "preprocess");
|
|
dst_w_ = config.ValueOr<int>("dst_w", 640);
|
|
dst_h_ = config.ValueOr<int>("dst_h", 640);
|
|
keep_ratio_ = config.ValueOr<bool>("keep_ratio", false);
|
|
resize_mode_ = ParseResizeMode(config.ValueOr<std::string>("resize_mode", ""), keep_ratio_);
|
|
|
|
std::string fmt_str = config.ValueOr<std::string>("dst_format", "");
|
|
if (!fmt_str.empty()) {
|
|
dst_fmt_ = ParseFormat(fmt_str);
|
|
}
|
|
|
|
const bool requested_use_rga = config.ValueOr<bool>("use_rga", true);
|
|
use_rga_ = requested_use_rga;
|
|
|
|
if (const SimpleJson* dbg = config.Find("debug"); dbg && dbg->IsObject()) {
|
|
stats_log_ = dbg->ValueOr<bool>("stats", stats_log_);
|
|
stats_interval_ = std::max<uint64_t>(
|
|
1, static_cast<uint64_t>(dbg->ValueOr<int>("stats_interval", static_cast<int>(stats_interval_))));
|
|
}
|
|
|
|
input_queue_ = ctx.input_queue;
|
|
if (!input_queue_) {
|
|
LogError("[preprocess] no input queue for node " + id_);
|
|
return false;
|
|
}
|
|
if (ctx.output_queues.empty()) {
|
|
LogError("[preprocess] no output queue for node " + id_);
|
|
return false;
|
|
}
|
|
output_queues_ = ctx.output_queues;
|
|
|
|
#if !defined(RK3588_ENABLE_RGA)
|
|
if (requested_use_rga) {
|
|
LogError("[preprocess] use_rga=true but RGA not enabled at build time");
|
|
return false;
|
|
}
|
|
use_rga_ = false;
|
|
#endif
|
|
#if !defined(RK3588_ENABLE_FFMPEG)
|
|
if (!use_rga_) {
|
|
LogError("[preprocess] neither RGA nor FFmpeg enabled");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
image_processor_ = ctx.image_processor;
|
|
if (!image_processor_) {
|
|
LogError("[preprocess] no image processor for node " + id_);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Start() override {
|
|
std::string mode = "stretch";
|
|
if (resize_mode_ == ResizeMode::KeepRatio) mode = "keep_ratio";
|
|
if (resize_mode_ == ResizeMode::Letterbox) mode = "letterbox";
|
|
LogInfo("[preprocess] start id=" + id_ + " dst=" + std::to_string(dst_w_) + "x" +
|
|
std::to_string(dst_h_) + " mode=" + mode + (use_rga_ ? " (rga)" : " (swscale)"));
|
|
return true;
|
|
}
|
|
|
|
void Stop() override {}
|
|
|
|
NodeStatus Process(FramePtr frame) override {
|
|
if (!frame) return NodeStatus::DROP;
|
|
if (!image_processor_) return NodeStatus::ERROR;
|
|
|
|
PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format;
|
|
int out_w = dst_w_;
|
|
int out_h = dst_h_;
|
|
if (out_w <= 0) out_w = frame->width;
|
|
if (out_h <= 0) out_h = frame->height;
|
|
|
|
FrameTransformMeta tx{};
|
|
tx.valid = true;
|
|
tx.src_w = frame->width;
|
|
tx.src_h = frame->height;
|
|
|
|
Frame out;
|
|
if (resize_mode_ == ResizeMode::Letterbox && dst_w_ > 0 && dst_h_ > 0 &&
|
|
frame->width > 0 && frame->height > 0) {
|
|
Status lb = BuildLetterbox(*frame, out_fmt, out_w, out_h, out, tx);
|
|
if (lb.Failed()) {
|
|
LogError("[preprocess] letterbox failed: " + lb.ErrMessage());
|
|
return NodeStatus::ERROR;
|
|
}
|
|
} else {
|
|
if (resize_mode_ == ResizeMode::KeepRatio && dst_w_ > 0 && dst_h_ > 0 &&
|
|
frame->width > 0 && frame->height > 0) {
|
|
float scale = std::min(static_cast<float>(dst_w_) / frame->width,
|
|
static_cast<float>(dst_h_) / frame->height);
|
|
out_w = static_cast<int>(std::round(frame->width * scale));
|
|
out_h = static_cast<int>(std::round(frame->height * scale));
|
|
if (IsYuvFormat(out_fmt)) {
|
|
out_w = std::max(2, MakeEvenFloor(out_w));
|
|
out_h = std::max(2, MakeEvenFloor(out_h));
|
|
}
|
|
if (out_w <= 0) out_w = frame->width;
|
|
if (out_h <= 0) out_h = frame->height;
|
|
}
|
|
|
|
const bool need_resize = (frame->width != out_w || frame->height != out_h);
|
|
const bool need_cvt = (frame->format != out_fmt);
|
|
|
|
tx.letterbox = false;
|
|
tx.dst_w = out_w;
|
|
tx.dst_h = out_h;
|
|
tx.scale_x = frame->width > 0 ? static_cast<float>(out_w) / frame->width : 1.0f;
|
|
tx.scale_y = frame->height > 0 ? static_cast<float>(out_h) / frame->height : 1.0f;
|
|
tx.pad_x = 0.0f;
|
|
tx.pad_y = 0.0f;
|
|
|
|
if (!need_resize && !need_cvt) {
|
|
auto t = std::make_shared<FrameTransformMeta>(tx);
|
|
frame->transform_meta = t;
|
|
ProcessPassthrough(frame);
|
|
return NodeStatus::OK;
|
|
}
|
|
|
|
if (need_resize) {
|
|
WarnMetaResizeOnce(frame, out_w, out_h);
|
|
}
|
|
|
|
out.width = out_w;
|
|
out.height = out_h;
|
|
out.format = out_fmt;
|
|
Status st = image_processor_->Resize(*frame, out);
|
|
if (st.Failed()) {
|
|
if (!use_rga_ && st.ErrMessage().find("unsupported format") != std::string::npos) {
|
|
auto t = std::make_shared<FrameTransformMeta>(tx);
|
|
frame->transform_meta = t;
|
|
ProcessPassthrough(frame);
|
|
return NodeStatus::OK;
|
|
}
|
|
LogError("[preprocess] " + st.ErrMessage());
|
|
return NodeStatus::ERROR;
|
|
}
|
|
}
|
|
|
|
auto out_frame = std::make_shared<Frame>(out);
|
|
out_frame->pts = frame->pts;
|
|
out_frame->frame_id = frame->frame_id;
|
|
out_frame->transform_meta = std::make_shared<FrameTransformMeta>(tx);
|
|
ScaleMeta(*frame, *out_frame, tx);
|
|
out_frame->user_meta = frame->user_meta;
|
|
|
|
PushToDownstream(out_frame);
|
|
++processed_;
|
|
|
|
if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
|
|
LogInfo("[preprocess] " + std::string(use_rga_ ? "rga" : "swscale") +
|
|
" frame=" + std::to_string(out_frame->frame_id) +
|
|
" " + std::to_string(frame->width) + "x" + std::to_string(frame->height) +
|
|
" -> " + std::to_string(out_frame->width) + "x" + std::to_string(out_frame->height) +
|
|
" id=" + id_);
|
|
}
|
|
|
|
return NodeStatus::OK;
|
|
}
|
|
|
|
private:
|
|
Status BuildLetterbox(const Frame& input, PixelFormat out_fmt, int dst_w, int dst_h, Frame& out,
|
|
FrameTransformMeta& tx) const {
|
|
Frame src = input;
|
|
if (input.format != out_fmt) {
|
|
src.width = input.width;
|
|
src.height = input.height;
|
|
src.format = out_fmt;
|
|
Status cvt = image_processor_->CvtColor(input, src, out_fmt);
|
|
if (cvt.Failed()) return cvt;
|
|
}
|
|
|
|
if (src.width <= 0 || src.height <= 0 || dst_w <= 0 || dst_h <= 0) {
|
|
return FailStatus("invalid letterbox size");
|
|
}
|
|
|
|
float scale = std::min(static_cast<float>(dst_w) / static_cast<float>(src.width),
|
|
static_cast<float>(dst_h) / static_cast<float>(src.height));
|
|
int inner_w = std::max(1, static_cast<int>(std::round(src.width * scale)));
|
|
int inner_h = std::max(1, static_cast<int>(std::round(src.height * scale)));
|
|
if (IsYuvFormat(out_fmt)) {
|
|
inner_w = std::max(2, MakeEvenFloor(inner_w));
|
|
inner_h = std::max(2, MakeEvenFloor(inner_h));
|
|
if (((dst_w - inner_w) & 1) != 0) inner_w = std::max(2, inner_w - 2);
|
|
if (((dst_h - inner_h) & 1) != 0) inner_h = std::max(2, inner_h - 2);
|
|
}
|
|
if (inner_w <= 0 || inner_h <= 0 || inner_w > dst_w || inner_h > dst_h) {
|
|
return FailStatus("invalid inner letterbox size");
|
|
}
|
|
|
|
Frame resized;
|
|
resized.width = inner_w;
|
|
resized.height = inner_h;
|
|
resized.format = out_fmt;
|
|
Status st = image_processor_->Resize(src, resized);
|
|
if (st.Failed()) return st;
|
|
|
|
out.width = dst_w;
|
|
out.height = dst_h;
|
|
out.format = out_fmt;
|
|
if (!InitFrameStorage(out)) {
|
|
return FailStatus("alloc letterbox output failed");
|
|
}
|
|
FillBlack(out);
|
|
|
|
const int pad_x = (dst_w - inner_w) / 2;
|
|
const int pad_y = (dst_h - inner_h) / 2;
|
|
if (!BlitLetterbox(resized, out, pad_x, pad_y)) {
|
|
return FailStatus("blit letterbox failed");
|
|
}
|
|
|
|
tx.letterbox = true;
|
|
tx.src_w = input.width;
|
|
tx.src_h = input.height;
|
|
tx.dst_w = dst_w;
|
|
tx.dst_h = dst_h;
|
|
tx.scale_x = static_cast<float>(inner_w) / static_cast<float>(input.width);
|
|
tx.scale_y = static_cast<float>(inner_h) / static_cast<float>(input.height);
|
|
tx.pad_x = static_cast<float>(pad_x);
|
|
tx.pad_y = static_cast<float>(pad_y);
|
|
return OkStatus();
|
|
}
|
|
|
|
void ScaleMeta(const Frame& in_frame, Frame& out_frame, const FrameTransformMeta& tx) const {
|
|
if (in_frame.det) {
|
|
auto det = std::make_shared<DetectionResult>(*in_frame.det);
|
|
const int src_meta_w = det->img_w > 0 ? det->img_w : in_frame.width;
|
|
const int src_meta_h = det->img_h > 0 ? det->img_h : in_frame.height;
|
|
const float to_frame_x = src_meta_w > 0 ? static_cast<float>(in_frame.width) / src_meta_w : 1.0f;
|
|
const float to_frame_y = src_meta_h > 0 ? static_cast<float>(in_frame.height) / src_meta_h : 1.0f;
|
|
for (auto& it : det->items) {
|
|
TransformRect(it.bbox,
|
|
tx.scale_x * to_frame_x,
|
|
tx.scale_y * to_frame_y,
|
|
tx.pad_x, tx.pad_y,
|
|
out_frame.width, out_frame.height);
|
|
}
|
|
det->img_w = out_frame.width;
|
|
det->img_h = out_frame.height;
|
|
out_frame.det = std::move(det);
|
|
}
|
|
|
|
if (in_frame.face_det) {
|
|
auto face_det = std::make_shared<FaceDetResult>(*in_frame.face_det);
|
|
const int src_meta_w = face_det->img_w > 0 ? face_det->img_w : in_frame.width;
|
|
const int src_meta_h = face_det->img_h > 0 ? face_det->img_h : in_frame.height;
|
|
const float to_frame_x = src_meta_w > 0 ? static_cast<float>(in_frame.width) / src_meta_w : 1.0f;
|
|
const float to_frame_y = src_meta_h > 0 ? static_cast<float>(in_frame.height) / src_meta_h : 1.0f;
|
|
for (auto& it : face_det->faces) {
|
|
TransformRect(it.bbox,
|
|
tx.scale_x * to_frame_x,
|
|
tx.scale_y * to_frame_y,
|
|
tx.pad_x, tx.pad_y,
|
|
out_frame.width, out_frame.height);
|
|
if (it.has_landmarks) {
|
|
for (auto& lm : it.landmarks) {
|
|
TransformPoint(lm,
|
|
tx.scale_x * to_frame_x,
|
|
tx.scale_y * to_frame_y,
|
|
tx.pad_x, tx.pad_y,
|
|
out_frame.width, out_frame.height);
|
|
}
|
|
}
|
|
}
|
|
face_det->img_w = out_frame.width;
|
|
face_det->img_h = out_frame.height;
|
|
out_frame.face_det = std::move(face_det);
|
|
}
|
|
|
|
if (in_frame.face_recog) {
|
|
auto face_recog = std::make_shared<FaceRecogResult>(*in_frame.face_recog);
|
|
const int src_meta_w = face_recog->img_w > 0 ? face_recog->img_w : in_frame.width;
|
|
const int src_meta_h = face_recog->img_h > 0 ? face_recog->img_h : in_frame.height;
|
|
const float to_frame_x = src_meta_w > 0 ? static_cast<float>(in_frame.width) / src_meta_w : 1.0f;
|
|
const float to_frame_y = src_meta_h > 0 ? static_cast<float>(in_frame.height) / src_meta_h : 1.0f;
|
|
for (auto& it : face_recog->items) {
|
|
TransformRect(it.bbox,
|
|
tx.scale_x * to_frame_x,
|
|
tx.scale_y * to_frame_y,
|
|
tx.pad_x, tx.pad_y,
|
|
out_frame.width, out_frame.height);
|
|
if (it.has_landmarks) {
|
|
for (auto& lm : it.landmarks) {
|
|
TransformPoint(lm,
|
|
tx.scale_x * to_frame_x,
|
|
tx.scale_y * to_frame_y,
|
|
tx.pad_x, tx.pad_y,
|
|
out_frame.width, out_frame.height);
|
|
}
|
|
}
|
|
}
|
|
face_recog->img_w = out_frame.width;
|
|
face_recog->img_h = out_frame.height;
|
|
out_frame.face_recog = std::move(face_recog);
|
|
}
|
|
}
|
|
|
|
void PushToDownstream(FramePtr frame) {
|
|
for (auto& q : output_queues_) {
|
|
q->Push(frame);
|
|
}
|
|
}
|
|
|
|
void WarnMetaResizeOnce(const FramePtr& frame, int out_w, int out_h) {
|
|
if (warned_meta_resize_) return;
|
|
if (!frame) return;
|
|
if (frame->width == out_w && frame->height == out_h) return;
|
|
if (!frame->det && !frame->face_det && !frame->face_recog) return;
|
|
warned_meta_resize_ = true;
|
|
LogInfo("[preprocess] resized frame and scaled det/face meta to destination resolution (id=" + id_ + ")");
|
|
}
|
|
|
|
void ProcessPassthrough(FramePtr frame) {
|
|
PushToDownstream(frame);
|
|
++processed_;
|
|
if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
|
|
LogInfo("[preprocess] passthrough frame=" + std::to_string(frame->frame_id) + " id=" + id_);
|
|
}
|
|
}
|
|
|
|
std::string id_;
|
|
int dst_w_ = 640;
|
|
int dst_h_ = 640;
|
|
bool keep_ratio_ = false;
|
|
ResizeMode resize_mode_ = ResizeMode::Stretch;
|
|
PixelFormat dst_fmt_ = PixelFormat::UNKNOWN;
|
|
bool use_rga_ = true;
|
|
|
|
bool stats_log_ = false;
|
|
uint64_t stats_interval_ = 100;
|
|
bool warned_meta_resize_ = false;
|
|
|
|
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
|
|
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
|
|
std::shared_ptr<IImageProcessor> image_processor_;
|
|
uint64_t processed_ = 0;
|
|
};
|
|
|
|
REGISTER_NODE(PreprocessNode, "preprocess");
|
|
|
|
} // namespace rk3588
|