OrangePi3588Media/plugins/preprocess/preprocess_node.cpp

736 lines
30 KiB
C++

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
#include "face/face_result.h"
#include "hw/i_image_processor.h"
#include "node.h"
#include "utils/dma_alloc.h"
#include "utils/logger.h"
namespace rk3588 {
namespace {
// How a source frame is mapped onto the destination resolution.
enum class ResizeMode {
Stretch,  // scale each axis independently; aspect ratio may change
KeepRatio,  // scale uniformly; output shrinks to preserve aspect ratio
Letterbox,  // scale uniformly and pad with black to the exact destination size
};
// Translate a "dst_format" configuration string into a PixelFormat.
// Exactly the lower-case and upper-case spellings are accepted; any other
// string (including mixed case) yields PixelFormat::UNKNOWN.
PixelFormat ParseFormat(const std::string& s) {
  const auto is_one_of = [&s](const char* lower, const char* upper) {
    return s == lower || s == upper;
  };
  if (is_one_of("nv12", "NV12")) return PixelFormat::NV12;
  if (is_one_of("yuv420", "YUV420")) return PixelFormat::YUV420;
  if (is_one_of("rgb", "RGB")) return PixelFormat::RGB;
  if (is_one_of("bgr", "BGR")) return PixelFormat::BGR;
  return PixelFormat::UNKNOWN;
}
// Translate the "resize_mode" configuration string into a ResizeMode.
// An unknown or empty string falls back to the legacy keep_ratio flag.
ResizeMode ParseResizeMode(const std::string& s, bool keep_ratio) {
  if (s == "letterbox") return ResizeMode::Letterbox;
  if (s == "keep_ratio" || s == "fit") return ResizeMode::KeepRatio;
  if (s == "stretch") return ResizeMode::Stretch;
  if (keep_ratio) return ResizeMode::KeepRatio;
  return ResizeMode::Stretch;
}
// True for the planar/semi-planar YUV formats this node handles.
inline bool IsYuvFormat(PixelFormat fmt) {
  switch (fmt) {
    case PixelFormat::NV12:
    case PixelFormat::YUV420:
      return true;
    default:
      return false;
  }
}
// Constrain v to [lo, hi]. Written as max(lo, min(v, hi)), so if lo > hi
// the lower bound wins — callers in this file always pass lo <= hi.
inline float ClampFloat(float v, float lo, float hi) {
  const float upper_bounded = std::min(v, hi);
  return std::max(lo, upper_bounded);
}
// Round v down to the nearest even number; non-positive inputs become 0.
inline int MakeEvenFloor(int v) {
  if (v <= 0) return 0;
  return v - (v % 2);
}
// Byte size of a tightly packed w x h image in the given format.
// Returns 0 for non-positive dimensions or unsupported formats.
size_t CalcImageSize(int w, int h, PixelFormat fmt) {
  if (w <= 0 || h <= 0) return 0;
  const size_t pixels = static_cast<size_t>(w) * static_cast<size_t>(h);
  if (fmt == PixelFormat::NV12 || fmt == PixelFormat::YUV420) {
    // 8-bit luma plane plus half-size chroma: 1.5 bytes per pixel.
    return pixels * 3 / 2;
  }
  if (fmt == PixelFormat::RGB || fmt == PixelFormat::BGR) {
    // Packed 3-channel, 1 byte per channel.
    return pixels * 3;
  }
  return 0;
}
// Populate f.planes / f.stride / f.plane_count for a tightly packed buffer
// already attached at f.data (no row padding assumed). Plane initializers
// appear to be {data, stride, size, offset} — consistent across all branches
// here, but confirm against the Frame/Plane declaration.
void SetupPlanes(Frame& f) {
if (!f.data || f.width <= 0 || f.height <= 0) return;
if (f.format == PixelFormat::NV12) {
// Semi-planar: full-size Y plane followed by interleaved UV at half height.
const int y_stride = f.width;
const int y_size = y_stride * f.height;
const int uv_size = y_stride * (f.height / 2);
f.stride = y_stride;
f.plane_count = 2;
f.planes[0] = {f.data, y_stride, y_size, 0};
f.planes[1] = {f.data + y_size, y_stride, uv_size, y_size};
} else if (f.format == PixelFormat::YUV420) {
// Planar I420: Y plane, then U and V planes at quarter size each.
const int y_stride = f.width;
const int y_size = y_stride * f.height;
const int uv_stride = f.width / 2;
const int u_size = uv_stride * (f.height / 2);
f.stride = y_stride;
f.plane_count = 3;
f.planes[0] = {f.data, y_stride, y_size, 0};
f.planes[1] = {f.data + y_size, uv_stride, u_size, y_size};
f.planes[2] = {f.data + y_size + u_size, uv_stride, u_size, y_size + u_size};
} else {
// Packed RGB/BGR: a single interleaved plane, 3 bytes per pixel.
const int stride = f.width * 3;
f.stride = stride;
f.plane_count = 1;
f.planes[0] = {f.data, stride, static_cast<int>(f.data_size), 0};
}
// Propagate the plane layout into the frame's buffer descriptor.
f.SyncBufferFromFrame();
}
// Allocate backing storage for f sized by its width/height/format and wire up
// the plane descriptors. A DMA buffer is preferred (usable by RGA without a
// copy); a heap vector is the fallback. Ownership is transferred to the frame
// via SetOwner, so the buffer lives as long as the frame does.
// Returns false only when the geometry/format has no defined size.
bool InitFrameStorage(Frame& f) {
const size_t need = CalcImageSize(f.width, f.height, f.format);
if (need == 0) return false;
// Preferred path: DMA-backed buffer with a valid fd.
if (auto dma = DmaAlloc(need); dma && dma->valid()) {
f.SetDmaFd(dma->fd);
f.data = dma->data();
f.data_size = dma->size;
f.SetOwner(dma);
SetupPlanes(f);
return true;
}
// Fallback: plain heap memory; DmaFd of -1 marks it as non-DMA.
auto buf = std::make_shared<std::vector<uint8_t>>(need);
f.SetDmaFd(-1);
f.data = buf->data();
f.data_size = buf->size();
f.SetOwner(buf);
SetupPlanes(f);
return true;
}
// Paint the frame black: Y=0 with U/V=128 (neutral chroma) for YUV formats,
// all-zero bytes otherwise. CPU writes to a DMA-backed buffer are bracketed
// by DmaSyncStartFd / DmaSyncEndFd to keep CPU caches coherent with devices.
void FillBlack(Frame& f) {
if (!f.data || f.data_size == 0) return;
if (f.DmaFd() >= 0) {
DmaSyncStartFd(f.DmaFd());
}
if (f.format == PixelFormat::NV12) {
// Full-size luma plane, then interleaved UV at half height (w*h/2 bytes).
const int y_size = f.width * f.height;
std::memset(f.data, 0, static_cast<size_t>(y_size));
std::memset(f.data + y_size, 128, static_cast<size_t>(f.width * f.height / 2));
} else if (f.format == PixelFormat::YUV420) {
// Planar: Y plane, then separate U and V planes at quarter size each.
const int y_size = f.width * f.height;
const int u_size = (f.width / 2) * (f.height / 2);
std::memset(f.data, 0, static_cast<size_t>(y_size));
std::memset(f.data + y_size, 128, static_cast<size_t>(u_size));
std::memset(f.data + y_size + u_size, 128, static_cast<size_t>(u_size));
} else {
// Packed RGB/BGR (and anything else): zero bytes render as black.
std::memset(f.data, 0, f.data_size);
}
if (f.DmaFd() >= 0) {
DmaSyncEndFd(f.DmaFd());
}
}
// CPU copy of src into dst at pixel offset (pad_x, pad_y) — the inner image
// of a letterbox. Both frames must share the same format; RGB/BGR, NV12 and
// YUV420 are supported. Plane strides are honored when set, with a packed
// fallback. DMA-backed buffers are bracketed with cache syncs on both sides.
// NOTE(review): for NV12 the chroma row/byte offsets (pad_x, pad_y / 2) are
// only correct when pad_x and pad_y are even; BuildLetterbox enforces even
// padding for YUV outputs, but this function does not re-check it.
bool BlitLetterbox(const Frame& src, Frame& dst, int pad_x, int pad_y) {
if (!src.data || !dst.data || src.format != dst.format) return false;
if (pad_x < 0 || pad_y < 0) return false;
if (src.width + pad_x > dst.width || src.height + pad_y > dst.height) return false;
if (src.DmaFd() >= 0) src.SyncStart();
if (dst.DmaFd() >= 0) DmaSyncStartFd(dst.DmaFd());
if (src.format == PixelFormat::RGB || src.format == PixelFormat::BGR) {
// Packed 3-byte pixels: one memcpy per row at a 3*pad_x byte offset.
const int src_stride = src.planes[0].stride > 0 ? src.planes[0].stride : src.width * 3;
const int dst_stride = dst.planes[0].stride > 0 ? dst.planes[0].stride : dst.width * 3;
const uint8_t* src_ptr = src.planes[0].data ? src.planes[0].data : src.data;
uint8_t* dst_ptr = dst.planes[0].data ? dst.planes[0].data : dst.data;
const size_t row_bytes = static_cast<size_t>(src.width) * 3;
for (int y = 0; y < src.height; ++y) {
std::memcpy(dst_ptr + static_cast<size_t>(y + pad_y) * dst_stride + static_cast<size_t>(pad_x) * 3,
src_ptr + static_cast<size_t>(y) * src_stride,
row_bytes);
}
} else if (src.format == PixelFormat::NV12) {
// Semi-planar: copy luma rows, then half as many interleaved UV rows.
// A UV row carries `width` bytes for `width` luma pixels, so a shift of
// pad_x luma pixels is pad_x bytes (valid for even pad_x).
const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : src.width;
const int src_uv_stride = src.planes[1].stride > 0 ? src.planes[1].stride : src.width;
const int dst_y_stride = dst.planes[0].stride > 0 ? dst.planes[0].stride : dst.width;
const int dst_uv_stride = dst.planes[1].stride > 0 ? dst.planes[1].stride : dst.width;
const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data;
const uint8_t* src_uv = src.planes[1].data ? src.planes[1].data : (src.data + src.width * src.height);
uint8_t* dst_y = dst.planes[0].data ? dst.planes[0].data : dst.data;
uint8_t* dst_uv = dst.planes[1].data ? dst.planes[1].data : (dst.data + dst.width * dst.height);
for (int y = 0; y < src.height; ++y) {
std::memcpy(dst_y + static_cast<size_t>(y + pad_y) * dst_y_stride + pad_x,
src_y + static_cast<size_t>(y) * src_y_stride,
static_cast<size_t>(src.width));
}
const int uv_rows = src.height / 2;
for (int y = 0; y < uv_rows; ++y) {
std::memcpy(dst_uv + static_cast<size_t>(y + pad_y / 2) * dst_uv_stride + pad_x,
src_uv + static_cast<size_t>(y) * src_uv_stride,
static_cast<size_t>(src.width));
}
} else if (src.format == PixelFormat::YUV420) {
// Planar I420: copy the luma plane, then U and V planes at half the
// resolution and half the padding offsets.
const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : src.width;
const int src_u_stride = src.planes[1].stride > 0 ? src.planes[1].stride : src.width / 2;
const int src_v_stride = src.planes[2].stride > 0 ? src.planes[2].stride : src.width / 2;
const int dst_y_stride = dst.planes[0].stride > 0 ? dst.planes[0].stride : dst.width;
const int dst_u_stride = dst.planes[1].stride > 0 ? dst.planes[1].stride : dst.width / 2;
const int dst_v_stride = dst.planes[2].stride > 0 ? dst.planes[2].stride : dst.width / 2;
const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data;
const uint8_t* src_u = src.planes[1].data ? src.planes[1].data : (src.data + src.width * src.height);
const uint8_t* src_v = src.planes[2].data ? src.planes[2].data : (src_u + (src.width / 2) * (src.height / 2));
uint8_t* dst_y = dst.planes[0].data ? dst.planes[0].data : dst.data;
uint8_t* dst_u = dst.planes[1].data ? dst.planes[1].data : (dst.data + dst.width * dst.height);
uint8_t* dst_v = dst.planes[2].data ? dst.planes[2].data : (dst_u + (dst.width / 2) * (dst.height / 2));
for (int y = 0; y < src.height; ++y) {
std::memcpy(dst_y + static_cast<size_t>(y + pad_y) * dst_y_stride + pad_x,
src_y + static_cast<size_t>(y) * src_y_stride,
static_cast<size_t>(src.width));
}
const int uv_rows = src.height / 2;
const int uv_pad_x = pad_x / 2;
const int uv_pad_y = pad_y / 2;
const int uv_cols = src.width / 2;
for (int y = 0; y < uv_rows; ++y) {
std::memcpy(dst_u + static_cast<size_t>(y + uv_pad_y) * dst_u_stride + uv_pad_x,
src_u + static_cast<size_t>(y) * src_u_stride,
static_cast<size_t>(uv_cols));
std::memcpy(dst_v + static_cast<size_t>(y + uv_pad_y) * dst_v_stride + uv_pad_x,
src_v + static_cast<size_t>(y) * src_v_stride,
static_cast<size_t>(uv_cols));
}
} else {
// Unsupported format: release the syncs taken above before bailing out.
// NOTE(review): release order here (src then dst) differs from the success
// path below (dst then src); harmless, but inconsistent.
if (src.DmaFd() >= 0) src.SyncEnd();
if (dst.DmaFd() >= 0) DmaSyncEndFd(dst.DmaFd());
return false;
}
if (dst.DmaFd() >= 0) DmaSyncEndFd(dst.DmaFd());
if (src.DmaFd() >= 0) src.SyncEnd();
return true;
}
// Map r through scale (sx, sy) plus translation (tx, ty) and clip the result
// to the [0, out_w] x [0, out_h] output rectangle. A non-positive output size
// zeroes the rect entirely.
void TransformRect(Rect& r, float sx, float sy, float tx, float ty, int out_w, int out_h) {
  if (out_w <= 0 || out_h <= 0) {
    r = Rect{};
    return;
  }
  const float bound_x = static_cast<float>(out_w);
  const float bound_y = static_cast<float>(out_h);
  const float left = ClampFloat(r.x * sx + tx, 0.0f, bound_x);
  const float top = ClampFloat(r.y * sy + ty, 0.0f, bound_y);
  float width = std::max(0.0f, r.w * sx);
  float height = std::max(0.0f, r.h * sy);
  // Shrink the extent so the rect never reaches past the output edges.
  if (left + width > bound_x) width = std::max(0.0f, bound_x - left);
  if (top + height > bound_y) height = std::max(0.0f, bound_y - top);
  r.x = left;
  r.y = top;
  r.w = width;
  r.h = height;
}
// Map p through scale (sx, sy) plus translation (tx, ty), clamped to the
// output rectangle. A non-positive output size resets the point to origin.
void TransformPoint(Point2f& p, float sx, float sy, float tx, float ty, int out_w, int out_h) {
  if (out_w <= 0 || out_h <= 0) {
    p = Point2f{};
    return;
  }
  const float mapped_x = p.x * sx + tx;
  const float mapped_y = p.y * sy + ty;
  p.x = ClampFloat(mapped_x, 0.0f, static_cast<float>(out_w));
  p.y = ClampFloat(mapped_y, 0.0f, static_cast<float>(out_h));
}
} // namespace
// Node that rescales and/or format-converts incoming frames to a configured
// destination geometry. Every output frame carries a FrameTransformMeta
// describing the source->output mapping, and any detection / face metadata on
// the input is rewritten into output coordinates (ScaleMeta). Uses the RGA
// hardware path when compiled in and requested, otherwise swscale via FFmpeg.
class PreprocessNode : public INode {
 public:
  std::string Id() const override { return id_; }
  std::string Type() const override { return "preprocess"; }

  // Parse node configuration and capture queues / the image processor from
  // the context. Logs and returns false on any missing dependency or when
  // the requested backend was not compiled in.
  bool Init(const SimpleJson& config, const NodeContext& ctx) override {
    id_ = config.ValueOr<std::string>("id", "preprocess");
    dst_w_ = config.ValueOr<int>("dst_w", 640);
    dst_h_ = config.ValueOr<int>("dst_h", 640);
    keep_ratio_ = config.ValueOr<bool>("keep_ratio", false);
    // "resize_mode" wins; the legacy keep_ratio flag is only the fallback.
    resize_mode_ = ParseResizeMode(config.ValueOr<std::string>("resize_mode", ""), keep_ratio_);
    std::string fmt_str = config.ValueOr<std::string>("dst_format", "");
    if (!fmt_str.empty()) {
      dst_fmt_ = ParseFormat(fmt_str);
    }
    const bool requested_use_rga = config.ValueOr<bool>("use_rga", true);
    use_rga_ = requested_use_rga;
    if (const SimpleJson* dbg = config.Find("debug"); dbg && dbg->IsObject()) {
      stats_log_ = dbg->ValueOr<bool>("stats", stats_log_);
      // Clamp to >= 1 so the modulo in the stats check never divides by zero.
      stats_interval_ = std::max<uint64_t>(
          1, static_cast<uint64_t>(dbg->ValueOr<int>("stats_interval", static_cast<int>(stats_interval_))));
    }
    input_queue_ = ctx.input_queue;
    if (!input_queue_) {
      LogError("[preprocess] no input queue for node " + id_);
      return false;
    }
    if (ctx.output_queues.empty()) {
      LogError("[preprocess] no output queue for node " + id_);
      return false;
    }
    output_queues_ = ctx.output_queues;
#if !defined(RK3588_ENABLE_RGA)
    // Explicitly requesting RGA without RGA support is a configuration error.
    if (requested_use_rga) {
      LogError("[preprocess] use_rga=true but RGA not enabled at build time");
      return false;
    }
    use_rga_ = false;
#endif
#if !defined(RK3588_ENABLE_FFMPEG)
    // Without RGA we need the FFmpeg/swscale path to exist.
    if (!use_rga_) {
      LogError("[preprocess] neither RGA nor FFmpeg enabled");
      return false;
    }
#endif
    image_processor_ = ctx.image_processor;
    if (!image_processor_) {
      LogError("[preprocess] no image processor for node " + id_);
      return false;
    }
    return true;
  }

  // Log the effective configuration once at startup.
  bool Start() override {
    std::string mode = "stretch";
    if (resize_mode_ == ResizeMode::KeepRatio) mode = "keep_ratio";
    if (resize_mode_ == ResizeMode::Letterbox) mode = "letterbox";
    LogInfo("[preprocess] start id=" + id_ + " dst=" + std::to_string(dst_w_) + "x" +
            std::to_string(dst_h_) + " mode=" + mode + (use_rga_ ? " (rga)" : " (swscale)"));
    return true;
  }

  void Stop() override {}

  // Main per-frame entry point. Chooses letterbox, keep-ratio or stretch
  // handling, attaches transform metadata, rescales any detection metadata
  // and pushes the result to all downstream queues.
  NodeStatus Process(FramePtr frame) override {
    // NOTE: a leftover debug counter (function-local static guarding an empty
    // if-body) was removed here; it was dead code and not thread-safe.
    if (!frame) return NodeStatus::DROP;
    if (!image_processor_) return NodeStatus::ERROR;
    // Frames that already carry letterbox transform metadata are cropped back
    // to the valid (non-padded) region first; on failure we fall through to
    // the regular processing below.
    if (frame->transform_meta && frame->transform_meta->valid && frame->transform_meta->letterbox) {
      Status crop_st = ProcessLetterboxInput(frame);
      if (crop_st.Ok()) return NodeStatus::OK;
    }
    // Unconfigured format/size fall back to the input frame's own values.
    PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format;
    int out_w = dst_w_;
    int out_h = dst_h_;
    if (out_w <= 0) out_w = frame->width;
    if (out_h <= 0) out_h = frame->height;
    FrameTransformMeta tx{};
    tx.valid = true;
    tx.src_w = frame->width;
    tx.src_h = frame->height;
    Frame out;
    if (resize_mode_ == ResizeMode::Letterbox && dst_w_ > 0 && dst_h_ > 0 &&
        frame->width > 0 && frame->height > 0) {
      Status lb = BuildLetterbox(*frame, out_fmt, out_w, out_h, out, tx);
      if (lb.Failed()) {
        LogError("[preprocess] letterbox failed: " + lb.ErrMessage());
        return NodeStatus::ERROR;
      }
    } else {
      if (resize_mode_ == ResizeMode::KeepRatio && dst_w_ > 0 && dst_h_ > 0 &&
          frame->width > 0 && frame->height > 0) {
        // Uniform scale that fits inside dst; output may be smaller than dst.
        float scale = std::min(static_cast<float>(dst_w_) / frame->width,
                               static_cast<float>(dst_h_) / frame->height);
        out_w = static_cast<int>(std::round(frame->width * scale));
        out_h = static_cast<int>(std::round(frame->height * scale));
        if (IsYuvFormat(out_fmt)) {
          // YUV 4:2:0 needs even dimensions for the chroma planes.
          out_w = std::max(2, MakeEvenFloor(out_w));
          out_h = std::max(2, MakeEvenFloor(out_h));
        }
        if (out_w <= 0) out_w = frame->width;
        if (out_h <= 0) out_h = frame->height;
      }
      const bool need_resize = (frame->width != out_w || frame->height != out_h);
      const bool need_cvt = (frame->format != out_fmt);
      tx.letterbox = false;
      tx.dst_w = out_w;
      tx.dst_h = out_h;
      tx.scale_x = frame->width > 0 ? static_cast<float>(out_w) / frame->width : 1.0f;
      tx.scale_y = frame->height > 0 ? static_cast<float>(out_h) / frame->height : 1.0f;
      tx.pad_x = 0.0f;
      tx.pad_y = 0.0f;
      if (!need_resize && !need_cvt) {
        // Already in the target geometry/format: attach meta and pass through.
        auto t = std::make_shared<FrameTransformMeta>(tx);
        frame->transform_meta = t;
        ProcessPassthrough(frame);
        return NodeStatus::OK;
      }
      if (need_resize) {
        WarnMetaResizeOnce(frame, out_w, out_h);
      }
      out.width = out_w;
      out.height = out_h;
      out.format = out_fmt;
      Status st = image_processor_->Resize(*frame, out);
      if (st.Failed()) {
        // swscale fallback: an unsupported format degrades to passthrough.
        // NOTE(review): the attached meta still advertises the dst geometry
        // even though the frame is unchanged — confirm downstream tolerance.
        if (!use_rga_ && st.ErrMessage().find("unsupported format") != std::string::npos) {
          auto t = std::make_shared<FrameTransformMeta>(tx);
          frame->transform_meta = t;
          ProcessPassthrough(frame);
          return NodeStatus::OK;
        }
        LogError("[preprocess] " + st.ErrMessage());
        return NodeStatus::ERROR;
      }
    }
    auto out_frame = std::make_shared<Frame>(out);
    out_frame->pts = frame->pts;
    out_frame->frame_id = frame->frame_id;
    out_frame->transform_meta = std::make_shared<FrameTransformMeta>(tx);
    // Rewrite detection/face metadata into the output coordinate space.
    ScaleMeta(*frame, *out_frame, tx);
    out_frame->user_meta = frame->user_meta;
    PushToDownstream(out_frame);
    ++processed_;
    if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
      LogInfo("[preprocess] " + std::string(use_rga_ ? "rga" : "swscale") +
              " frame=" + std::to_string(out_frame->frame_id) +
              " " + std::to_string(frame->width) + "x" + std::to_string(frame->height) +
              " -> " + std::to_string(out_frame->width) + "x" + std::to_string(out_frame->height) +
              " id=" + id_);
    }
    return NodeStatus::OK;
  }

 private:
  // Convert (if needed), uniformly scale the input into an inner rectangle,
  // and center it on a black dst_w x dst_h canvas. On success `out` holds the
  // composed frame and `tx` describes the scale/pad mapping.
  Status BuildLetterbox(const Frame& input, PixelFormat out_fmt, int dst_w, int dst_h, Frame& out,
                        FrameTransformMeta& tx) const {
    Frame src = input;
    if (input.format != out_fmt) {
      // Color-convert first so resize and blit operate on the target format.
      src.width = input.width;
      src.height = input.height;
      src.format = out_fmt;
      Status cvt = image_processor_->CvtColor(input, src, out_fmt);
      if (cvt.Failed()) return cvt;
    }
    if (src.width <= 0 || src.height <= 0 || dst_w <= 0 || dst_h <= 0) {
      return FailStatus("invalid letterbox size");
    }
    float scale = std::min(static_cast<float>(dst_w) / static_cast<float>(src.width),
                           static_cast<float>(dst_h) / static_cast<float>(src.height));
    int inner_w = std::max(1, static_cast<int>(std::round(src.width * scale)));
    int inner_h = std::max(1, static_cast<int>(std::round(src.height * scale)));
    if (IsYuvFormat(out_fmt)) {
      // Even inner size for 4:2:0 chroma, and an even total pad so the
      // per-side padding is exact and BlitLetterbox's chroma offsets hold.
      inner_w = std::max(2, MakeEvenFloor(inner_w));
      inner_h = std::max(2, MakeEvenFloor(inner_h));
      if (((dst_w - inner_w) & 1) != 0) inner_w = std::max(2, inner_w - 2);
      if (((dst_h - inner_h) & 1) != 0) inner_h = std::max(2, inner_h - 2);
    }
    if (inner_w <= 0 || inner_h <= 0 || inner_w > dst_w || inner_h > dst_h) {
      return FailStatus("invalid inner letterbox size");
    }
    Frame resized;
    resized.width = inner_w;
    resized.height = inner_h;
    resized.format = out_fmt;
    Status st = image_processor_->Resize(src, resized);
    if (st.Failed()) return st;
    out.width = dst_w;
    out.height = dst_h;
    out.format = out_fmt;
    if (!InitFrameStorage(out)) {
      return FailStatus("alloc letterbox output failed");
    }
    FillBlack(out);
    const int pad_x = (dst_w - inner_w) / 2;
    const int pad_y = (dst_h - inner_h) / 2;
    if (!BlitLetterbox(resized, out, pad_x, pad_y)) {
      return FailStatus("blit letterbox failed");
    }
    tx.letterbox = true;
    tx.src_w = input.width;
    tx.src_h = input.height;
    tx.dst_w = dst_w;
    tx.dst_h = dst_h;
    // Caller (Process) guarantees input.width/height > 0 here.
    tx.scale_x = static_cast<float>(inner_w) / static_cast<float>(input.width);
    tx.scale_y = static_cast<float>(inner_h) / static_cast<float>(input.height);
    tx.pad_x = static_cast<float>(pad_x);
    tx.pad_y = static_cast<float>(pad_y);
    return OkStatus();
  }

  // Copy det / face_det / face_recog metadata from in_frame to out_frame,
  // mapping every bbox and landmark through the transform tx. Each result
  // set may have been produced at a different resolution (img_w/img_h), so
  // coordinates are first normalized to the input frame's resolution.
  void ScaleMeta(const Frame& in_frame, Frame& out_frame, const FrameTransformMeta& tx) const {
    if (in_frame.det) {
      auto det = std::make_shared<DetectionResult>(*in_frame.det);
      const int src_meta_w = det->img_w > 0 ? det->img_w : in_frame.width;
      const int src_meta_h = det->img_h > 0 ? det->img_h : in_frame.height;
      const float to_frame_x = src_meta_w > 0 ? static_cast<float>(in_frame.width) / src_meta_w : 1.0f;
      const float to_frame_y = src_meta_h > 0 ? static_cast<float>(in_frame.height) / src_meta_h : 1.0f;
      for (auto& it : det->items) {
        TransformRect(it.bbox,
                      tx.scale_x * to_frame_x,
                      tx.scale_y * to_frame_y,
                      tx.pad_x, tx.pad_y,
                      out_frame.width, out_frame.height);
      }
      det->img_w = out_frame.width;
      det->img_h = out_frame.height;
      out_frame.det = std::move(det);
    }
    if (in_frame.face_det) {
      auto face_det = std::make_shared<FaceDetResult>(*in_frame.face_det);
      const int src_meta_w = face_det->img_w > 0 ? face_det->img_w : in_frame.width;
      const int src_meta_h = face_det->img_h > 0 ? face_det->img_h : in_frame.height;
      const float to_frame_x = src_meta_w > 0 ? static_cast<float>(in_frame.width) / src_meta_w : 1.0f;
      const float to_frame_y = src_meta_h > 0 ? static_cast<float>(in_frame.height) / src_meta_h : 1.0f;
      for (auto& it : face_det->faces) {
        TransformRect(it.bbox,
                      tx.scale_x * to_frame_x,
                      tx.scale_y * to_frame_y,
                      tx.pad_x, tx.pad_y,
                      out_frame.width, out_frame.height);
        if (it.has_landmarks) {
          for (auto& lm : it.landmarks) {
            TransformPoint(lm,
                           tx.scale_x * to_frame_x,
                           tx.scale_y * to_frame_y,
                           tx.pad_x, tx.pad_y,
                           out_frame.width, out_frame.height);
          }
        }
      }
      face_det->img_w = out_frame.width;
      face_det->img_h = out_frame.height;
      out_frame.face_det = std::move(face_det);
    }
    if (in_frame.face_recog) {
      auto face_recog = std::make_shared<FaceRecogResult>(*in_frame.face_recog);
      const int src_meta_w = face_recog->img_w > 0 ? face_recog->img_w : in_frame.width;
      const int src_meta_h = face_recog->img_h > 0 ? face_recog->img_h : in_frame.height;
      const float to_frame_x = src_meta_w > 0 ? static_cast<float>(in_frame.width) / src_meta_w : 1.0f;
      const float to_frame_y = src_meta_h > 0 ? static_cast<float>(in_frame.height) / src_meta_h : 1.0f;
      for (auto& it : face_recog->items) {
        TransformRect(it.bbox,
                      tx.scale_x * to_frame_x,
                      tx.scale_y * to_frame_y,
                      tx.pad_x, tx.pad_y,
                      out_frame.width, out_frame.height);
        if (it.has_landmarks) {
          for (auto& lm : it.landmarks) {
            TransformPoint(lm,
                           tx.scale_x * to_frame_x,
                           tx.scale_y * to_frame_y,
                           tx.pad_x, tx.pad_y,
                           out_frame.width, out_frame.height);
          }
        }
      }
      face_recog->img_w = out_frame.width;
      face_recog->img_h = out_frame.height;
      out_frame.face_recog = std::move(face_recog);
    }
  }

  // Handle a frame that was letterboxed upstream: crop away the black padding
  // and rescale the valid region straight to the configured destination.
  // Returns a failed Status when cropping is impossible; the caller then
  // falls back to the regular processing path.
  Status ProcessLetterboxInput(FramePtr frame) {
    if (!frame->transform_meta) {
      LogInfo("[preprocess] " + id_ + " letterbox: no transform_meta");
      return FailStatus("no transform meta");
    }
    const auto& meta = *frame->transform_meta;
    // Guard the scale divisions below; a zero source size would yield inf.
    if (meta.src_w <= 0 || meta.src_h <= 0) {
      return FailStatus("invalid letterbox source size");
    }
    // The actual image sits centered in the letterbox, surrounded by black
    // padding of (pad_x, pad_y) on each side.
    const int valid_x = static_cast<int>(meta.pad_x);
    const int valid_y = static_cast<int>(meta.pad_y);
    // NOTE(review): assumes symmetric padding; an odd total pad makes this
    // one pixel too large, which the 16-alignment below absorbs in practice.
    int valid_w = meta.dst_w - 2 * static_cast<int>(meta.pad_x);
    int valid_h = meta.dst_h - 2 * static_cast<int>(meta.pad_y);
    // Align width down to a multiple of 16 for RGA compatibility.
    valid_w = (valid_w / 16) * 16;
    if (valid_w <= 0) valid_w = 16;
    if (valid_w <= 0 || valid_h <= 0 || valid_x < 0 || valid_y < 0 ||
        valid_x + valid_w > frame->width || valid_y + valid_h > frame->height) {
      return FailStatus("invalid letterbox region");
    }
    // Step 1: CPU-crop the valid region into a temporary frame.
    Frame cropped;
    cropped.width = valid_w;
    cropped.height = valid_h;
    cropped.format = frame->format;
    if (!InitFrameStorage(cropped)) {
      LogInfo("[preprocess] " + id_ + " letterbox: alloc crop buffer failed");
      return FailStatus("alloc crop buffer failed");
    }
    bool crop_ok = CropFrameSoftware(*frame, cropped, valid_x, valid_y, valid_w, valid_h);
    if (!crop_ok) {
      return FailStatus("crop failed - only RGB/BGR supported for letterbox crop");
    }
    // Step 2: resize (and possibly convert) to the configured destination.
    PixelFormat target_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format;
    Frame resized;
    resized.width = dst_w_;
    resized.height = dst_h_;
    resized.format = target_fmt;
    if (!InitFrameStorage(resized)) {
      return FailStatus("alloc resize buffer failed");
    }
    Status resize_st = image_processor_->Resize(cropped, resized);
    if (resize_st.Failed()) {
      return resize_st;
    }
    Frame out = std::move(resized);
    auto out_frame = std::make_shared<Frame>(out);
    out_frame->pts = frame->pts;
    out_frame->frame_id = frame->frame_id;
    // The combined crop+resize collapses to a direct scale from the original
    // source resolution to the output, with no remaining padding.
    FrameTransformMeta new_tx{};
    new_tx.valid = true;
    new_tx.letterbox = false;
    new_tx.src_w = meta.src_w;
    new_tx.src_h = meta.src_h;
    new_tx.dst_w = dst_w_;
    new_tx.dst_h = dst_h_;
    new_tx.scale_x = static_cast<float>(dst_w_) / static_cast<float>(meta.src_w);
    new_tx.scale_y = static_cast<float>(dst_h_) / static_cast<float>(meta.src_h);
    new_tx.pad_x = 0.0f;
    new_tx.pad_y = 0.0f;
    out_frame->transform_meta = std::make_shared<FrameTransformMeta>(new_tx);
    ScaleMeta(*frame, *out_frame, new_tx);
    out_frame->user_meta = frame->user_meta;
    PushToDownstream(out_frame);
    ++processed_;
    if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
      LogInfo("[preprocess] letterbox crop " + std::to_string(frame->width) + "x" + std::to_string(frame->height) +
              " valid=" + std::to_string(valid_w) + "x" + std::to_string(valid_h) +
              " -> " + std::to_string(dst_w_) + "x" + std::to_string(dst_h_) + " id=" + id_);
    }
    return OkStatus();
  }

  // CPU crop of a packed RGB/BGR region into a pre-allocated same-format
  // frame. Honors the source row stride when set (packed fallback) and
  // brackets DMA-backed buffers with cache syncs, mirroring BlitLetterbox.
  // Returns false for other formats or out-of-bounds requests.
  bool CropFrameSoftware(const Frame& src, Frame& dst, int x, int y, int w, int h) {
    if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > src.width || y + h > src.height) {
      return false;
    }
    if (dst.width != w || dst.height != h || dst.format != src.format) {
      return false;
    }
    if (src.format != PixelFormat::RGB && src.format != PixelFormat::BGR) {
      // NV12 and other formats would need per-plane handling.
      return false;
    }
    const uint8_t* src_data = src.data;
    uint8_t* dst_data = dst.data;
    if (!src_data || !dst_data) return false;
    const int channels = 3;
    const int src_stride = src.planes[0].stride > 0 ? src.planes[0].stride : src.width * channels;
    const int dst_stride = dst.planes[0].stride > 0 ? dst.planes[0].stride : w * channels;
    if (src.DmaFd() >= 0) src.SyncStart();
    if (dst.DmaFd() >= 0) DmaSyncStartFd(dst.DmaFd());
    const size_t row_bytes = static_cast<size_t>(w) * channels;
    for (int row = 0; row < h; ++row) {
      std::memcpy(dst_data + static_cast<size_t>(row) * dst_stride,
                  src_data + static_cast<size_t>(y + row) * src_stride + static_cast<size_t>(x) * channels,
                  row_bytes);
    }
    if (dst.DmaFd() >= 0) DmaSyncEndFd(dst.DmaFd());
    if (src.DmaFd() >= 0) src.SyncEnd();
    return true;
  }

  // Fan a frame out to every downstream queue (shared ownership, no copy).
  void PushToDownstream(FramePtr frame) {
    for (auto& q : output_queues_) {
      q->Push(frame);
    }
  }

  // Emit (once per node lifetime) a notice that resize also rescales any
  // attached detection/face metadata.
  void WarnMetaResizeOnce(const FramePtr& frame, int out_w, int out_h) {
    if (warned_meta_resize_) return;
    if (!frame) return;
    if (frame->width == out_w && frame->height == out_h) return;
    if (!frame->det && !frame->face_det && !frame->face_recog) return;
    warned_meta_resize_ = true;
    LogInfo("[preprocess] resized frame and scaled det/face meta to destination resolution (id=" + id_ + ")");
  }

  // Forward the input frame unchanged (used when no work is needed or as a
  // degradation path) and update statistics.
  void ProcessPassthrough(FramePtr frame) {
    PushToDownstream(frame);
    ++processed_;
    if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
      LogInfo("[preprocess] passthrough frame=" + std::to_string(frame->frame_id) + " id=" + id_);
    }
  }

  std::string id_;                               // node id from config
  int dst_w_ = 640;                              // destination width (<=0: keep input)
  int dst_h_ = 640;                              // destination height (<=0: keep input)
  bool keep_ratio_ = false;                      // legacy fallback flag for resize_mode
  ResizeMode resize_mode_ = ResizeMode::Stretch;
  PixelFormat dst_fmt_ = PixelFormat::UNKNOWN;   // UNKNOWN: keep input format
  bool use_rga_ = true;                          // hardware path; swscale otherwise
  bool stats_log_ = false;                       // periodic stats logging toggle
  uint64_t stats_interval_ = 100;                // frames between stats lines (>=1)
  bool warned_meta_resize_ = false;              // WarnMetaResizeOnce latch
  std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
  std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
  std::shared_ptr<IImageProcessor> image_processor_;
  uint64_t processed_ = 0;                       // total frames emitted
};
// Self-register this implementation under the "preprocess" node type name.
REGISTER_NODE(PreprocessNode, "preprocess");
} // namespace rk3588