OrangePi3588Media/plugins/preprocess/preprocess_node.cpp
sladro acd3815288
Some checks are pending
CI / host-build (push) Waiting to run
CI / rk3588-cross-build (push) Waiting to run
性能优化
2026-01-12 15:27:38 +08:00

864 lines
33 KiB
C++

#include <algorithm>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>
#include "node.h"
#include "utils/dma_alloc.h"
#include "utils/logger.h"
#if defined(RK3588_ENABLE_RGA)
#include "im2d.hpp"
#include "im2d_buffer.h"
#include "im2d_type.h"
#endif
#if defined(RK3588_ENABLE_FFMPEG)
extern "C" {
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
#endif
namespace rk3588 {
namespace {
// Rounds `v` up to the nearest multiple of 16 using a power-of-two bit mask.
inline int Align16(int v) {
    constexpr int kAlignMask = 16 - 1;
    return (v + kAlignMask) & ~kAlignMask;
}
#if defined(RK3588_ENABLE_RGA)
// Maps the pipeline's PixelFormat onto the matching librga RK_FORMAT_* constant.
// Formats without a mapping yield RK_FORMAT_UNKNOWN.
int ToRgaFormat(PixelFormat fmt) {
    if (fmt == PixelFormat::NV12) return RK_FORMAT_YCbCr_420_SP;
    if (fmt == PixelFormat::YUV420) return RK_FORMAT_YCbCr_420_P;
    if (fmt == PixelFormat::RGB) return RK_FORMAT_RGB_888;
    if (fmt == PixelFormat::BGR) return RK_FORMAT_BGR_888;
    return RK_FORMAT_UNKNOWN;
}
// Returns the process-wide storage for the RGA concurrency limit.
// The value starts at 2 and lives in a function-local static.
std::atomic<int>& GlobalRgaMaxInflightRef() {
    static std::atomic<int> max_inflight{2};
    return max_inflight;
}
// Returns the current RGA concurrency limit (always >= 1).
//
// On the first call only, the RK3588_RGA_MAX_INFLIGHT environment variable is
// consulted; a value in [1, 32] is stored as the new limit, anything else
// (unset, empty, unparsable, out of range) is ignored.
//
// NOTE(review): the environment value is applied lazily on the first call, so
// it can overwrite a limit stored earlier through SetGlobalRgaMaxInflight() --
// confirm that this precedence is intended.
int GlobalRgaMaxInflight() {
    static std::once_flag env_once;
    std::call_once(env_once, [] {
        const char* raw = std::getenv("RK3588_RGA_MAX_INFLIGHT");
        if (raw == nullptr || raw[0] == '\0') {
            return;
        }
        try {
            const int parsed = std::stoi(raw);
            const bool in_range = (parsed > 0) && (parsed <= 32);
            if (in_range) {
                GlobalRgaMaxInflightRef().store(parsed);
            }
        } catch (...) {
            // Unparsable value: keep whatever limit is currently stored.
        }
    });
    const int current = GlobalRgaMaxInflightRef().load();
    if (current <= 0) {
        return 1;
    }
    return current;
}
// Stores a new RGA concurrency limit. Non-positive values are ignored and
// values above 32 are clamped to 32.
void SetGlobalRgaMaxInflight(int v) {
    if (v < 1) {
        return;
    }
    const int clamped = (v > 32) ? 32 : v;
    GlobalRgaMaxInflightRef().store(clamped);
}
// Counting gate that bounds how many callers may be between Acquire() and
// Release() at the same time. The bound is read from GlobalRgaMaxInflight()
// once per Acquire() call, before waiting.
class RgaGate {
public:
    // Blocks until fewer than the current limit of callers hold the gate,
    // then takes a slot.
    void Acquire() {
        const int limit = GlobalRgaMaxInflight();
        std::unique_lock<std::mutex> guard(mu_);
        while (in_flight_ >= limit) {
            cv_.wait(guard);
        }
        in_flight_ += 1;
    }

    // Gives a slot back (never dropping below zero) and wakes one waiter.
    void Release() {
        std::lock_guard<std::mutex> guard(mu_);
        if (in_flight_ > 0) {
            in_flight_ -= 1;
        }
        cv_.notify_one();
    }

private:
    std::mutex mu_;
    std::condition_variable cv_;
    int in_flight_ = 0;  // number of slots currently held
};
// Returns the single process-wide RgaGate. The instance is heap-allocated on
// first use and never deleted.
RgaGate& GlobalRgaGate() {
    static RgaGate* const instance = new RgaGate;
    return *instance;
}
// Scope guard for the global RGA gate: takes a slot on construction and gives
// it back on destruction. Not copyable.
class ScopedRgaGate {
public:
    ScopedRgaGate(const ScopedRgaGate&) = delete;
    ScopedRgaGate& operator=(const ScopedRgaGate&) = delete;

    ScopedRgaGate() {
        GlobalRgaGate().Acquire();
    }

    ~ScopedRgaGate() {
        GlobalRgaGate().Release();
    }
};
void EnsureRgaInitializedOnce() {
static std::once_flag once;
std::call_once(once, []() {
const IM_STATUS st = imcheckHeader();
if (st != IM_STATUS_NOERROR && st != IM_STATUS_SUCCESS) {
std::cerr << "[preprocess] imcheckHeader failed: " << imStrError(st) << "\n";
}
});
}
#endif
// Parses a pixel-format name from configuration. Each format is accepted in
// all-lowercase or all-uppercase spelling only; anything else (including
// mixed case) yields PixelFormat::UNKNOWN.
PixelFormat ParseFormat(const std::string& s) {
    struct NameEntry {
        const char* lower;
        const char* upper;
        PixelFormat format;
    };
    static const NameEntry kNames[] = {
        {"nv12", "NV12", PixelFormat::NV12},
        {"yuv420", "YUV420", PixelFormat::YUV420},
        {"rgb", "RGB", PixelFormat::RGB},
        {"bgr", "BGR", PixelFormat::BGR},
    };
    for (const NameEntry& entry : kNames) {
        if (s == entry.lower || s == entry.upper) {
            return entry.format;
        }
    }
    return PixelFormat::UNKNOWN;
}
// Returns the number of bytes needed for a tightly packed (stride == width)
// image of `w` x `h` pixels in format `fmt`.
//
// - Non-positive dimensions and unknown formats yield 0 (mirrors
//   CalcImageSizeStrided; previously a negative dimension wrapped around to a
//   huge size_t).
// - For the 4:2:0 formats the chroma planes are rounded UP for odd
//   dimensions, so the result is large enough for what FFmpeg's
//   av_image_fill_arrays()/sws_scale() write with alignment 1. For even
//   dimensions the result is the usual w * h * 3 / 2.
size_t CalcImageSize(int w, int h, PixelFormat fmt) {
    if (w <= 0 || h <= 0) return 0;
    const size_t width = static_cast<size_t>(w);
    const size_t height = static_cast<size_t>(h);
    switch (fmt) {
        case PixelFormat::NV12:
        case PixelFormat::YUV420: {
            // NV12 interleaves U and V, YUV420 keeps them separate; either
            // way the chroma payload is 2 * ceil(w/2) * ceil(h/2) bytes.
            const size_t chroma_w = (width + 1) / 2;
            const size_t chroma_h = (height + 1) / 2;
            return width * height + 2 * chroma_w * chroma_h;
        }
        case PixelFormat::RGB:
        case PixelFormat::BGR:
            return width * height * 3;
        default:
            return 0;
    }
}
// Returns the byte size of an image buffer laid out with the given horizontal
// stride (in pixels) and vertical stride (in rows). Non-positive strides and
// unknown formats yield 0.
size_t CalcImageSizeStrided(int wstride, int hstride, PixelFormat fmt) {
    if (wstride <= 0 || hstride <= 0) {
        return 0;
    }
    const auto stride_w = static_cast<size_t>(wstride);
    const auto stride_h = static_cast<size_t>(hstride);
    const size_t luma = stride_w * stride_h;

    if (fmt == PixelFormat::NV12) {
        // Full-size Y plane followed by an interleaved UV plane of half the rows.
        return luma + stride_w * (stride_h / 2);
    }
    if (fmt == PixelFormat::YUV420) {
        // Full-size Y plane followed by two quarter-size chroma planes.
        const size_t chroma = (stride_w / 2) * (stride_h / 2);
        return luma + chroma + chroma;
    }
    if (fmt == PixelFormat::RGB || fmt == PixelFormat::BGR) {
        // Three bytes per pixel.
        return luma * 3;
    }
    return 0;
}
bool CopyToStridedBuffer(const Frame& src, uint8_t* dst, size_t dst_size,
int dst_wstride, int dst_hstride) {
if (!dst || dst_size == 0) return false;
if (dst_wstride <= 0 || dst_hstride <= 0) return false;
std::memset(dst, 0, dst_size);
const int w = src.width;
const int h = src.height;
if (w <= 0 || h <= 0) return false;
if (src.format == PixelFormat::NV12) {
const size_t y_bytes = static_cast<size_t>(dst_wstride) * dst_hstride;
const size_t uv_bytes = static_cast<size_t>(dst_wstride) * (dst_hstride / 2);
if (y_bytes + uv_bytes > dst_size) return false;
const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data;
const uint8_t* src_uv = src.planes[1].data ? src.planes[1].data : nullptr;
const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : w;
const int src_uv_stride = src.planes[1].stride > 0 ? src.planes[1].stride : w;
if (!src_y) return false;
if (!src_uv) {
// Fallback: packed NV12 layout.
if (!src.data) return false;
src_uv = src.data + static_cast<size_t>(src_y_stride) * static_cast<size_t>(h);
}
for (int row = 0; row < h; ++row) {
std::memcpy(dst + static_cast<size_t>(row) * dst_wstride,
src_y + static_cast<size_t>(row) * src_y_stride,
static_cast<size_t>(w));
}
uint8_t* dst_uv = dst + y_bytes;
const int uv_rows = h / 2;
for (int row = 0; row < uv_rows; ++row) {
std::memcpy(dst_uv + static_cast<size_t>(row) * dst_wstride,
src_uv + static_cast<size_t>(row) * src_uv_stride,
static_cast<size_t>(w));
}
return true;
}
if (src.format == PixelFormat::YUV420) {
const size_t y_bytes = static_cast<size_t>(dst_wstride) * dst_hstride;
const size_t uv_stride = static_cast<size_t>(dst_wstride) / 2;
const size_t uv_h = static_cast<size_t>(dst_hstride) / 2;
const size_t u_bytes = uv_stride * uv_h;
const size_t v_bytes = u_bytes;
if (y_bytes + u_bytes + v_bytes > dst_size) return false;
const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data;
const uint8_t* src_u = src.planes[1].data ? src.planes[1].data : nullptr;
const uint8_t* src_v = src.planes[2].data ? src.planes[2].data : nullptr;
const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : w;
const int src_u_stride = src.planes[1].stride > 0 ? src.planes[1].stride : (w / 2);
const int src_v_stride = src.planes[2].stride > 0 ? src.planes[2].stride : (w / 2);
if (!src_y || !src_u || !src_v) return false;
for (int row = 0; row < h; ++row) {
std::memcpy(dst + static_cast<size_t>(row) * dst_wstride,
src_y + static_cast<size_t>(row) * src_y_stride,
static_cast<size_t>(w));
}
uint8_t* dst_u = dst + y_bytes;
uint8_t* dst_v = dst + y_bytes + u_bytes;
const int uv_rows = h / 2;
const int uv_cols = w / 2;
for (int row = 0; row < uv_rows; ++row) {
std::memcpy(dst_u + static_cast<size_t>(row) * uv_stride,
src_u + static_cast<size_t>(row) * src_u_stride,
static_cast<size_t>(uv_cols));
std::memcpy(dst_v + static_cast<size_t>(row) * uv_stride,
src_v + static_cast<size_t>(row) * src_v_stride,
static_cast<size_t>(uv_cols));
}
return true;
}
if (src.format == PixelFormat::RGB || src.format == PixelFormat::BGR) {
const size_t need = static_cast<size_t>(dst_wstride) * dst_hstride * 3;
if (need > dst_size) return false;
const uint8_t* src_rgb = src.planes[0].data ? src.planes[0].data : src.data;
const int src_stride = src.planes[0].stride > 0
? src.planes[0].stride
: (src.stride > 0 ? src.stride : w * 3);
if (!src_rgb) return false;
const size_t dst_stride = static_cast<size_t>(dst_wstride) * 3;
const size_t row_bytes = static_cast<size_t>(w) * 3;
for (int row = 0; row < h; ++row) {
std::memcpy(dst + static_cast<size_t>(row) * dst_stride,
src_rgb + static_cast<size_t>(row) * src_stride,
row_bytes);
}
return true;
}
return false;
}
} // namespace
class PreprocessNode : public INode {
public:
std::string Id() const override { return id_; }
std::string Type() const override { return "preprocess"; }
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
id_ = config.ValueOr<std::string>("id", "preprocess");
dst_w_ = config.ValueOr<int>("dst_w", 640);
dst_h_ = config.ValueOr<int>("dst_h", 640);
keep_ratio_ = config.ValueOr<bool>("keep_ratio", false);
dst_packed_ = config.ValueOr<bool>("dst_packed", false);
std::string fmt_str = config.ValueOr<std::string>("dst_format", "");
if (!fmt_str.empty()) {
dst_fmt_ = ParseFormat(fmt_str);
}
#if defined(RK3588_ENABLE_RGA)
const int rga_max_inflight = config.ValueOr<int>("rga_max_inflight", 0);
if (rga_max_inflight > 0) {
SetGlobalRgaMaxInflight(rga_max_inflight);
}
#endif
const bool requested_use_rga = config.ValueOr<bool>("use_rga", true);
use_rga_ = requested_use_rga;
if (const SimpleJson* dbg = config.Find("debug"); dbg && dbg->IsObject()) {
stats_log_ = dbg->ValueOr<bool>("stats", stats_log_);
stats_interval_ = std::max<uint64_t>(1, static_cast<uint64_t>(dbg->ValueOr<int>("stats_interval", static_cast<int>(stats_interval_))));
}
input_queue_ = ctx.input_queue;
if (!input_queue_) {
std::cerr << "[preprocess] no input queue for node " << id_ << "\n";
return false;
}
if (ctx.output_queues.empty()) {
std::cerr << "[preprocess] no output queue for node " << id_ << "\n";
return false;
}
output_queues_ = ctx.output_queues;
#if !defined(RK3588_ENABLE_RGA)
if (requested_use_rga) {
std::cerr << "[preprocess] use_rga=true but RGA not enabled at build time\n";
return false;
}
use_rga_ = false;
#endif
#if !defined(RK3588_ENABLE_FFMPEG)
if (!use_rga_) {
std::cerr << "[preprocess] neither RGA nor FFmpeg enabled\n";
return false;
}
#endif
return true;
}
bool Start() override {
LogInfo("[preprocess] start id=" + id_ + " dst=" + std::to_string(dst_w_) + "x" + std::to_string(dst_h_) +
(use_rga_ ? " (rga)" : " (swscale)"));
return true;
}
void Stop() override {
#if defined(RK3588_ENABLE_FFMPEG)
if (sws_ctx_) {
sws_freeContext(sws_ctx_);
sws_ctx_ = nullptr;
}
#endif
}
NodeStatus Process(FramePtr frame) override {
if (!frame) return NodeStatus::DROP;
#if defined(RK3588_ENABLE_RGA)
if (use_rga_) {
if (!ProcessRga(frame)) {
return NodeStatus::ERROR;
}
} else {
ProcessSwscale(frame);
}
#elif defined(RK3588_ENABLE_FFMPEG)
ProcessSwscale(frame);
#else
ProcessPassthrough(frame);
#endif
return NodeStatus::OK;
}
private:
void PushToDownstream(FramePtr frame) {
for (auto& q : output_queues_) {
q->Push(frame);
}
}
void WarnMetaResizeOnce(const FramePtr& frame, int out_w, int out_h) {
if (warned_meta_resize_) return;
if (!frame) return;
if (frame->width == out_w && frame->height == out_h) return;
if (!frame->det && !frame->face_det && !frame->face_recog) return;
warned_meta_resize_ = true;
LogWarn("[preprocess] resized frame but forwarded det/face meta without coordinate scaling; ensure det/recog/osd use same resolution (id=" + id_ + ")");
}
void ProcessPassthrough(FramePtr frame) {
PushToDownstream(frame);
++processed_;
if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
LogInfo("[preprocess] passthrough frame=" + std::to_string(frame->frame_id) + " id=" + id_);
}
}
#if defined(RK3588_ENABLE_RGA)
bool ProcessRga(FramePtr frame) {
EnsureRgaInitializedOnce();
PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format;
int out_w = dst_w_;
int out_h = dst_h_;
if (keep_ratio_ && frame->width > 0 && frame->height > 0) {
float scale = std::min(static_cast<float>(dst_w_) / frame->width,
static_cast<float>(dst_h_) / frame->height);
out_w = static_cast<int>(frame->width * scale);
out_h = static_cast<int>(frame->height * scale);
out_w = (out_w + 1) & ~1;
out_h = (out_h + 1) & ~1;
}
int src_fmt_rga = ToRgaFormat(frame->format);
int dst_fmt_rga = ToRgaFormat(out_fmt);
bool need_cvt = (src_fmt_rga != dst_fmt_rga);
bool need_resize = (frame->width != out_w || frame->height != out_h);
if (need_resize) {
WarnMetaResizeOnce(frame, out_w, out_h);
}
// If no processing needed, passthrough directly
if (!need_cvt && !need_resize) {
PushToDownstream(frame);
++processed_;
if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
LogInfo("[preprocess] passthrough frame=" + std::to_string(frame->frame_id) +
" " + std::to_string(frame->width) + "x" + std::to_string(frame->height) + " (no change)" +
" id=" + id_);
}
return true;
}
// Calculate proper strides.
// IMPORTANT: For DMA-BUF frames (e.g. MPP decode output), the actual vertical stride
// (ver_stride) may be larger than `height`. We must honor that, otherwise RGA will
// read UV from the wrong offset (典型花屏/错色).
int src_wstride = Align16(frame->width);
int src_hstride = Align16(frame->height);
if (frame->format == PixelFormat::NV12 || frame->format == PixelFormat::YUV420) {
const int y_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride
: (frame->stride > 0 ? frame->stride : frame->width);
if (y_stride > 0) src_wstride = y_stride;
if (frame->planes[0].size > 0 && y_stride > 0) {
const int hs = frame->planes[0].size / y_stride;
if (hs >= frame->height) src_hstride = hs;
}
} else if (frame->format == PixelFormat::RGB || frame->format == PixelFormat::BGR) {
const int stride_bytes = frame->planes[0].stride > 0 ? frame->planes[0].stride
: (frame->stride > 0 ? frame->stride : frame->width * 3);
if (stride_bytes > 0 && (stride_bytes % 3) == 0) {
src_wstride = stride_bytes / 3;
}
if (frame->planes[0].size > 0 && stride_bytes > 0) {
const int hs = frame->planes[0].size / stride_bytes;
if (hs >= frame->height) src_hstride = hs;
}
}
int dst_wstride = Align16(out_w);
// For AI input (RGB/BGR), allow a tightly packed output to avoid an extra per-frame memcpy
// in downstream nodes (e.g. ai_yolo).
if (dst_packed_ && (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR)) {
dst_wstride = out_w;
}
int dst_hstride = Align16(out_h);
if (src_fmt_rga == RK_FORMAT_UNKNOWN || dst_fmt_rga == RK_FORMAT_UNKNOWN) {
std::cerr << "[preprocess] unsupported format for RGA\n";
return false;
}
size_t out_size = CalcImageSizeStrided(dst_wstride, dst_hstride, out_fmt);
if (out_size == 0) {
std::cerr << "[preprocess] invalid output size for RGA\n";
return false;
}
// Use DMA-BUF allocation to avoid >4GB address issue with RGA
auto dma_buf = DmaAlloc(out_size);
if (!dma_buf || !dma_buf->valid()) {
std::cerr << "[preprocess] DMA alloc failed\n";
return false;
}
if (stats_log_ && processed_ < 3) {
LogInfo("[preprocess] src: " + std::to_string(frame->width) + "x" + std::to_string(frame->height) +
" fmt=" + std::to_string(static_cast<int>(frame->format)) +
" rga_fmt=" + std::to_string(src_fmt_rga) +
" wstride=" + std::to_string(src_wstride) +
" hstride=" + std::to_string(src_hstride) +
" data_size=" + std::to_string(frame->data_size));
LogInfo("[preprocess] dst: " + std::to_string(out_w) + "x" + std::to_string(out_h) +
" fmt=" + std::to_string(static_cast<int>(out_fmt)) +
" rga_fmt=" + std::to_string(dst_fmt_rga) +
" wstride=" + std::to_string(dst_wstride) +
" hstride=" + std::to_string(dst_hstride) +
" out_size=" + std::to_string(out_size));
}
rga_buffer_t src_buf{};
rga_buffer_t dst_buf{};
DmaBufferPtr src_dma_buf; // keep alive if we allocate/copy
// Prepare tmp buffer outside the RGA critical section.
DmaBufferPtr tmp_dma;
if (need_resize && need_cvt) {
tmp_dma = DmaAlloc(CalcImageSizeStrided(dst_wstride, dst_hstride, frame->format));
if (!tmp_dma || !tmp_dma->valid()) {
std::cerr << "[preprocess] DMA alloc for tmp failed\n";
return false;
}
}
const bool can_cpu_read_src = (frame->data != nullptr && frame->data_size > 0);
auto CopySrcToDmaIfPossible = [&]() -> bool {
if (!can_cpu_read_src) return false;
// Use tight/aligned strides for the copied source buffer.
const int copy_wstride = Align16(frame->width);
const int copy_hstride = Align16(frame->height);
const size_t src_size = CalcImageSizeStrided(copy_wstride, copy_hstride, frame->format);
src_dma_buf = DmaAlloc(src_size);
if (!src_dma_buf || !src_dma_buf->valid()) {
return false;
}
// If source is a DMA-BUF, sync it before CPU reads.
if (frame->dma_fd >= 0) {
DmaSyncStartFd(frame->dma_fd);
}
// CPU writes to a DMA-BUF must be flushed before RGA reads it.
DmaSyncStartFd(src_dma_buf->fd);
const bool ok = CopyToStridedBuffer(*frame, src_dma_buf->data(), src_dma_buf->size,
copy_wstride, copy_hstride);
DmaSyncEndFd(src_dma_buf->fd);
if (frame->dma_fd >= 0) {
DmaSyncEndFd(frame->dma_fd);
}
if (!ok) {
src_dma_buf.reset();
return false;
}
// Update strides used for RGA to match the copied buffer.
src_wstride = copy_wstride;
src_hstride = copy_hstride;
return true;
};
// If there's no DMA fd, we must allocate/copy into a DMA-BUF for RGA.
if (frame->dma_fd < 0) {
if (!CopySrcToDmaIfPossible()) {
std::cerr << "[preprocess] no dma_fd and src copy failed\n";
return false;
}
}
auto RunRgaOnce = [&](int src_fd, std::string& err) -> bool {
// Serialize/limit librga/im2d usage; multiple pipelines call RGA concurrently.
ScopedRgaGate guard;
src_buf = wrapbuffer_fd_t(src_fd, frame->width, frame->height,
src_wstride, src_hstride, src_fmt_rga);
dst_buf = wrapbuffer_fd_t(dma_buf->fd, out_w, out_h,
dst_wstride, dst_hstride, dst_fmt_rga);
auto Check = [&](const rga_buffer_t& s, const rga_buffer_t& d) -> bool {
const im_rect sr{0, 0, s.width, s.height};
const im_rect dr{0, 0, d.width, d.height};
// Do NOT call the imcheck(...) variadic macro with 0 extra args:
// under -Wpedantic/-std=c++11 it expands to a zero-sized array.
rga_buffer_t pat{};
const im_rect pr{0, 0, 0, 0};
const IM_STATUS chk = imcheck_t(s, d, pat, sr, dr, pr, 0);
if (chk != IM_STATUS_NOERROR && chk != IM_STATUS_SUCCESS) {
err = std::string("RGA imcheck failed: ") + imStrError(chk);
return false;
}
return true;
};
IM_STATUS status = IM_STATUS_SUCCESS;
if (need_resize && need_cvt) {
rga_buffer_t tmp = wrapbuffer_fd_t(tmp_dma->fd, out_w, out_h,
dst_wstride, dst_hstride, src_fmt_rga);
if (!Check(src_buf, tmp) || !Check(tmp, dst_buf)) {
return false;
}
status = imresize(src_buf, tmp, 0, 0, 0, 1, nullptr);
if (status == IM_STATUS_SUCCESS) {
status = imcvtcolor(tmp, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT, 1, nullptr);
}
} else if (need_resize) {
if (!Check(src_buf, dst_buf)) {
return false;
}
status = imresize(src_buf, dst_buf, 0, 0, 0, 1, nullptr);
} else if (need_cvt) {
if (!Check(src_buf, dst_buf)) {
return false;
}
status = imcvtcolor(src_buf, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT, 1, nullptr);
}
if (status != IM_STATUS_SUCCESS) {
err = std::string("RGA failed: ") + imStrError(status);
return false;
}
return true;
};
std::string rga_err;
const int src_fd = (src_dma_buf && src_dma_buf->valid()) ? src_dma_buf->fd : frame->dma_fd;
if (src_fd < 0 || !RunRgaOnce(src_fd, rga_err)) {
std::cerr << "[preprocess] " << (rga_err.empty() ? "RGA failed" : rga_err) << "\n";
return false;
}
auto out_frame = std::make_shared<Frame>();
out_frame->width = out_w;
out_frame->height = out_h;
out_frame->format = out_fmt;
out_frame->stride = (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR)
? (dst_wstride * 3)
: dst_wstride;
out_frame->dma_fd = dma_buf->fd;
out_frame->data = dma_buf->data();
out_frame->data_size = dma_buf->size;
out_frame->data_owner = dma_buf; // DmaBuffer shared_ptr keeps fd alive
out_frame->pts = frame->pts;
out_frame->frame_id = frame->frame_id;
out_frame->det = frame->det;
out_frame->face_det = frame->face_det;
out_frame->face_recog = frame->face_recog;
out_frame->user_meta = frame->user_meta;
SetupPlanes(*out_frame, out_fmt);
PushToDownstream(out_frame);
++processed_;
if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
LogInfo("[preprocess] rga frame=" + std::to_string(out_frame->frame_id) +
" " + std::to_string(frame->width) + "x" + std::to_string(frame->height) +
" -> " + std::to_string(out_w) + "x" + std::to_string(out_h));
}
return true;
}
#endif
#if defined(RK3588_ENABLE_FFMPEG)
void ProcessSwscale(FramePtr frame) {
PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format;
int out_w = dst_w_;
int out_h = dst_h_;
if (keep_ratio_ && frame->width > 0 && frame->height > 0) {
float scale = std::min(static_cast<float>(dst_w_) / frame->width,
static_cast<float>(dst_h_) / frame->height);
out_w = static_cast<int>(frame->width * scale);
out_h = static_cast<int>(frame->height * scale);
out_w = (out_w + 1) & ~1;
out_h = (out_h + 1) & ~1;
}
if (frame->width != out_w || frame->height != out_h) {
WarnMetaResizeOnce(frame, out_w, out_h);
}
AVPixelFormat src_av_fmt = ToAvFormat(frame->format);
AVPixelFormat dst_av_fmt = ToAvFormat(out_fmt);
if (src_av_fmt == AV_PIX_FMT_NONE || dst_av_fmt == AV_PIX_FMT_NONE) {
PushToDownstream(frame);
return;
}
if (!sws_ctx_ || frame->width != last_src_w_ || frame->height != last_src_h_ ||
src_av_fmt != last_src_fmt_ || dst_av_fmt != last_dst_fmt_) {
if (sws_ctx_) sws_freeContext(sws_ctx_);
sws_ctx_ = sws_getContext(frame->width, frame->height, src_av_fmt,
out_w, out_h, dst_av_fmt,
SWS_BILINEAR, nullptr, nullptr, nullptr);
last_src_w_ = frame->width;
last_src_h_ = frame->height;
last_src_fmt_ = src_av_fmt;
last_dst_fmt_ = dst_av_fmt;
}
if (!sws_ctx_) {
PushToDownstream(frame);
return;
}
size_t out_size = CalcImageSize(out_w, out_h, out_fmt);
auto buffer = std::make_shared<std::vector<uint8_t>>(out_size);
uint8_t* src_data[4] = {nullptr};
int src_linesize[4] = {0};
uint8_t* dst_data[4] = {nullptr};
int dst_linesize[4] = {0};
SetupAvPlanes(frame.get(), src_data, src_linesize);
av_image_fill_arrays(dst_data, dst_linesize, buffer->data(),
dst_av_fmt, out_w, out_h, 1);
sws_scale(sws_ctx_, src_data, src_linesize, 0, frame->height,
dst_data, dst_linesize);
auto out_frame = std::make_shared<Frame>();
out_frame->width = out_w;
out_frame->height = out_h;
out_frame->format = out_fmt;
out_frame->stride = (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR)
? (out_w * 3)
: out_w;
out_frame->data = buffer->data();
out_frame->data_size = buffer->size();
out_frame->data_owner = buffer;
out_frame->pts = frame->pts;
out_frame->frame_id = frame->frame_id;
out_frame->det = frame->det;
out_frame->face_det = frame->face_det;
out_frame->face_recog = frame->face_recog;
out_frame->user_meta = frame->user_meta;
SetupPlanes(*out_frame, out_fmt);
PushToDownstream(out_frame);
++processed_;
if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) {
LogInfo("[preprocess] swscale frame=" + std::to_string(out_frame->frame_id) +
" " + std::to_string(frame->width) + "x" + std::to_string(frame->height) +
" -> " + std::to_string(out_w) + "x" + std::to_string(out_h) +
" id=" + id_);
}
}
static AVPixelFormat ToAvFormat(PixelFormat fmt) {
switch (fmt) {
case PixelFormat::NV12: return AV_PIX_FMT_NV12;
case PixelFormat::YUV420: return AV_PIX_FMT_YUV420P;
case PixelFormat::RGB: return AV_PIX_FMT_RGB24;
case PixelFormat::BGR: return AV_PIX_FMT_BGR24;
default: return AV_PIX_FMT_NONE;
}
}
static void SetupAvPlanes(const Frame* f, uint8_t* data[4], int linesize[4]) {
if (!f->data) return;
if (f->format == PixelFormat::NV12) {
data[0] = f->planes[0].data ? f->planes[0].data : f->data;
data[1] = f->planes[1].data ? f->planes[1].data : (f->data + f->width * f->height);
linesize[0] = f->planes[0].stride > 0 ? f->planes[0].stride : f->width;
linesize[1] = f->planes[1].stride > 0 ? f->planes[1].stride : f->width;
} else if (f->format == PixelFormat::YUV420) {
data[0] = f->planes[0].data ? f->planes[0].data : f->data;
int y_size = f->width * f->height;
int uv_size = y_size / 4;
data[1] = f->planes[1].data ? f->planes[1].data : (f->data + y_size);
data[2] = f->planes[2].data ? f->planes[2].data : (f->data + y_size + uv_size);
linesize[0] = f->planes[0].stride > 0 ? f->planes[0].stride : f->width;
linesize[1] = f->planes[1].stride > 0 ? f->planes[1].stride : f->width / 2;
linesize[2] = f->planes[2].stride > 0 ? f->planes[2].stride : f->width / 2;
} else {
data[0] = f->data;
linesize[0] = f->stride > 0 ? f->stride : f->width * 3;
}
}
#endif
void SetupPlanes(Frame& f, PixelFormat fmt) {
if (!f.data) return;
if (fmt == PixelFormat::NV12) {
f.plane_count = 2;
int y_stride = f.stride > 0 ? f.stride : f.width;
if (y_stride <= 0) y_stride = f.width;
size_t y_bytes = static_cast<size_t>(y_stride) * static_cast<size_t>(f.height);
if (f.data_size > 0) {
const size_t candidate = (f.data_size * 2) / 3; // total = Y + UV = Y*3/2
if (candidate >= y_bytes && candidate <= f.data_size &&
(candidate % static_cast<size_t>(y_stride)) == 0) {
y_bytes = candidate;
}
}
size_t uv_bytes = y_bytes / 2;
if (f.data_size > 0 && y_bytes + uv_bytes > f.data_size) {
uv_bytes = f.data_size > y_bytes ? (f.data_size - y_bytes) : 0;
}
f.planes[0] = {f.data, y_stride, static_cast<int>(y_bytes), 0};
f.planes[1] = {f.data + y_bytes, y_stride, static_cast<int>(uv_bytes), static_cast<int>(y_bytes)};
return;
}
if (fmt == PixelFormat::YUV420) {
f.plane_count = 3;
int y_stride = f.stride > 0 ? f.stride : f.width;
if (y_stride <= 0) y_stride = f.width;
size_t y_bytes = static_cast<size_t>(y_stride) * static_cast<size_t>(f.height);
size_t hstride = static_cast<size_t>(f.height);
if (f.data_size > 0) {
const size_t candidate = (f.data_size * 2) / 3;
if (candidate >= y_bytes && candidate <= f.data_size &&
(candidate % static_cast<size_t>(y_stride)) == 0) {
y_bytes = candidate;
hstride = y_bytes / static_cast<size_t>(y_stride);
}
}
size_t uv_stride = static_cast<size_t>(y_stride) / 2;
size_t uv_h = hstride / 2;
size_t u_bytes = uv_stride * uv_h;
size_t v_bytes = u_bytes;
size_t need = y_bytes + u_bytes + v_bytes;
if (f.data_size > 0 && need > f.data_size) {
// Fallback to tightly packed layout.
y_stride = f.width;
y_bytes = static_cast<size_t>(f.width) * static_cast<size_t>(f.height);
uv_stride = static_cast<size_t>(f.width) / 2;
uv_h = static_cast<size_t>(f.height) / 2;
u_bytes = uv_stride * uv_h;
v_bytes = u_bytes;
}
f.planes[0] = {f.data, y_stride, static_cast<int>(y_bytes), 0};
f.planes[1] = {f.data + y_bytes, static_cast<int>(uv_stride), static_cast<int>(u_bytes), static_cast<int>(y_bytes)};
f.planes[2] = {f.data + y_bytes + u_bytes, static_cast<int>(uv_stride), static_cast<int>(v_bytes), static_cast<int>(y_bytes + u_bytes)};
return;
}
// RGB/BGR
f.plane_count = 1;
int stride_bytes = f.stride > 0 ? f.stride : (f.width * 3);
f.planes[0] = {f.data, stride_bytes, static_cast<int>(f.data_size), 0};
}
std::string id_;
int dst_w_ = 640;
int dst_h_ = 640;
bool keep_ratio_ = false;
PixelFormat dst_fmt_ = PixelFormat::UNKNOWN;
bool dst_packed_ = false;
bool use_rga_ = true;
bool stats_log_ = false;
uint64_t stats_interval_ = 100;
bool warned_meta_resize_ = false;
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
uint64_t processed_ = 0;
#if defined(RK3588_ENABLE_FFMPEG)
SwsContext* sws_ctx_ = nullptr;
int last_src_w_ = 0;
int last_src_h_ = 0;
AVPixelFormat last_src_fmt_ = AV_PIX_FMT_NONE;
AVPixelFormat last_dst_fmt_ = AV_PIX_FMT_NONE;
#endif
};
// Makes PreprocessNode available under the type name "preprocess".
// NOTE(review): presumably REGISTER_NODE (declared outside this file) installs
// a static factory entry -- confirm against its definition.
REGISTER_NODE(PreprocessNode, "preprocess");
} // namespace rk3588