// OrangePi3588Media/plugins/preprocess/preprocess_node.cpp
// (file-viewer metadata — "453 lines, 16 KiB, C++" — kept as a comment so
// the translation unit stays compilable)

#include <algorithm>
#include <atomic>
#include <cctype>
#include <chrono>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>
#include "node.h"
#include "utils/dma_alloc.h"
#if defined(RK3588_ENABLE_RGA)
#include "im2d.hpp"
#include "im2d_buffer.h"
#include "im2d_type.h"
#endif
#if defined(RK3588_ENABLE_FFMPEG)
extern "C" {
#include <libavutil/imgutils.h>
#include <libswscale/swscale.h>
}
#endif
namespace rk3588 {
namespace {
// Round v up to the next multiple of 16 (RGA stride requirement).
inline int Align16(int v) {
  const int rem = v & 15;
  return rem ? v + 16 - rem : v;
}
#if defined(RK3588_ENABLE_RGA)
// Translate the pipeline's PixelFormat into the matching librga constant.
// Unsupported formats map to RK_FORMAT_UNKNOWN, which callers treat as an
// error (see the RGA path's format validation).
int ToRgaFormat(PixelFormat fmt) {
  if (fmt == PixelFormat::NV12) return RK_FORMAT_YCbCr_420_SP;
  if (fmt == PixelFormat::YUV420) return RK_FORMAT_YCbCr_420_P;
  if (fmt == PixelFormat::RGB) return RK_FORMAT_RGB_888;
  if (fmt == PixelFormat::BGR) return RK_FORMAT_BGR_888;
  return RK_FORMAT_UNKNOWN;
}
#endif
// Parse a user-supplied format string into a PixelFormat.
// Matching is case-insensitive ("nv12", "NV12", "Nv12" all parse), which
// is strictly backward compatible with the previous behavior that only
// accepted the all-lowercase and all-uppercase spellings.
// Returns PixelFormat::UNKNOWN for empty or unrecognized strings.
PixelFormat ParseFormat(const std::string& s) {
  std::string lower(s.size(), '\0');
  std::transform(s.begin(), s.end(), lower.begin(), [](unsigned char c) {
    return static_cast<char>(std::tolower(c));
  });
  if (lower == "nv12") return PixelFormat::NV12;
  if (lower == "yuv420") return PixelFormat::YUV420;
  if (lower == "rgb") return PixelFormat::RGB;
  if (lower == "bgr") return PixelFormat::BGR;
  return PixelFormat::UNKNOWN;
}
// Compute the byte size of a tightly-packed w x h image in the given format.
// Returns 0 for unknown formats or non-positive dimensions; callers already
// treat 0 as "invalid" and bail out, so the guard is backward compatible.
// (Previously a negative w wrapped through static_cast<size_t> into a huge
// allocation request.)
size_t CalcImageSize(int w, int h, PixelFormat fmt) {
  if (w <= 0 || h <= 0) return 0;  // guard against garbage dimensions
  const size_t pixels = static_cast<size_t>(w) * static_cast<size_t>(h);
  switch (fmt) {
    case PixelFormat::NV12:
    case PixelFormat::YUV420:
      return pixels * 3 / 2;  // 4:2:0 subsampling -> 1.5 bytes per pixel
    case PixelFormat::RGB:
    case PixelFormat::BGR:
      return pixels * 3;  // packed 3 bytes per pixel, no row padding
    default:
      return 0;
  }
}
} // namespace
// PreprocessNode: pipeline node that resizes and/or color-converts frames to
// a configured destination geometry/format before pushing them downstream.
//
// Backend selection (compile time + config):
//   - RK3588_ENABLE_RGA + use_rga=true  -> hardware RGA (imresize/imcvtcolor)
//   - RK3588_ENABLE_FFMPEG              -> software swscale fallback
//   - neither                           -> passthrough branch exists in
//     Process(), but Init() refuses to start in that build (see NOTE below).
class PreprocessNode : public INode {
public:
// Configured node id (defaults to "preprocess").
std::string Id() const override { return id_; }
std::string Type() const override { return "preprocess"; }
// Read configuration and wire up queues.
// Config keys: id, dst_w/dst_h (default 640x640), keep_ratio (default
// false), dst_format (empty = keep the source format), use_rga (default
// true, forced off when RGA support is not compiled in).
// Returns false on missing input/output queues or when no usable backend
// is compiled in.
bool Init(const SimpleJson& config, const NodeContext& ctx) override {
id_ = config.ValueOr<std::string>("id", "preprocess");
dst_w_ = config.ValueOr<int>("dst_w", 640);
dst_h_ = config.ValueOr<int>("dst_h", 640);
keep_ratio_ = config.ValueOr<bool>("keep_ratio", false);
std::string fmt_str = config.ValueOr<std::string>("dst_format", "");
if (!fmt_str.empty()) {
// Empty/unset dst_format leaves dst_fmt_ = UNKNOWN, which later means
// "keep the source frame's format".
dst_fmt_ = ParseFormat(fmt_str);
}
use_rga_ = config.ValueOr<bool>("use_rga", true);
input_queue_ = ctx.input_queue;
if (!input_queue_) {
std::cerr << "[preprocess] no input queue for node " << id_ << "\n";
return false;
}
if (ctx.output_queues.empty()) {
std::cerr << "[preprocess] no output queue for node " << id_ << "\n";
return false;
}
output_queues_ = ctx.output_queues;
#if !defined(RK3588_ENABLE_RGA)
// RGA not compiled in: silently override the config flag.
use_rga_ = false;
#endif
#if !defined(RK3588_ENABLE_FFMPEG)
// NOTE(review): with neither backend compiled in this returns false, so
// the ProcessPassthrough() branch in Process() is unreachable in that
// build — confirm whether passthrough-only builds should be allowed.
if (!use_rga_) {
std::cerr << "[preprocess] neither RGA nor FFmpeg enabled\n";
return false;
}
#endif
return true;
}
bool Start() override {
std::cout << "[preprocess] start dst=" << dst_w_ << "x" << dst_h_
<< (use_rga_ ? " (rga)" : " (swscale)") << "\n";
return true;
}
// Release the cached swscale context (if the FFmpeg path was used).
// NOTE(review): assumes Process() is no longer running when Stop() is
// called — there is no lock around sws_ctx_; confirm the pipeline's
// shutdown ordering guarantees this.
void Stop() override {
#if defined(RK3588_ENABLE_FFMPEG)
if (sws_ctx_) {
sws_freeContext(sws_ctx_);
sws_ctx_ = nullptr;
}
#endif
}
// Dispatch one frame to the compiled-in backend. Null frames are dropped;
// all other outcomes (including internal failures, which fall back to
// forwarding the original frame) report OK.
NodeStatus Process(FramePtr frame) override {
if (!frame) return NodeStatus::DROP;
#if defined(RK3588_ENABLE_RGA)
if (use_rga_) {
ProcessRga(frame);
} else {
ProcessSwscale(frame);
}
#elif defined(RK3588_ENABLE_FFMPEG)
ProcessSwscale(frame);
#else
ProcessPassthrough(frame);
#endif
return NodeStatus::OK;
}
private:
// Fan the frame out to every downstream queue (shared_ptr copy per queue).
void PushToDownstream(FramePtr frame) {
for (auto& q : output_queues_) {
q->Push(frame);
}
}
// Forward the frame untouched (build with no processing backend).
void ProcessPassthrough(FramePtr frame) {
PushToDownstream(frame);
++processed_;
if (processed_ % 100 == 0) {
std::cout << "[preprocess] passthrough frame " << frame->frame_id << "\n";
}
}
#if defined(RK3588_ENABLE_RGA)
// Hardware path: resize and/or color-convert via librga. On any failure
// the ORIGINAL frame is forwarded unmodified so the pipeline keeps moving.
void ProcessRga(FramePtr frame) {
// UNKNOWN dst format means "keep the source format".
PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format;
int out_w = dst_w_;
int out_h = dst_h_;
if (keep_ratio_ && frame->width > 0 && frame->height > 0) {
// Fit inside dst box preserving aspect ratio; no letterboxing — the
// output frame is shrunk to the scaled size, not padded to dst_w x dst_h.
float scale = std::min(static_cast<float>(dst_w_) / frame->width,
static_cast<float>(dst_h_) / frame->height);
out_w = static_cast<int>(frame->width * scale);
out_h = static_cast<int>(frame->height * scale);
// Round to even: 4:2:0 chroma planes need even luma dimensions.
out_w = (out_w + 1) & ~1;
out_h = (out_h + 1) & ~1;
}
int src_fmt_rga = ToRgaFormat(frame->format);
int dst_fmt_rga = ToRgaFormat(out_fmt);
bool need_cvt = (src_fmt_rga != dst_fmt_rga);
bool need_resize = (frame->width != out_w || frame->height != out_h);
// If no processing needed, passthrough directly
if (!need_cvt && !need_resize) {
PushToDownstream(frame);
++processed_;
if (processed_ % 100 == 0) {
std::cout << "[preprocess] passthrough frame " << frame->frame_id
<< " " << frame->width << "x" << frame->height << " (no change)\n";
}
return;
}
size_t out_size = CalcImageSize(out_w, out_h, out_fmt);
if (out_size == 0 || src_fmt_rga == RK_FORMAT_UNKNOWN || dst_fmt_rga == RK_FORMAT_UNKNOWN) {
std::cerr << "[preprocess] unsupported format for RGA\n";
PushToDownstream(frame);
return;
}
// Use DMA-BUF allocation to avoid >4GB address issue with RGA
auto dma_buf = DmaAlloc(out_size);
if (!dma_buf || !dma_buf->valid()) {
// NOTE(review): the log mentions "falling back to std::vector" but the
// code actually forwards the unprocessed frame — message is stale.
std::cerr << "[preprocess] DMA alloc failed, falling back to std::vector\n";
PushToDownstream(frame);
return;
}
// Calculate proper strides (RGA requires aligned strides)
// For YUV formats, wstride is the width of Y plane
// For RGB/BGR formats, wstride is width (not width*3)
int src_wstride = Align16(frame->width);
int src_hstride = Align16(frame->height);
int dst_wstride = Align16(out_w);
int dst_hstride = Align16(out_h);
// Verbose geometry dump for the first few frames only (debug aid).
if (processed_ < 3) {
std::cout << "[preprocess] src: " << frame->width << "x" << frame->height
<< " fmt=" << static_cast<int>(frame->format) << " rga_fmt=" << src_fmt_rga
<< " wstride=" << src_wstride << " hstride=" << src_hstride
<< " data_size=" << frame->data_size << "\n";
std::cout << "[preprocess] dst: " << out_w << "x" << out_h
<< " fmt=" << static_cast<int>(out_fmt) << " rga_fmt=" << dst_fmt_rga
<< " wstride=" << dst_wstride << " hstride=" << dst_hstride << "\n";
}
rga_buffer_t src_buf{};
rga_buffer_t dst_buf{};
DmaBufferPtr src_dma_buf; // Keep alive if we allocate
if (frame->dma_fd >= 0) {
// Source already lives in a dma-buf: wrap its fd directly.
src_buf = wrapbuffer_fd_t(frame->dma_fd, frame->width, frame->height,
src_wstride, src_hstride, src_fmt_rga);
} else if (frame->data) {
// Source doesn't have DMA fd, copy to DMA buffer first to avoid >4GB address issue
size_t src_size = CalcImageSize(frame->width, frame->height, frame->format);
src_dma_buf = DmaAlloc(src_size);
if (!src_dma_buf || !src_dma_buf->valid()) {
std::cerr << "[preprocess] DMA alloc for src failed\n";
PushToDownstream(frame);
return;
}
// NOTE(review): if data_size < src_size the tail of the DMA buffer is
// left uninitialized — confirm upstream always delivers full frames.
memcpy(src_dma_buf->data(), frame->data, std::min(src_size, frame->data_size));
src_buf = wrapbuffer_fd_t(src_dma_buf->fd, frame->width, frame->height,
src_wstride, src_hstride, src_fmt_rga);
} else {
// No fd and no CPU pointer: nothing we can do, forward as-is.
PushToDownstream(frame);
return;
}
// Use DMA fd for destination buffer
dst_buf = wrapbuffer_fd_t(dma_buf->fd, out_w, out_h,
dst_wstride, dst_hstride, dst_fmt_rga);
IM_STATUS status = IM_STATUS_SUCCESS;
if (need_resize && need_cvt) {
// Two-pass: resize in the SOURCE format first, then convert color,
// since a single RGA op here handles only one transform at a time.
// Allocate DMA buffer for intermediate result
auto tmp_dma = DmaAlloc(CalcImageSize(out_w, out_h, frame->format));
if (!tmp_dma || !tmp_dma->valid()) {
std::cerr << "[preprocess] DMA alloc for tmp failed\n";
PushToDownstream(frame);
return;
}
rga_buffer_t tmp = wrapbuffer_fd_t(tmp_dma->fd, out_w, out_h,
dst_wstride, dst_hstride, src_fmt_rga);
status = imresize(src_buf, tmp);
if (status == IM_STATUS_SUCCESS) {
status = imcvtcolor(tmp, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT);
}
} else if (need_resize) {
status = imresize(src_buf, dst_buf);
} else if (need_cvt) {
status = imcvtcolor(src_buf, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT);
}
if (status != IM_STATUS_SUCCESS) {
std::cerr << "[preprocess] RGA failed: " << imStrError(status) << "\n";
PushToDownstream(frame);
return;
}
// Build the output frame; data_owner keeps the dma-buf (and its fd) alive
// for as long as any downstream consumer holds the frame.
auto out_frame = std::make_shared<Frame>();
out_frame->width = out_w;
out_frame->height = out_h;
out_frame->format = out_fmt;
out_frame->stride = dst_wstride;
out_frame->dma_fd = dma_buf->fd;
out_frame->data = dma_buf->data();
out_frame->data_size = dma_buf->size;
out_frame->data_owner = dma_buf; // DmaBuffer shared_ptr keeps fd alive
out_frame->pts = frame->pts;
out_frame->frame_id = frame->frame_id;
out_frame->det = frame->det;
out_frame->user_meta = frame->user_meta;
SetupPlanes(*out_frame, out_fmt);
PushToDownstream(out_frame);
++processed_;
if (processed_ % 100 == 0) {
std::cout << "[preprocess] rga frame " << out_frame->frame_id
<< " " << frame->width << "x" << frame->height
<< " -> " << out_w << "x" << out_h << "\n";
}
}
#endif
#if defined(RK3588_ENABLE_FFMPEG)
// Software path: resize/convert via libswscale into a heap buffer. On any
// failure the original frame is forwarded unmodified.
void ProcessSwscale(FramePtr frame) {
PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format;
int out_w = dst_w_;
int out_h = dst_h_;
if (keep_ratio_ && frame->width > 0 && frame->height > 0) {
// Same fit-inside-box logic as the RGA path (even-rounded, no padding).
float scale = std::min(static_cast<float>(dst_w_) / frame->width,
static_cast<float>(dst_h_) / frame->height);
out_w = static_cast<int>(frame->width * scale);
out_h = static_cast<int>(frame->height * scale);
out_w = (out_w + 1) & ~1;
out_h = (out_h + 1) & ~1;
}
AVPixelFormat src_av_fmt = ToAvFormat(frame->format);
AVPixelFormat dst_av_fmt = ToAvFormat(out_fmt);
if (src_av_fmt == AV_PIX_FMT_NONE || dst_av_fmt == AV_PIX_FMT_NONE) {
PushToDownstream(frame);
return;
}
// Cache the SwsContext keyed on source geometry/format and dst format.
// Output dims are derived deterministically from the source dims (given
// fixed config), so they are implicitly covered by this key.
if (!sws_ctx_ || frame->width != last_src_w_ || frame->height != last_src_h_ ||
src_av_fmt != last_src_fmt_ || dst_av_fmt != last_dst_fmt_) {
if (sws_ctx_) sws_freeContext(sws_ctx_);
sws_ctx_ = sws_getContext(frame->width, frame->height, src_av_fmt,
out_w, out_h, dst_av_fmt,
SWS_BILINEAR, nullptr, nullptr, nullptr);
last_src_w_ = frame->width;
last_src_h_ = frame->height;
last_src_fmt_ = src_av_fmt;
last_dst_fmt_ = dst_av_fmt;
}
if (!sws_ctx_) {
PushToDownstream(frame);
return;
}
size_t out_size = CalcImageSize(out_w, out_h, out_fmt);
// Output lives in a shared vector; data_owner keeps it alive downstream.
auto buffer = std::make_shared<std::vector<uint8_t>>(out_size);
uint8_t* src_data[4] = {nullptr};
int src_linesize[4] = {0};
uint8_t* dst_data[4] = {nullptr};
int dst_linesize[4] = {0};
SetupAvPlanes(frame.get(), src_data, src_linesize);
// align=1: tightly packed planes matching CalcImageSize's layout.
av_image_fill_arrays(dst_data, dst_linesize, buffer->data(),
dst_av_fmt, out_w, out_h, 1);
sws_scale(sws_ctx_, src_data, src_linesize, 0, frame->height,
dst_data, dst_linesize);
auto out_frame = std::make_shared<Frame>();
out_frame->width = out_w;
out_frame->height = out_h;
out_frame->format = out_fmt;
// NOTE(review): stride is set to out_w here even for RGB/BGR, while
// SetupPlanes reports width*3 for packed RGB — confirm which unit
// (pixels vs bytes) consumers of Frame::stride expect.
out_frame->stride = out_w;
out_frame->data = buffer->data();
out_frame->data_size = buffer->size();
out_frame->data_owner = buffer;
out_frame->pts = frame->pts;
out_frame->frame_id = frame->frame_id;
out_frame->det = frame->det;
out_frame->user_meta = frame->user_meta;
SetupPlanes(*out_frame, out_fmt);
PushToDownstream(out_frame);
++processed_;
if (processed_ % 100 == 0) {
std::cout << "[preprocess] swscale frame " << out_frame->frame_id
<< " " << frame->width << "x" << frame->height
<< " -> " << out_w << "x" << out_h << "\n";
}
}
// Map the pipeline PixelFormat to FFmpeg's AVPixelFormat.
static AVPixelFormat ToAvFormat(PixelFormat fmt) {
switch (fmt) {
case PixelFormat::NV12: return AV_PIX_FMT_NV12;
case PixelFormat::YUV420: return AV_PIX_FMT_YUV420P;
case PixelFormat::RGB: return AV_PIX_FMT_RGB24;
case PixelFormat::BGR: return AV_PIX_FMT_BGR24;
default: return AV_PIX_FMT_NONE;
}
}
// Fill FFmpeg-style data/linesize arrays from a Frame. Uses the frame's
// explicit plane descriptors when present; otherwise assumes the planes
// are tightly packed back-to-back starting at f->data (TODO confirm all
// producers without planes[] actually pack frames this way).
static void SetupAvPlanes(const Frame* f, uint8_t* data[4], int linesize[4]) {
if (!f->data) return;
if (f->format == PixelFormat::NV12) {
data[0] = f->planes[0].data ? f->planes[0].data : f->data;
data[1] = f->planes[1].data ? f->planes[1].data : (f->data + f->width * f->height);
linesize[0] = f->planes[0].stride > 0 ? f->planes[0].stride : f->width;
linesize[1] = f->planes[1].stride > 0 ? f->planes[1].stride : f->width;
} else if (f->format == PixelFormat::YUV420) {
data[0] = f->planes[0].data ? f->planes[0].data : f->data;
int y_size = f->width * f->height;
int uv_size = y_size / 4;
data[1] = f->planes[1].data ? f->planes[1].data : (f->data + y_size);
data[2] = f->planes[2].data ? f->planes[2].data : (f->data + y_size + uv_size);
linesize[0] = f->planes[0].stride > 0 ? f->planes[0].stride : f->width;
linesize[1] = f->planes[1].stride > 0 ? f->planes[1].stride : f->width / 2;
linesize[2] = f->planes[2].stride > 0 ? f->planes[2].stride : f->width / 2;
} else {
// Packed single-plane (RGB/BGR): 3 bytes per pixel when no stride set.
data[0] = f->data;
linesize[0] = f->stride > 0 ? f->stride : f->width * 3;
}
}
#endif
// Populate Frame::planes for a tightly-packed buffer of the given format
// (the layout this node produces: planes back-to-back, no row padding).
// Plane tuple appears to be {data, stride, size, offset} — matches use above.
void SetupPlanes(Frame& f, PixelFormat fmt) {
if (fmt == PixelFormat::NV12) {
f.plane_count = 2;
int y_size = f.width * f.height;
f.planes[0] = {f.data, f.width, y_size, 0};
f.planes[1] = {f.data + y_size, f.width, y_size / 2, y_size};
} else if (fmt == PixelFormat::YUV420) {
f.plane_count = 3;
int y_size = f.width * f.height;
int uv_size = y_size / 4;
f.planes[0] = {f.data, f.width, y_size, 0};
f.planes[1] = {f.data + y_size, f.width / 2, uv_size, y_size};
f.planes[2] = {f.data + y_size + uv_size, f.width / 2, uv_size, y_size + uv_size};
} else {
// Packed RGB/BGR: stride here is in BYTES (width*3), unlike YUV above.
f.plane_count = 1;
f.planes[0] = {f.data, f.width * 3, static_cast<int>(f.data_size), 0};
}
}
std::string id_;
int dst_w_ = 640;   // target width (box width when keep_ratio_)
int dst_h_ = 640;   // target height (box height when keep_ratio_)
bool keep_ratio_ = false;
PixelFormat dst_fmt_ = PixelFormat::UNKNOWN;  // UNKNOWN = keep source format
bool use_rga_ = true;
std::shared_ptr<SpscQueue<FramePtr>> input_queue_;
std::vector<std::shared_ptr<SpscQueue<FramePtr>>> output_queues_;
uint64_t processed_ = 0;  // frames handled; drives periodic logging
#if defined(RK3588_ENABLE_FFMPEG)
// Cached swscale context plus the key it was created for.
SwsContext* sws_ctx_ = nullptr;
int last_src_w_ = 0;
int last_src_h_ = 0;
AVPixelFormat last_src_fmt_ = AV_PIX_FMT_NONE;
AVPixelFormat last_dst_fmt_ = AV_PIX_FMT_NONE;
#endif
};
// Register this node with the pipeline factory under the type "preprocess".
REGISTER_NODE(PreprocessNode, "preprocess");
} // namespace rk3588