#include #include #include #include #include #include #include #include #include "node.h" #include "utils/dma_alloc.h" #if defined(RK3588_ENABLE_RGA) #include "im2d.hpp" #include "im2d_buffer.h" #include "im2d_type.h" #endif #if defined(RK3588_ENABLE_FFMPEG) extern "C" { #include #include } #endif namespace rk3588 { namespace { inline int Align16(int v) { return (v + 15) & ~15; } #if defined(RK3588_ENABLE_RGA) int ToRgaFormat(PixelFormat fmt) { switch (fmt) { case PixelFormat::NV12: return RK_FORMAT_YCbCr_420_SP; case PixelFormat::YUV420: return RK_FORMAT_YCbCr_420_P; case PixelFormat::RGB: return RK_FORMAT_RGB_888; case PixelFormat::BGR: return RK_FORMAT_BGR_888; default: return RK_FORMAT_UNKNOWN; } } #endif PixelFormat ParseFormat(const std::string& s) { if (s == "nv12" || s == "NV12") return PixelFormat::NV12; if (s == "yuv420" || s == "YUV420") return PixelFormat::YUV420; if (s == "rgb" || s == "RGB") return PixelFormat::RGB; if (s == "bgr" || s == "BGR") return PixelFormat::BGR; return PixelFormat::UNKNOWN; } size_t CalcImageSize(int w, int h, PixelFormat fmt) { switch (fmt) { case PixelFormat::NV12: case PixelFormat::YUV420: return static_cast(w) * h * 3 / 2; case PixelFormat::RGB: case PixelFormat::BGR: return static_cast(w) * h * 3; default: return 0; } } size_t CalcImageSizeStrided(int wstride, int hstride, PixelFormat fmt) { if (wstride <= 0 || hstride <= 0) return 0; const size_t ws = static_cast(wstride); const size_t hs = static_cast(hstride); switch (fmt) { case PixelFormat::NV12: { // Y: ws*hs, UV: ws*(hs/2) return ws * hs + ws * (hs / 2); } case PixelFormat::YUV420: { // Y: ws*hs, U/V: (ws/2)*(hs/2) const size_t y = ws * hs; const size_t uv = (ws / 2) * (hs / 2); return y + uv + uv; } case PixelFormat::RGB: case PixelFormat::BGR: return ws * hs * 3; default: return 0; } } bool CopyToStridedBuffer(const Frame& src, uint8_t* dst, size_t dst_size, int dst_wstride, int dst_hstride) { if (!dst || dst_size == 0) return false; if (dst_wstride <= 0 || dst_hstride <= 0) return false; std::memset(dst, 0, dst_size); const int w = src.width; const int h = src.height; if (w <= 0 || h <= 0) return false; if (src.format == PixelFormat::NV12) { const size_t y_bytes = static_cast(dst_wstride) * dst_hstride; const size_t uv_bytes = static_cast(dst_wstride) * (dst_hstride / 2); if (y_bytes + uv_bytes > dst_size) return false; const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data; const uint8_t* src_uv = src.planes[1].data ? src.planes[1].data : nullptr; const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : w; const int src_uv_stride = src.planes[1].stride > 0 ? src.planes[1].stride : w; if (!src_y || !src_uv) return false; for (int row = 0; row < h; ++row) { std::memcpy(dst + static_cast(row) * dst_wstride, src_y + static_cast(row) * src_y_stride, static_cast(w)); } uint8_t* dst_uv = dst + y_bytes; const int uv_rows = h / 2; for (int row = 0; row < uv_rows; ++row) { std::memcpy(dst_uv + static_cast(row) * dst_wstride, src_uv + static_cast(row) * src_uv_stride, static_cast(w)); } return true; } if (src.format == PixelFormat::YUV420) { const size_t y_bytes = static_cast(dst_wstride) * dst_hstride; const size_t uv_stride = static_cast(dst_wstride) / 2; const size_t uv_h = static_cast(dst_hstride) / 2; const size_t u_bytes = uv_stride * uv_h; const size_t v_bytes = u_bytes; if (y_bytes + u_bytes + v_bytes > dst_size) return false; const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data; const uint8_t* src_u = src.planes[1].data ? src.planes[1].data : nullptr; const uint8_t* src_v = src.planes[2].data ? src.planes[2].data : nullptr; const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : w; const int src_u_stride = src.planes[1].stride > 0 ? src.planes[1].stride : (w / 2); const int src_v_stride = src.planes[2].stride > 0 ? src.planes[2].stride : (w / 2); if (!src_y || !src_u || !src_v) return false; for (int row = 0; row < h; ++row) { std::memcpy(dst + static_cast(row) * dst_wstride, src_y + static_cast(row) * src_y_stride, static_cast(w)); } uint8_t* dst_u = dst + y_bytes; uint8_t* dst_v = dst + y_bytes + u_bytes; const int uv_rows = h / 2; const int uv_cols = w / 2; for (int row = 0; row < uv_rows; ++row) { std::memcpy(dst_u + static_cast(row) * uv_stride, src_u + static_cast(row) * src_u_stride, static_cast(uv_cols)); std::memcpy(dst_v + static_cast(row) * uv_stride, src_v + static_cast(row) * src_v_stride, static_cast(uv_cols)); } return true; } if (src.format == PixelFormat::RGB || src.format == PixelFormat::BGR) { const size_t need = static_cast(dst_wstride) * dst_hstride * 3; if (need > dst_size) return false; const uint8_t* src_rgb = src.planes[0].data ? src.planes[0].data : src.data; const int src_stride = src.planes[0].stride > 0 ? src.planes[0].stride : (src.stride > 0 ? src.stride : w * 3); if (!src_rgb) return false; const size_t dst_stride = static_cast(dst_wstride) * 3; const size_t row_bytes = static_cast(w) * 3; for (int row = 0; row < h; ++row) { std::memcpy(dst + static_cast(row) * dst_stride, src_rgb + static_cast(row) * src_stride, row_bytes); } return true; } return false; } } // namespace class PreprocessNode : public INode { public: std::string Id() const override { return id_; } std::string Type() const override { return "preprocess"; } bool Init(const SimpleJson& config, const NodeContext& ctx) override { id_ = config.ValueOr("id", "preprocess"); dst_w_ = config.ValueOr("dst_w", 640); dst_h_ = config.ValueOr("dst_h", 640); keep_ratio_ = config.ValueOr("keep_ratio", false); std::string fmt_str = config.ValueOr("dst_format", ""); if (!fmt_str.empty()) { dst_fmt_ = ParseFormat(fmt_str); } const bool requested_use_rga = config.ValueOr("use_rga", true); use_rga_ = requested_use_rga; input_queue_ = ctx.input_queue; if (!input_queue_) { std::cerr << "[preprocess] no input queue for node " << id_ << "\n"; return false; } if (ctx.output_queues.empty()) { std::cerr << "[preprocess] no output queue for node " << id_ << "\n"; return false; } output_queues_ = ctx.output_queues; #if !defined(RK3588_ENABLE_RGA) if (requested_use_rga) { std::cerr << "[preprocess] use_rga=true but RGA not enabled at build time\n"; return false; } use_rga_ = false; #endif #if !defined(RK3588_ENABLE_FFMPEG) if (!use_rga_) { std::cerr << "[preprocess] neither RGA nor FFmpeg enabled\n"; return false; } #endif return true; } bool Start() override { std::cout << "[preprocess] start dst=" << dst_w_ << "x" << dst_h_ << (use_rga_ ? " (rga)" : " (swscale)") << "\n"; return true; } void Stop() override { #if defined(RK3588_ENABLE_FFMPEG) if (sws_ctx_) { sws_freeContext(sws_ctx_); sws_ctx_ = nullptr; } #endif } NodeStatus Process(FramePtr frame) override { if (!frame) return NodeStatus::DROP; #if defined(RK3588_ENABLE_RGA) if (use_rga_) { if (!ProcessRga(frame)) { return NodeStatus::ERROR; } } else { ProcessSwscale(frame); } #elif defined(RK3588_ENABLE_FFMPEG) ProcessSwscale(frame); #else ProcessPassthrough(frame); #endif return NodeStatus::OK; } private: void PushToDownstream(FramePtr frame) { for (auto& q : output_queues_) { q->Push(frame); } } void ProcessPassthrough(FramePtr frame) { PushToDownstream(frame); ++processed_; if (processed_ % 100 == 0) { std::cout << "[preprocess] passthrough frame " << frame->frame_id << "\n"; } } #if defined(RK3588_ENABLE_RGA) bool ProcessRga(FramePtr frame) { PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format; int out_w = dst_w_; int out_h = dst_h_; if (keep_ratio_ && frame->width > 0 && frame->height > 0) { float scale = std::min(static_cast(dst_w_) / frame->width, static_cast(dst_h_) / frame->height); out_w = static_cast(frame->width * scale); out_h = static_cast(frame->height * scale); out_w = (out_w + 1) & ~1; out_h = (out_h + 1) & ~1; } int src_fmt_rga = ToRgaFormat(frame->format); int dst_fmt_rga = ToRgaFormat(out_fmt); bool need_cvt = (src_fmt_rga != dst_fmt_rga); bool need_resize = (frame->width != out_w || frame->height != out_h); // If no processing needed, passthrough directly if (!need_cvt && !need_resize) { PushToDownstream(frame); ++processed_; if (processed_ % 100 == 0) { std::cout << "[preprocess] passthrough frame " << frame->frame_id << " " << frame->width << "x" << frame->height << " (no change)\n"; } return true; } // Calculate proper strides (RGA requires aligned strides) // For YUV formats, wstride is the width of Y plane // For RGB/BGR formats, wstride is width (not width*3) int src_wstride = Align16(frame->width); int src_hstride = Align16(frame->height); int dst_wstride = Align16(out_w); int dst_hstride = Align16(out_h); if (src_fmt_rga == RK_FORMAT_UNKNOWN || dst_fmt_rga == RK_FORMAT_UNKNOWN) { std::cerr << "[preprocess] unsupported format for RGA\n"; return false; } size_t out_size = CalcImageSizeStrided(dst_wstride, dst_hstride, out_fmt); if (out_size == 0) { std::cerr << "[preprocess] invalid output size for RGA\n"; return false; } // Use DMA-BUF allocation to avoid >4GB address issue with RGA auto dma_buf = DmaAlloc(out_size); if (!dma_buf || !dma_buf->valid()) { std::cerr << "[preprocess] DMA alloc failed\n"; return false; } if (processed_ < 3) { std::cout << "[preprocess] src: " << frame->width << "x" << frame->height << " fmt=" << static_cast(frame->format) << " rga_fmt=" << src_fmt_rga << " wstride=" << src_wstride << " hstride=" << src_hstride << " data_size=" << frame->data_size << "\n"; std::cout << "[preprocess] dst: " << out_w << "x" << out_h << " fmt=" << static_cast(out_fmt) << " rga_fmt=" << dst_fmt_rga << " wstride=" << dst_wstride << " hstride=" << dst_hstride << "\n"; } rga_buffer_t src_buf{}; rga_buffer_t dst_buf{}; DmaBufferPtr src_dma_buf; // Keep alive if we allocate if (frame->dma_fd >= 0) { src_buf = wrapbuffer_fd_t(frame->dma_fd, frame->width, frame->height, src_wstride, src_hstride, src_fmt_rga); } else if (frame->data) { // Source doesn't have DMA fd, copy to DMA buffer first to avoid >4GB address issue size_t src_size = CalcImageSizeStrided(src_wstride, src_hstride, frame->format); src_dma_buf = DmaAlloc(src_size); if (!src_dma_buf || !src_dma_buf->valid()) { std::cerr << "[preprocess] DMA alloc for src failed\n"; return false; } // CPU writes to a DMA-BUF must be flushed before RGA reads it. DmaSyncStartFd(src_dma_buf->fd); if (!CopyToStridedBuffer(*frame, src_dma_buf->data(), src_dma_buf->size, src_wstride, src_hstride)) { DmaSyncEndFd(src_dma_buf->fd); std::cerr << "[preprocess] copy src to DMA failed\n"; return false; } DmaSyncEndFd(src_dma_buf->fd); src_buf = wrapbuffer_fd_t(src_dma_buf->fd, frame->width, frame->height, src_wstride, src_hstride, src_fmt_rga); } else { return false; } // Use DMA fd for destination buffer dst_buf = wrapbuffer_fd_t(dma_buf->fd, out_w, out_h, dst_wstride, dst_hstride, dst_fmt_rga); IM_STATUS status = IM_STATUS_SUCCESS; if (need_resize && need_cvt) { // Allocate DMA buffer for intermediate result auto tmp_dma = DmaAlloc(CalcImageSizeStrided(dst_wstride, dst_hstride, frame->format)); if (!tmp_dma || !tmp_dma->valid()) { std::cerr << "[preprocess] DMA alloc for tmp failed\n"; return false; } rga_buffer_t tmp = wrapbuffer_fd_t(tmp_dma->fd, out_w, out_h, dst_wstride, dst_hstride, src_fmt_rga); status = imresize(src_buf, tmp); if (status == IM_STATUS_SUCCESS) { status = imcvtcolor(tmp, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT); } } else if (need_resize) { status = imresize(src_buf, dst_buf); } else if (need_cvt) { status = imcvtcolor(src_buf, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT); } if (status != IM_STATUS_SUCCESS) { std::cerr << "[preprocess] RGA failed: " << imStrError(status) << "\n"; return false; } auto out_frame = std::make_shared(); out_frame->width = out_w; out_frame->height = out_h; out_frame->format = out_fmt; out_frame->stride = (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR) ? (dst_wstride * 3) : dst_wstride; out_frame->dma_fd = dma_buf->fd; out_frame->data = dma_buf->data(); out_frame->data_size = dma_buf->size; out_frame->data_owner = dma_buf; // DmaBuffer shared_ptr keeps fd alive out_frame->pts = frame->pts; out_frame->frame_id = frame->frame_id; out_frame->det = frame->det; out_frame->user_meta = frame->user_meta; SetupPlanes(*out_frame, out_fmt); PushToDownstream(out_frame); ++processed_; if (processed_ % 100 == 0) { std::cout << "[preprocess] rga frame " << out_frame->frame_id << " " << frame->width << "x" << frame->height << " -> " << out_w << "x" << out_h << "\n"; } return true; } #endif #if defined(RK3588_ENABLE_FFMPEG) void ProcessSwscale(FramePtr frame) { PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format; int out_w = dst_w_; int out_h = dst_h_; if (keep_ratio_ && frame->width > 0 && frame->height > 0) { float scale = std::min(static_cast(dst_w_) / frame->width, static_cast(dst_h_) / frame->height); out_w = static_cast(frame->width * scale); out_h = static_cast(frame->height * scale); out_w = (out_w + 1) & ~1; out_h = (out_h + 1) & ~1; } AVPixelFormat src_av_fmt = ToAvFormat(frame->format); AVPixelFormat dst_av_fmt = ToAvFormat(out_fmt); if (src_av_fmt == AV_PIX_FMT_NONE || dst_av_fmt == AV_PIX_FMT_NONE) { PushToDownstream(frame); return; } if (!sws_ctx_ || frame->width != last_src_w_ || frame->height != last_src_h_ || src_av_fmt != last_src_fmt_ || dst_av_fmt != last_dst_fmt_) { if (sws_ctx_) sws_freeContext(sws_ctx_); sws_ctx_ = sws_getContext(frame->width, frame->height, src_av_fmt, out_w, out_h, dst_av_fmt, SWS_BILINEAR, nullptr, nullptr, nullptr); last_src_w_ = frame->width; last_src_h_ = frame->height; last_src_fmt_ = src_av_fmt; last_dst_fmt_ = dst_av_fmt; } if (!sws_ctx_) { PushToDownstream(frame); return; } size_t out_size = CalcImageSize(out_w, out_h, out_fmt); auto buffer = std::make_shared>(out_size); uint8_t* src_data[4] = {nullptr}; int src_linesize[4] = {0}; uint8_t* dst_data[4] = {nullptr}; int dst_linesize[4] = {0}; SetupAvPlanes(frame.get(), src_data, src_linesize); av_image_fill_arrays(dst_data, dst_linesize, buffer->data(), dst_av_fmt, out_w, out_h, 1); sws_scale(sws_ctx_, src_data, src_linesize, 0, frame->height, dst_data, dst_linesize); auto out_frame = std::make_shared(); out_frame->width = out_w; out_frame->height = out_h; out_frame->format = out_fmt; out_frame->stride = (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR) ? (out_w * 3) : out_w; out_frame->data = buffer->data(); out_frame->data_size = buffer->size(); out_frame->data_owner = buffer; out_frame->pts = frame->pts; out_frame->frame_id = frame->frame_id; out_frame->det = frame->det; out_frame->user_meta = frame->user_meta; SetupPlanes(*out_frame, out_fmt); PushToDownstream(out_frame); ++processed_; if (processed_ % 100 == 0) { std::cout << "[preprocess] swscale frame " << out_frame->frame_id << " " << frame->width << "x" << frame->height << " -> " << out_w << "x" << out_h << "\n"; } } static AVPixelFormat ToAvFormat(PixelFormat fmt) { switch (fmt) { case PixelFormat::NV12: return AV_PIX_FMT_NV12; case PixelFormat::YUV420: return AV_PIX_FMT_YUV420P; case PixelFormat::RGB: return AV_PIX_FMT_RGB24; case PixelFormat::BGR: return AV_PIX_FMT_BGR24; default: return AV_PIX_FMT_NONE; } } static void SetupAvPlanes(const Frame* f, uint8_t* data[4], int linesize[4]) { if (!f->data) return; if (f->format == PixelFormat::NV12) { data[0] = f->planes[0].data ? f->planes[0].data : f->data; data[1] = f->planes[1].data ? f->planes[1].data : (f->data + f->width * f->height); linesize[0] = f->planes[0].stride > 0 ? f->planes[0].stride : f->width; linesize[1] = f->planes[1].stride > 0 ? f->planes[1].stride : f->width; } else if (f->format == PixelFormat::YUV420) { data[0] = f->planes[0].data ? f->planes[0].data : f->data; int y_size = f->width * f->height; int uv_size = y_size / 4; data[1] = f->planes[1].data ? f->planes[1].data : (f->data + y_size); data[2] = f->planes[2].data ? f->planes[2].data : (f->data + y_size + uv_size); linesize[0] = f->planes[0].stride > 0 ? f->planes[0].stride : f->width; linesize[1] = f->planes[1].stride > 0 ? f->planes[1].stride : f->width / 2; linesize[2] = f->planes[2].stride > 0 ? f->planes[2].stride : f->width / 2; } else { data[0] = f->data; linesize[0] = f->stride > 0 ? f->stride : f->width * 3; } } #endif void SetupPlanes(Frame& f, PixelFormat fmt) { if (!f.data) return; if (fmt == PixelFormat::NV12) { f.plane_count = 2; int y_stride = f.stride > 0 ? f.stride : f.width; if (y_stride <= 0) y_stride = f.width; size_t y_bytes = static_cast(y_stride) * static_cast(f.height); if (f.data_size > 0) { const size_t candidate = (f.data_size * 2) / 3; // total = Y + UV = Y*3/2 if (candidate >= y_bytes && candidate <= f.data_size && (candidate % static_cast(y_stride)) == 0) { y_bytes = candidate; } } size_t uv_bytes = y_bytes / 2; if (f.data_size > 0 && y_bytes + uv_bytes > f.data_size) { uv_bytes = f.data_size > y_bytes ? (f.data_size - y_bytes) : 0; } f.planes[0] = {f.data, y_stride, static_cast(y_bytes), 0}; f.planes[1] = {f.data + y_bytes, y_stride, static_cast(uv_bytes), static_cast(y_bytes)}; return; } if (fmt == PixelFormat::YUV420) { f.plane_count = 3; int y_stride = f.stride > 0 ? f.stride : f.width; if (y_stride <= 0) y_stride = f.width; size_t y_bytes = static_cast(y_stride) * static_cast(f.height); size_t hstride = static_cast(f.height); if (f.data_size > 0) { const size_t candidate = (f.data_size * 2) / 3; if (candidate >= y_bytes && candidate <= f.data_size && (candidate % static_cast(y_stride)) == 0) { y_bytes = candidate; hstride = y_bytes / static_cast(y_stride); } } size_t uv_stride = static_cast(y_stride) / 2; size_t uv_h = hstride / 2; size_t u_bytes = uv_stride * uv_h; size_t v_bytes = u_bytes; size_t need = y_bytes + u_bytes + v_bytes; if (f.data_size > 0 && need > f.data_size) { // Fallback to tightly packed layout. y_stride = f.width; y_bytes = static_cast(f.width) * static_cast(f.height); uv_stride = static_cast(f.width) / 2; uv_h = static_cast(f.height) / 2; u_bytes = uv_stride * uv_h; v_bytes = u_bytes; } f.planes[0] = {f.data, y_stride, static_cast(y_bytes), 0}; f.planes[1] = {f.data + y_bytes, static_cast(uv_stride), static_cast(u_bytes), static_cast(y_bytes)}; f.planes[2] = {f.data + y_bytes + u_bytes, static_cast(uv_stride), static_cast(v_bytes), static_cast(y_bytes + u_bytes)}; return; } // RGB/BGR f.plane_count = 1; int stride_bytes = f.stride > 0 ? f.stride : (f.width * 3); f.planes[0] = {f.data, stride_bytes, static_cast(f.data_size), 0}; } std::string id_; int dst_w_ = 640; int dst_h_ = 640; bool keep_ratio_ = false; PixelFormat dst_fmt_ = PixelFormat::UNKNOWN; bool use_rga_ = true; std::shared_ptr> input_queue_; std::vector>> output_queues_; uint64_t processed_ = 0; #if defined(RK3588_ENABLE_FFMPEG) SwsContext* sws_ctx_ = nullptr; int last_src_w_ = 0; int last_src_h_ = 0; AVPixelFormat last_src_fmt_ = AV_PIX_FMT_NONE; AVPixelFormat last_dst_fmt_ = AV_PIX_FMT_NONE; #endif }; REGISTER_NODE(PreprocessNode, "preprocess"); } // namespace rk3588