#include #include #include #include #include #include #include #include #include #include #include #include "node.h" #include "utils/dma_alloc.h" #include "utils/logger.h" #if defined(RK3588_ENABLE_RGA) #include "im2d.hpp" #include "im2d_buffer.h" #include "im2d_type.h" #endif #if defined(RK3588_ENABLE_FFMPEG) extern "C" { #include #include } #endif namespace rk3588 { namespace { inline int Align16(int v) { return (v + 15) & ~15; } #if defined(RK3588_ENABLE_RGA) int ToRgaFormat(PixelFormat fmt) { switch (fmt) { case PixelFormat::NV12: return RK_FORMAT_YCbCr_420_SP; case PixelFormat::YUV420: return RK_FORMAT_YCbCr_420_P; case PixelFormat::RGB: return RK_FORMAT_RGB_888; case PixelFormat::BGR: return RK_FORMAT_BGR_888; default: return RK_FORMAT_UNKNOWN; } } std::atomic& GlobalRgaMaxInflightRef() { static std::atomic v{2}; return v; } int GlobalRgaMaxInflight() { static std::once_flag once; std::call_once(once, []() { const char* s = std::getenv("RK3588_RGA_MAX_INFLIGHT"); if (!s || !*s) return; try { int v = std::stoi(s); if (v > 0 && v <= 32) GlobalRgaMaxInflightRef().store(v); } catch (...) { } }); int v = GlobalRgaMaxInflightRef().load(); return v > 0 ? v : 1; } void SetGlobalRgaMaxInflight(int v) { if (v <= 0) return; if (v > 32) v = 32; GlobalRgaMaxInflightRef().store(v); } class RgaGate { public: void Acquire() { const int max_inflight = GlobalRgaMaxInflight(); std::unique_lock lock(mu_); cv_.wait(lock, [&]() { return in_flight_ < max_inflight; }); ++in_flight_; } void Release() { std::lock_guard lock(mu_); if (in_flight_ > 0) { --in_flight_; } cv_.notify_one(); } private: std::mutex mu_; std::condition_variable cv_; int in_flight_ = 0; }; RgaGate& GlobalRgaGate() { static RgaGate* g = new RgaGate(); return *g; } class ScopedRgaGate { public: ScopedRgaGate() { GlobalRgaGate().Acquire(); } ~ScopedRgaGate() { GlobalRgaGate().Release(); } ScopedRgaGate(const ScopedRgaGate&) = delete; ScopedRgaGate& operator=(const ScopedRgaGate&) = delete; }; void EnsureRgaInitializedOnce() { static std::once_flag once; std::call_once(once, []() { const IM_STATUS st = imcheckHeader(); if (st != IM_STATUS_NOERROR && st != IM_STATUS_SUCCESS) { std::cerr << "[preprocess] imcheckHeader failed: " << imStrError(st) << "\n"; } }); } #endif PixelFormat ParseFormat(const std::string& s) { if (s == "nv12" || s == "NV12") return PixelFormat::NV12; if (s == "yuv420" || s == "YUV420") return PixelFormat::YUV420; if (s == "rgb" || s == "RGB") return PixelFormat::RGB; if (s == "bgr" || s == "BGR") return PixelFormat::BGR; return PixelFormat::UNKNOWN; } size_t CalcImageSize(int w, int h, PixelFormat fmt) { switch (fmt) { case PixelFormat::NV12: case PixelFormat::YUV420: return static_cast(w) * h * 3 / 2; case PixelFormat::RGB: case PixelFormat::BGR: return static_cast(w) * h * 3; default: return 0; } } size_t CalcImageSizeStrided(int wstride, int hstride, PixelFormat fmt) { if (wstride <= 0 || hstride <= 0) return 0; const size_t ws = static_cast(wstride); const size_t hs = static_cast(hstride); switch (fmt) { case PixelFormat::NV12: { // Y: ws*hs, UV: ws*(hs/2) return ws * hs + ws * (hs / 2); } case PixelFormat::YUV420: { // Y: ws*hs, U/V: (ws/2)*(hs/2) const size_t y = ws * hs; const size_t uv = (ws / 2) * (hs / 2); return y + uv + uv; } case PixelFormat::RGB: case PixelFormat::BGR: return ws * hs * 3; default: return 0; } } bool CopyToStridedBuffer(const Frame& src, uint8_t* dst, size_t dst_size, int dst_wstride, int dst_hstride) { if (!dst || dst_size == 0) return false; if (dst_wstride <= 0 || dst_hstride <= 0) return false; std::memset(dst, 0, dst_size); const int w = src.width; const int h = src.height; if (w <= 0 || h <= 0) return false; if (src.format == PixelFormat::NV12) { const size_t y_bytes = static_cast(dst_wstride) * dst_hstride; const size_t uv_bytes = static_cast(dst_wstride) * (dst_hstride / 2); if (y_bytes + uv_bytes > dst_size) return false; const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data; const uint8_t* src_uv = src.planes[1].data ? src.planes[1].data : nullptr; const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : w; const int src_uv_stride = src.planes[1].stride > 0 ? src.planes[1].stride : w; if (!src_y) return false; if (!src_uv) { // Fallback: packed NV12 layout. if (!src.data) return false; src_uv = src.data + static_cast(src_y_stride) * static_cast(h); } for (int row = 0; row < h; ++row) { std::memcpy(dst + static_cast(row) * dst_wstride, src_y + static_cast(row) * src_y_stride, static_cast(w)); } uint8_t* dst_uv = dst + y_bytes; const int uv_rows = h / 2; for (int row = 0; row < uv_rows; ++row) { std::memcpy(dst_uv + static_cast(row) * dst_wstride, src_uv + static_cast(row) * src_uv_stride, static_cast(w)); } return true; } if (src.format == PixelFormat::YUV420) { const size_t y_bytes = static_cast(dst_wstride) * dst_hstride; const size_t uv_stride = static_cast(dst_wstride) / 2; const size_t uv_h = static_cast(dst_hstride) / 2; const size_t u_bytes = uv_stride * uv_h; const size_t v_bytes = u_bytes; if (y_bytes + u_bytes + v_bytes > dst_size) return false; const uint8_t* src_y = src.planes[0].data ? src.planes[0].data : src.data; const uint8_t* src_u = src.planes[1].data ? src.planes[1].data : nullptr; const uint8_t* src_v = src.planes[2].data ? src.planes[2].data : nullptr; const int src_y_stride = src.planes[0].stride > 0 ? src.planes[0].stride : w; const int src_u_stride = src.planes[1].stride > 0 ? src.planes[1].stride : (w / 2); const int src_v_stride = src.planes[2].stride > 0 ? src.planes[2].stride : (w / 2); if (!src_y || !src_u || !src_v) return false; for (int row = 0; row < h; ++row) { std::memcpy(dst + static_cast(row) * dst_wstride, src_y + static_cast(row) * src_y_stride, static_cast(w)); } uint8_t* dst_u = dst + y_bytes; uint8_t* dst_v = dst + y_bytes + u_bytes; const int uv_rows = h / 2; const int uv_cols = w / 2; for (int row = 0; row < uv_rows; ++row) { std::memcpy(dst_u + static_cast(row) * uv_stride, src_u + static_cast(row) * src_u_stride, static_cast(uv_cols)); std::memcpy(dst_v + static_cast(row) * uv_stride, src_v + static_cast(row) * src_v_stride, static_cast(uv_cols)); } return true; } if (src.format == PixelFormat::RGB || src.format == PixelFormat::BGR) { const size_t need = static_cast(dst_wstride) * dst_hstride * 3; if (need > dst_size) return false; const uint8_t* src_rgb = src.planes[0].data ? src.planes[0].data : src.data; const int src_stride = src.planes[0].stride > 0 ? src.planes[0].stride : (src.stride > 0 ? src.stride : w * 3); if (!src_rgb) return false; const size_t dst_stride = static_cast(dst_wstride) * 3; const size_t row_bytes = static_cast(w) * 3; for (int row = 0; row < h; ++row) { std::memcpy(dst + static_cast(row) * dst_stride, src_rgb + static_cast(row) * src_stride, row_bytes); } return true; } return false; } } // namespace class PreprocessNode : public INode { public: std::string Id() const override { return id_; } std::string Type() const override { return "preprocess"; } bool Init(const SimpleJson& config, const NodeContext& ctx) override { id_ = config.ValueOr("id", "preprocess"); dst_w_ = config.ValueOr("dst_w", 640); dst_h_ = config.ValueOr("dst_h", 640); keep_ratio_ = config.ValueOr("keep_ratio", false); dst_packed_ = config.ValueOr("dst_packed", false); std::string fmt_str = config.ValueOr("dst_format", ""); if (!fmt_str.empty()) { dst_fmt_ = ParseFormat(fmt_str); } #if defined(RK3588_ENABLE_RGA) const int rga_max_inflight = config.ValueOr("rga_max_inflight", 0); if (rga_max_inflight > 0) { SetGlobalRgaMaxInflight(rga_max_inflight); } #endif const bool requested_use_rga = config.ValueOr("use_rga", true); use_rga_ = requested_use_rga; if (const SimpleJson* dbg = config.Find("debug"); dbg && dbg->IsObject()) { stats_log_ = dbg->ValueOr("stats", stats_log_); stats_interval_ = std::max(1, static_cast(dbg->ValueOr("stats_interval", static_cast(stats_interval_)))); } input_queue_ = ctx.input_queue; if (!input_queue_) { std::cerr << "[preprocess] no input queue for node " << id_ << "\n"; return false; } if (ctx.output_queues.empty()) { std::cerr << "[preprocess] no output queue for node " << id_ << "\n"; return false; } output_queues_ = ctx.output_queues; #if !defined(RK3588_ENABLE_RGA) if (requested_use_rga) { std::cerr << "[preprocess] use_rga=true but RGA not enabled at build time\n"; return false; } use_rga_ = false; #endif #if !defined(RK3588_ENABLE_FFMPEG) if (!use_rga_) { std::cerr << "[preprocess] neither RGA nor FFmpeg enabled\n"; return false; } #endif return true; } bool Start() override { LogInfo("[preprocess] start id=" + id_ + " dst=" + std::to_string(dst_w_) + "x" + std::to_string(dst_h_) + (use_rga_ ? " (rga)" : " (swscale)")); return true; } void Stop() override { #if defined(RK3588_ENABLE_FFMPEG) if (sws_ctx_) { sws_freeContext(sws_ctx_); sws_ctx_ = nullptr; } #endif } NodeStatus Process(FramePtr frame) override { if (!frame) return NodeStatus::DROP; #if defined(RK3588_ENABLE_RGA) if (use_rga_) { if (!ProcessRga(frame)) { return NodeStatus::ERROR; } } else { ProcessSwscale(frame); } #elif defined(RK3588_ENABLE_FFMPEG) ProcessSwscale(frame); #else ProcessPassthrough(frame); #endif return NodeStatus::OK; } private: void PushToDownstream(FramePtr frame) { for (auto& q : output_queues_) { q->Push(frame); } } void WarnMetaResizeOnce(const FramePtr& frame, int out_w, int out_h) { if (warned_meta_resize_) return; if (!frame) return; if (frame->width == out_w && frame->height == out_h) return; if (!frame->det && !frame->face_det && !frame->face_recog) return; warned_meta_resize_ = true; LogWarn("[preprocess] resized frame but forwarded det/face meta without coordinate scaling; ensure det/recog/osd use same resolution (id=" + id_ + ")"); } void ProcessPassthrough(FramePtr frame) { PushToDownstream(frame); ++processed_; if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) { LogInfo("[preprocess] passthrough frame=" + std::to_string(frame->frame_id) + " id=" + id_); } } #if defined(RK3588_ENABLE_RGA) bool ProcessRga(FramePtr frame) { EnsureRgaInitializedOnce(); PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format; int out_w = dst_w_; int out_h = dst_h_; if (keep_ratio_ && frame->width > 0 && frame->height > 0) { float scale = std::min(static_cast(dst_w_) / frame->width, static_cast(dst_h_) / frame->height); out_w = static_cast(frame->width * scale); out_h = static_cast(frame->height * scale); out_w = (out_w + 1) & ~1; out_h = (out_h + 1) & ~1; } int src_fmt_rga = ToRgaFormat(frame->format); int dst_fmt_rga = ToRgaFormat(out_fmt); bool need_cvt = (src_fmt_rga != dst_fmt_rga); bool need_resize = (frame->width != out_w || frame->height != out_h); if (need_resize) { WarnMetaResizeOnce(frame, out_w, out_h); } // If no processing needed, passthrough directly if (!need_cvt && !need_resize) { PushToDownstream(frame); ++processed_; if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) { LogInfo("[preprocess] passthrough frame=" + std::to_string(frame->frame_id) + " " + std::to_string(frame->width) + "x" + std::to_string(frame->height) + " (no change)" + " id=" + id_); } return true; } // Calculate proper strides. // IMPORTANT: For DMA-BUF frames (e.g. MPP decode output), the actual vertical stride // (ver_stride) may be larger than `height`. We must honor that, otherwise RGA will // read UV from the wrong offset (典型花屏/错色). int src_wstride = Align16(frame->width); int src_hstride = Align16(frame->height); if (frame->format == PixelFormat::NV12 || frame->format == PixelFormat::YUV420) { const int y_stride = frame->planes[0].stride > 0 ? frame->planes[0].stride : (frame->stride > 0 ? frame->stride : frame->width); if (y_stride > 0) src_wstride = y_stride; if (frame->planes[0].size > 0 && y_stride > 0) { const int hs = frame->planes[0].size / y_stride; if (hs >= frame->height) src_hstride = hs; } } else if (frame->format == PixelFormat::RGB || frame->format == PixelFormat::BGR) { const int stride_bytes = frame->planes[0].stride > 0 ? frame->planes[0].stride : (frame->stride > 0 ? frame->stride : frame->width * 3); if (stride_bytes > 0 && (stride_bytes % 3) == 0) { src_wstride = stride_bytes / 3; } if (frame->planes[0].size > 0 && stride_bytes > 0) { const int hs = frame->planes[0].size / stride_bytes; if (hs >= frame->height) src_hstride = hs; } } int dst_wstride = Align16(out_w); // For AI input (RGB/BGR), allow a tightly packed output to avoid an extra per-frame memcpy // in downstream nodes (e.g. ai_yolo). if (dst_packed_ && (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR)) { dst_wstride = out_w; } int dst_hstride = Align16(out_h); if (src_fmt_rga == RK_FORMAT_UNKNOWN || dst_fmt_rga == RK_FORMAT_UNKNOWN) { std::cerr << "[preprocess] unsupported format for RGA\n"; return false; } size_t out_size = CalcImageSizeStrided(dst_wstride, dst_hstride, out_fmt); if (out_size == 0) { std::cerr << "[preprocess] invalid output size for RGA\n"; return false; } // Use DMA-BUF allocation to avoid >4GB address issue with RGA auto dma_buf = DmaAlloc(out_size); if (!dma_buf || !dma_buf->valid()) { std::cerr << "[preprocess] DMA alloc failed\n"; return false; } if (stats_log_ && processed_ < 3) { LogInfo("[preprocess] src: " + std::to_string(frame->width) + "x" + std::to_string(frame->height) + " fmt=" + std::to_string(static_cast(frame->format)) + " rga_fmt=" + std::to_string(src_fmt_rga) + " wstride=" + std::to_string(src_wstride) + " hstride=" + std::to_string(src_hstride) + " data_size=" + std::to_string(frame->data_size)); LogInfo("[preprocess] dst: " + std::to_string(out_w) + "x" + std::to_string(out_h) + " fmt=" + std::to_string(static_cast(out_fmt)) + " rga_fmt=" + std::to_string(dst_fmt_rga) + " wstride=" + std::to_string(dst_wstride) + " hstride=" + std::to_string(dst_hstride) + " out_size=" + std::to_string(out_size)); } rga_buffer_t src_buf{}; rga_buffer_t dst_buf{}; DmaBufferPtr src_dma_buf; // keep alive if we allocate/copy // Lazily allocate tmp buffer only when we must fall back to a 2-step pipeline. // (resize + cvtcolor). Prefer a single improcess() call to reduce scheduling overhead. DmaBufferPtr tmp_dma; const bool can_cpu_read_src = (frame->data != nullptr && frame->data_size > 0); auto CopySrcToDmaIfPossible = [&]() -> bool { if (!can_cpu_read_src) return false; // Use tight/aligned strides for the copied source buffer. const int copy_wstride = Align16(frame->width); const int copy_hstride = Align16(frame->height); const size_t src_size = CalcImageSizeStrided(copy_wstride, copy_hstride, frame->format); src_dma_buf = DmaAlloc(src_size); if (!src_dma_buf || !src_dma_buf->valid()) { return false; } // If source is a DMA-BUF, sync it before CPU reads. if (frame->dma_fd >= 0) { DmaSyncStartFd(frame->dma_fd); } // CPU writes to a DMA-BUF must be flushed before RGA reads it. DmaSyncStartFd(src_dma_buf->fd); const bool ok = CopyToStridedBuffer(*frame, src_dma_buf->data(), src_dma_buf->size, copy_wstride, copy_hstride); DmaSyncEndFd(src_dma_buf->fd); if (frame->dma_fd >= 0) { DmaSyncEndFd(frame->dma_fd); } if (!ok) { src_dma_buf.reset(); return false; } // Update strides used for RGA to match the copied buffer. src_wstride = copy_wstride; src_hstride = copy_hstride; return true; }; // If there's no DMA fd, we must allocate/copy into a DMA-BUF for RGA. if (frame->dma_fd < 0) { if (!CopySrcToDmaIfPossible()) { std::cerr << "[preprocess] no dma_fd and src copy failed\n"; return false; } } auto RunRgaOnce = [&](int src_fd, std::string& err) -> bool { // Serialize/limit librga/im2d usage; multiple pipelines call RGA concurrently. ScopedRgaGate guard; src_buf = wrapbuffer_fd_t(src_fd, frame->width, frame->height, src_wstride, src_hstride, src_fmt_rga); dst_buf = wrapbuffer_fd_t(dma_buf->fd, out_w, out_h, dst_wstride, dst_hstride, dst_fmt_rga); auto Check = [&](const rga_buffer_t& s, const rga_buffer_t& d) -> bool { const im_rect sr{0, 0, s.width, s.height}; const im_rect dr{0, 0, d.width, d.height}; // Do NOT call the imcheck(...) variadic macro with 0 extra args: // under -Wpedantic/-std=c++11 it expands to a zero-sized array. rga_buffer_t pat{}; const im_rect pr{0, 0, 0, 0}; const IM_STATUS chk = imcheck_t(s, d, pat, sr, dr, pr, 0); if (chk != IM_STATUS_NOERROR && chk != IM_STATUS_SUCCESS) { err = std::string("RGA imcheck failed: ") + imStrError(chk); return false; } return true; }; IM_STATUS status = IM_STATUS_SUCCESS; if (need_resize && need_cvt) { // Try to fuse resize + CSC in a single call. if (Check(src_buf, dst_buf)) { rga_buffer_t pat{}; const im_rect sr{0, 0, src_buf.width, src_buf.height}; const im_rect dr{0, 0, dst_buf.width, dst_buf.height}; const im_rect pr{0, 0, 0, 0}; status = improcess(src_buf, dst_buf, pat, sr, dr, pr, 0, nullptr, nullptr, IM_SYNC); if (status == IM_STATUS_SUCCESS) { return true; } } // Fallback: 2-step (resize + cvtcolor). if (!tmp_dma || !tmp_dma->valid()) { tmp_dma = DmaAlloc(CalcImageSizeStrided(dst_wstride, dst_hstride, frame->format)); if (!tmp_dma || !tmp_dma->valid()) { err = "DMA alloc for tmp failed"; return false; } } rga_buffer_t tmp = wrapbuffer_fd_t(tmp_dma->fd, out_w, out_h, dst_wstride, dst_hstride, src_fmt_rga); if (!Check(src_buf, tmp) || !Check(tmp, dst_buf)) { return false; } status = imresize(src_buf, tmp, 0, 0, 0, 1, nullptr); if (status == IM_STATUS_SUCCESS) { status = imcvtcolor(tmp, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT, 1, nullptr); } } else if (need_resize) { if (!Check(src_buf, dst_buf)) { return false; } status = imresize(src_buf, dst_buf, 0, 0, 0, 1, nullptr); } else if (need_cvt) { if (!Check(src_buf, dst_buf)) { return false; } status = imcvtcolor(src_buf, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT, 1, nullptr); } if (status != IM_STATUS_SUCCESS) { err = std::string("RGA failed: ") + imStrError(status); return false; } return true; }; std::string rga_err; const int src_fd = (src_dma_buf && src_dma_buf->valid()) ? src_dma_buf->fd : frame->dma_fd; if (src_fd < 0 || !RunRgaOnce(src_fd, rga_err)) { std::cerr << "[preprocess] " << (rga_err.empty() ? "RGA failed" : rga_err) << "\n"; return false; } auto out_frame = std::make_shared(); out_frame->width = out_w; out_frame->height = out_h; out_frame->format = out_fmt; out_frame->stride = (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR) ? (dst_wstride * 3) : dst_wstride; out_frame->dma_fd = dma_buf->fd; out_frame->data = dma_buf->data(); out_frame->data_size = dma_buf->size; out_frame->data_owner = dma_buf; // DmaBuffer shared_ptr keeps fd alive out_frame->pts = frame->pts; out_frame->frame_id = frame->frame_id; out_frame->det = frame->det; out_frame->face_det = frame->face_det; out_frame->face_recog = frame->face_recog; out_frame->user_meta = frame->user_meta; SetupPlanes(*out_frame, out_fmt); PushToDownstream(out_frame); ++processed_; if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) { LogInfo("[preprocess] rga frame=" + std::to_string(out_frame->frame_id) + " " + std::to_string(frame->width) + "x" + std::to_string(frame->height) + " -> " + std::to_string(out_w) + "x" + std::to_string(out_h)); } return true; } #endif #if defined(RK3588_ENABLE_FFMPEG) void ProcessSwscale(FramePtr frame) { PixelFormat out_fmt = (dst_fmt_ != PixelFormat::UNKNOWN) ? dst_fmt_ : frame->format; int out_w = dst_w_; int out_h = dst_h_; if (keep_ratio_ && frame->width > 0 && frame->height > 0) { float scale = std::min(static_cast(dst_w_) / frame->width, static_cast(dst_h_) / frame->height); out_w = static_cast(frame->width * scale); out_h = static_cast(frame->height * scale); out_w = (out_w + 1) & ~1; out_h = (out_h + 1) & ~1; } if (frame->width != out_w || frame->height != out_h) { WarnMetaResizeOnce(frame, out_w, out_h); } AVPixelFormat src_av_fmt = ToAvFormat(frame->format); AVPixelFormat dst_av_fmt = ToAvFormat(out_fmt); if (src_av_fmt == AV_PIX_FMT_NONE || dst_av_fmt == AV_PIX_FMT_NONE) { PushToDownstream(frame); return; } if (!sws_ctx_ || frame->width != last_src_w_ || frame->height != last_src_h_ || src_av_fmt != last_src_fmt_ || dst_av_fmt != last_dst_fmt_) { if (sws_ctx_) sws_freeContext(sws_ctx_); sws_ctx_ = sws_getContext(frame->width, frame->height, src_av_fmt, out_w, out_h, dst_av_fmt, SWS_BILINEAR, nullptr, nullptr, nullptr); last_src_w_ = frame->width; last_src_h_ = frame->height; last_src_fmt_ = src_av_fmt; last_dst_fmt_ = dst_av_fmt; } if (!sws_ctx_) { PushToDownstream(frame); return; } size_t out_size = CalcImageSize(out_w, out_h, out_fmt); auto buffer = std::make_shared>(out_size); uint8_t* src_data[4] = {nullptr}; int src_linesize[4] = {0}; uint8_t* dst_data[4] = {nullptr}; int dst_linesize[4] = {0}; SetupAvPlanes(frame.get(), src_data, src_linesize); av_image_fill_arrays(dst_data, dst_linesize, buffer->data(), dst_av_fmt, out_w, out_h, 1); sws_scale(sws_ctx_, src_data, src_linesize, 0, frame->height, dst_data, dst_linesize); auto out_frame = std::make_shared(); out_frame->width = out_w; out_frame->height = out_h; out_frame->format = out_fmt; out_frame->stride = (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR) ? (out_w * 3) : out_w; out_frame->data = buffer->data(); out_frame->data_size = buffer->size(); out_frame->data_owner = buffer; out_frame->pts = frame->pts; out_frame->frame_id = frame->frame_id; out_frame->det = frame->det; out_frame->face_det = frame->face_det; out_frame->face_recog = frame->face_recog; out_frame->user_meta = frame->user_meta; SetupPlanes(*out_frame, out_fmt); PushToDownstream(out_frame); ++processed_; if (stats_log_ && stats_interval_ > 0 && (processed_ % stats_interval_) == 0) { LogInfo("[preprocess] swscale frame=" + std::to_string(out_frame->frame_id) + " " + std::to_string(frame->width) + "x" + std::to_string(frame->height) + " -> " + std::to_string(out_w) + "x" + std::to_string(out_h) + " id=" + id_); } } static AVPixelFormat ToAvFormat(PixelFormat fmt) { switch (fmt) { case PixelFormat::NV12: return AV_PIX_FMT_NV12; case PixelFormat::YUV420: return AV_PIX_FMT_YUV420P; case PixelFormat::RGB: return AV_PIX_FMT_RGB24; case PixelFormat::BGR: return AV_PIX_FMT_BGR24; default: return AV_PIX_FMT_NONE; } } static void SetupAvPlanes(const Frame* f, uint8_t* data[4], int linesize[4]) { if (!f->data) return; if (f->format == PixelFormat::NV12) { data[0] = f->planes[0].data ? f->planes[0].data : f->data; data[1] = f->planes[1].data ? f->planes[1].data : (f->data + f->width * f->height); linesize[0] = f->planes[0].stride > 0 ? f->planes[0].stride : f->width; linesize[1] = f->planes[1].stride > 0 ? f->planes[1].stride : f->width; } else if (f->format == PixelFormat::YUV420) { data[0] = f->planes[0].data ? f->planes[0].data : f->data; int y_size = f->width * f->height; int uv_size = y_size / 4; data[1] = f->planes[1].data ? f->planes[1].data : (f->data + y_size); data[2] = f->planes[2].data ? f->planes[2].data : (f->data + y_size + uv_size); linesize[0] = f->planes[0].stride > 0 ? f->planes[0].stride : f->width; linesize[1] = f->planes[1].stride > 0 ? f->planes[1].stride : f->width / 2; linesize[2] = f->planes[2].stride > 0 ? f->planes[2].stride : f->width / 2; } else { data[0] = f->data; linesize[0] = f->stride > 0 ? f->stride : f->width * 3; } } #endif void SetupPlanes(Frame& f, PixelFormat fmt) { if (!f.data) return; if (fmt == PixelFormat::NV12) { f.plane_count = 2; int y_stride = f.stride > 0 ? f.stride : f.width; if (y_stride <= 0) y_stride = f.width; size_t y_bytes = static_cast(y_stride) * static_cast(f.height); if (f.data_size > 0) { const size_t candidate = (f.data_size * 2) / 3; // total = Y + UV = Y*3/2 if (candidate >= y_bytes && candidate <= f.data_size && (candidate % static_cast(y_stride)) == 0) { y_bytes = candidate; } } size_t uv_bytes = y_bytes / 2; if (f.data_size > 0 && y_bytes + uv_bytes > f.data_size) { uv_bytes = f.data_size > y_bytes ? (f.data_size - y_bytes) : 0; } f.planes[0] = {f.data, y_stride, static_cast(y_bytes), 0}; f.planes[1] = {f.data + y_bytes, y_stride, static_cast(uv_bytes), static_cast(y_bytes)}; return; } if (fmt == PixelFormat::YUV420) { f.plane_count = 3; int y_stride = f.stride > 0 ? f.stride : f.width; if (y_stride <= 0) y_stride = f.width; size_t y_bytes = static_cast(y_stride) * static_cast(f.height); size_t hstride = static_cast(f.height); if (f.data_size > 0) { const size_t candidate = (f.data_size * 2) / 3; if (candidate >= y_bytes && candidate <= f.data_size && (candidate % static_cast(y_stride)) == 0) { y_bytes = candidate; hstride = y_bytes / static_cast(y_stride); } } size_t uv_stride = static_cast(y_stride) / 2; size_t uv_h = hstride / 2; size_t u_bytes = uv_stride * uv_h; size_t v_bytes = u_bytes; size_t need = y_bytes + u_bytes + v_bytes; if (f.data_size > 0 && need > f.data_size) { // Fallback to tightly packed layout. y_stride = f.width; y_bytes = static_cast(f.width) * static_cast(f.height); uv_stride = static_cast(f.width) / 2; uv_h = static_cast(f.height) / 2; u_bytes = uv_stride * uv_h; v_bytes = u_bytes; } f.planes[0] = {f.data, y_stride, static_cast(y_bytes), 0}; f.planes[1] = {f.data + y_bytes, static_cast(uv_stride), static_cast(u_bytes), static_cast(y_bytes)}; f.planes[2] = {f.data + y_bytes + u_bytes, static_cast(uv_stride), static_cast(v_bytes), static_cast(y_bytes + u_bytes)}; return; } // RGB/BGR f.plane_count = 1; int stride_bytes = f.stride > 0 ? f.stride : (f.width * 3); f.planes[0] = {f.data, stride_bytes, static_cast(f.data_size), 0}; } std::string id_; int dst_w_ = 640; int dst_h_ = 640; bool keep_ratio_ = false; PixelFormat dst_fmt_ = PixelFormat::UNKNOWN; bool dst_packed_ = false; bool use_rga_ = true; bool stats_log_ = false; uint64_t stats_interval_ = 100; bool warned_meta_resize_ = false; std::shared_ptr> input_queue_; std::vector>> output_queues_; uint64_t processed_ = 0; #if defined(RK3588_ENABLE_FFMPEG) SwsContext* sws_ctx_ = nullptr; int last_src_w_ = 0; int last_src_h_ = 0; AVPixelFormat last_src_fmt_ = AV_PIX_FMT_NONE; AVPixelFormat last_dst_fmt_ = AV_PIX_FMT_NONE; #endif }; REGISTER_NODE(PreprocessNode, "preprocess"); } // namespace rk3588