From 03465b63073d3b95d0c7ec5e9a34f96f15c05d30 Mon Sep 17 00:00:00 2001
From: sladro
Date: Fri, 26 Dec 2025 16:09:12 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BD=BF=E7=94=A8=20DMA-BUF=20=E5=88=86?=
 =?UTF-8?q?=E9=85=8D=E5=86=85=E5=AD=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CMakeLists.txt                         |   2 +
 include/utils/dma_alloc.h              |  45 ++++++
 plugins/CMakeLists.txt                 |   5 +-
 plugins/preprocess/preprocess_node.cpp |  35 +++--
 src/utils/dma_alloc.cpp                | 183 +++++++++++++++++++++++++
 5 files changed, 258 insertions(+), 12 deletions(-)
 create mode 100644 include/utils/dma_alloc.h
 create mode 100644 src/utils/dma_alloc.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e517d5f..318a1e5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,6 +53,8 @@ set(SRC_FILES
     src/media_server_app.cpp
     src/graph_manager.cpp
     src/plugin_loader.cpp
+    src/ai_scheduler.cpp
+    src/utils/dma_alloc.cpp
 )
 
 add_executable(media-server ${SRC_FILES})
diff --git a/include/utils/dma_alloc.h b/include/utils/dma_alloc.h
new file mode 100644
index 0000000..01fbf1d
--- /dev/null
+++ b/include/utils/dma_alloc.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace rk3588 {
+
+// Move-only owner of one dma-buf: the exported fd plus its CPU mapping.
+// The destructor unmaps `ptr` and closes `fd`.
+struct DmaBuffer {
+    int fd = -1;          // dma-buf file descriptor, -1 when empty
+    void* ptr = nullptr;  // CPU mapping of the buffer, nullptr when empty
+    size_t size = 0;      // length of the mapping in bytes
+
+    DmaBuffer() = default;
+    ~DmaBuffer();
+
+    // Non-copyable: a copy would close/unmap the same resources twice.
+    DmaBuffer(const DmaBuffer&) = delete;
+    DmaBuffer& operator=(const DmaBuffer&) = delete;
+
+    // Moving transfers ownership and leaves `other` empty.
+    DmaBuffer(DmaBuffer&& other) noexcept;
+    DmaBuffer& operator=(DmaBuffer&& other) noexcept;
+
+    uint8_t* data() { return static_cast<uint8_t*>(ptr); }
+    const uint8_t* data() const { return static_cast<const uint8_t*>(ptr); }
+    // True only when both the fd and the mapping are present.
+    bool valid() const { return fd >= 0 && ptr != nullptr; }
+};
+
+using DmaBufferPtr = std::shared_ptr<DmaBuffer>;
+
+// Allocates `size` bytes from the first dma_heap that can satisfy the
+// request and maps them read/write into this process.
+// Returns nullptr on failure (including size == 0 and non-Linux builds).
+DmaBufferPtr DmaAlloc(size_t size);
+
+// Bracket CPU reads/writes of `buf` with DmaSyncStart()/DmaSyncEnd()
+// (DMA_BUF_IOCTL_SYNC, read+write). A null or empty buffer is ignored.
+void DmaSyncStart(DmaBuffer* buf);
+void DmaSyncEnd(DmaBuffer* buf);
+
+} // namespace rk3588
diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt
index 6e6a6b7..d8cf6c8 100644
--- a/plugins/CMakeLists.txt
+++ b/plugins/CMakeLists.txt
@@ -156,7 +156,10 @@ set_target_properties(publish PROPERTIES
     RUNTIME_OUTPUT_DIRECTORY ${RK_PLUGIN_OUTPUT_DIR}
 )
 
-add_library(preprocess SHARED preprocess/preprocess_node.cpp)
+add_library(preprocess SHARED
+    preprocess/preprocess_node.cpp
+    ${CMAKE_SOURCE_DIR}/src/utils/dma_alloc.cpp
+)
 target_include_directories(preprocess PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/third_party)
 target_link_libraries(preprocess PRIVATE project_options Threads::Threads)
 if(RK3588_ENABLE_FFMPEG)
diff --git a/plugins/preprocess/preprocess_node.cpp b/plugins/preprocess/preprocess_node.cpp
index 57a9e43..e5244da 100644
--- a/plugins/preprocess/preprocess_node.cpp
+++ b/plugins/preprocess/preprocess_node.cpp
@@ -8,5 +8,6 @@
 
 #include "node.h"
+#include "utils/dma_alloc.h"
 
 #if defined(RK3588_ENABLE_RGA)
 #include "im2d.hpp"
@@ -193,7 +194,13 @@ private:
                 continue;
             }
 
-            auto buffer = std::make_shared<std::vector<uint8_t>>(out_size);
+            // Use DMA-BUF allocation to avoid >4GB address issue with RGA
+            auto dma_buf = DmaAlloc(out_size);
+            if (!dma_buf || !dma_buf->valid()) {
+                std::cerr << "[preprocess] DMA alloc failed, forwarding frame unprocessed\n";
+                PushToDownstream(frame);
+                continue;
+            }
 
             // Calculate proper strides (RGA requires aligned strides)
             int src_wstride = frame->planes[0].stride > 0 ? frame->planes[0].stride
@@ -216,17 +223,22 @@ private:
                 continue;
             }
 
-            dst_buf = wrapbuffer_virtualaddr_t(buffer->data(), out_w, out_h,
-                                               dst_wstride, dst_hstride, dst_fmt_rga);
+            // Use DMA fd for destination buffer
+            dst_buf = wrapbuffer_fd_t(dma_buf->fd, out_w, out_h,
+                                      dst_wstride, dst_hstride, dst_fmt_rga);
 
             IM_STATUS status = IM_STATUS_SUCCESS;
 
             if (need_resize && need_cvt) {
-                // RGA3 can do resize + cvtcolor in one call via improcess
-                // But for simplicity, do resize first (output same format), then cvtcolor
-                auto tmp_buf = std::make_shared<std::vector<uint8_t>>(CalcImageSize(out_w, out_h, frame->format));
-                rga_buffer_t tmp = wrapbuffer_virtualaddr_t(tmp_buf->data(), out_w, out_h,
-                                                            dst_wstride, dst_hstride, src_fmt_rga);
+                // Allocate DMA buffer for intermediate result
+                auto tmp_dma = DmaAlloc(CalcImageSize(out_w, out_h, frame->format));
+                if (!tmp_dma || !tmp_dma->valid()) {
+                    std::cerr << "[preprocess] DMA alloc for tmp failed\n";
+                    PushToDownstream(frame);
+                    continue;
+                }
+                rga_buffer_t tmp = wrapbuffer_fd_t(tmp_dma->fd, out_w, out_h,
+                                                   dst_wstride, dst_hstride, src_fmt_rga);
                 status = imresize(src_buf, tmp);
                 if (status == IM_STATUS_SUCCESS) {
                     status = imcvtcolor(tmp, dst_buf, src_fmt_rga, dst_fmt_rga, IM_COLOR_SPACE_DEFAULT);
@@ -248,9 +260,10 @@ private:
             out_frame->height = out_h;
             out_frame->format = out_fmt;
             out_frame->stride = dst_wstride;
-            out_frame->data = buffer->data();
-            out_frame->data_size = buffer->size();
-            out_frame->data_owner = buffer;
+            out_frame->dma_fd = dma_buf->fd;
+            out_frame->data = dma_buf->data();
+            out_frame->data_size = dma_buf->size;
+            out_frame->data_owner = dma_buf;  // DmaBuffer shared_ptr keeps fd alive
             out_frame->pts = frame->pts;
             out_frame->frame_id = frame->frame_id;
             out_frame->det = frame->det;
diff --git a/src/utils/dma_alloc.cpp b/src/utils/dma_alloc.cpp
new file mode 100644
index 0000000..1523b1a
--- /dev/null
+++ b/src/utils/dma_alloc.cpp
@@ -0,0 +1,183 @@
+#include "utils/dma_alloc.h"
+
+#include <cerrno>
+#include <cstring>
+#include <iostream>
+
+#if defined(__linux__)
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+// dma_heap ioctl definitions (linux/dma-heap.h may not be available)
+struct dma_heap_allocation_data {
+    uint64_t len;
+    uint32_t fd;
+    uint32_t fd_flags;
+    uint64_t heap_flags;
+};
+
+#define DMA_HEAP_IOC_MAGIC 'H'
+#define DMA_HEAP_IOCTL_ALLOC _IOWR(DMA_HEAP_IOC_MAGIC, 0x0, struct dma_heap_allocation_data)
+
+// DMA-BUF sync ioctl
+struct dma_buf_sync {
+    uint64_t flags;
+};
+
+#define DMA_BUF_SYNC_READ (1 << 0)
+#define DMA_BUF_SYNC_WRITE (2 << 0)
+#define DMA_BUF_SYNC_START (0 << 2)
+#define DMA_BUF_SYNC_END (1 << 2)
+#define DMA_BUF_BASE 'b'
+#define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
+
+#endif
+
+namespace rk3588 {
+
+namespace {
+
+// Unmaps and closes whatever `buf` owns, then resets it to the empty state.
+// Shared by the destructor and move assignment so the two cannot drift apart.
+void Release(DmaBuffer& buf) noexcept {
+#if defined(__linux__)
+    if (buf.ptr && buf.size > 0) {
+        munmap(buf.ptr, buf.size);
+    }
+    if (buf.fd >= 0) {
+        close(buf.fd);
+    }
+#endif
+    buf.fd = -1;
+    buf.ptr = nullptr;
+    buf.size = 0;
+}
+
+#if defined(__linux__)
+// Issues DMA_BUF_IOCTL_SYNC for `phase` (START or END) with read+write access.
+// The kernel may interrupt the call; it is restarted on EINTR/EAGAIN.
+void SyncCpuAccess(const DmaBuffer* buf, uint64_t phase) {
+    if (!buf || buf->fd < 0) return;
+    dma_buf_sync sync{};
+    sync.flags = phase | DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE;
+    int rc = 0;
+    do {
+        rc = ioctl(buf->fd, DMA_BUF_IOCTL_SYNC, &sync);
+    } while (rc < 0 && (errno == EINTR || errno == EAGAIN));
+    if (rc < 0) {
+        std::cerr << "[DmaSync] DMA_BUF_IOCTL_SYNC failed: " << strerror(errno) << "\n";
+    }
+}
+#endif
+
+} // namespace
+
+DmaBuffer::~DmaBuffer() {
+    Release(*this);
+}
+
+DmaBuffer::DmaBuffer(DmaBuffer&& other) noexcept
+    : fd(other.fd), ptr(other.ptr), size(other.size) {
+    other.fd = -1;
+    other.ptr = nullptr;
+    other.size = 0;
+}
+
+DmaBuffer& DmaBuffer::operator=(DmaBuffer&& other) noexcept {
+    if (this != &other) {
+        Release(*this);
+        fd = other.fd;
+        ptr = other.ptr;
+        size = other.size;
+        other.fd = -1;
+        other.ptr = nullptr;
+        other.size = 0;
+    }
+    return *this;
+}
+
+DmaBufferPtr DmaAlloc(size_t alloc_size) {
+#if defined(__linux__)
+    if (alloc_size == 0) {
+        std::cerr << "[DmaAlloc] zero-sized allocation requested\n";
+        return nullptr;
+    }
+
+    // Heaps in order of preference.
+    static const char* const kHeapPaths[] = {
+        "/dev/dma_heap/system-uncached",
+        "/dev/dma_heap/system",
+        "/dev/dma_heap/cma",
+    };
+
+    // Created up front so every early return below releases the fd/mapping
+    // through ~DmaBuffer instead of hand-written cleanup.
+    auto buf = std::make_shared<DmaBuffer>();
+
+    int last_errno = ENOENT;
+    for (const char* path : kHeapPaths) {
+        const int heap_fd = open(path, O_RDWR | O_CLOEXEC);
+        if (heap_fd < 0) {
+            last_errno = errno;
+            continue;
+        }
+
+        dma_heap_allocation_data alloc_data{};
+        alloc_data.len = alloc_size;
+        alloc_data.fd_flags = O_RDWR | O_CLOEXEC;
+        alloc_data.heap_flags = 0;
+
+        const int rc = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc_data);
+        if (rc < 0) {
+            last_errno = errno;  // save before close() can overwrite errno
+        }
+        close(heap_fd);
+
+        // A heap that opens may still refuse the request; try the next one.
+        if (rc >= 0) {
+            buf->fd = static_cast<int>(alloc_data.fd);
+            break;
+        }
+    }
+
+    if (buf->fd < 0) {
+        std::cerr << "[DmaAlloc] no dma_heap could allocate " << alloc_size
+                  << " bytes: " << strerror(last_errno) << "\n";
+        return nullptr;
+    }
+
+    void* ptr = mmap(nullptr, alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED, buf->fd, 0);
+    if (ptr == MAP_FAILED) {
+        std::cerr << "[DmaAlloc] mmap failed: " << strerror(errno) << "\n";
+        return nullptr;  // buf's destructor closes the fd
+    }
+
+    buf->ptr = ptr;
+    buf->size = alloc_size;
+    return buf;
+#else
+    (void)alloc_size;
+    std::cerr << "[DmaAlloc] not supported on this platform\n";
+    return nullptr;
+#endif
+}
+
+void DmaSyncStart(DmaBuffer* buf) {
+#if defined(__linux__)
+    SyncCpuAccess(buf, DMA_BUF_SYNC_START);
+#else
+    (void)buf;
+#endif
+}
+
+void DmaSyncEnd(DmaBuffer* buf) {
+#if defined(__linux__)
+    SyncCpuAccess(buf, DMA_BUF_SYNC_END);
+#else
+    (void)buf;
+#endif
+}
+
+} // namespace rk3588