From 1c19ca1093ac0bd2b2cdfb8ba890929af24e7d41 Mon Sep 17 00:00:00 2001
From: sladro <sladro@163.com>
Date: Tue, 13 Jan 2026 19:52:26 +0800
Subject: [PATCH] =?UTF-8?q?=E8=A7=84=E8=8C=83=E4=BC=98=E5=8C=965?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 plugins/ai_face_det/ai_face_det_node.cpp     | 18 +++++++--
 plugins/ai_face_recog/ai_face_recog_node.cpp |  6 +++
 plugins/ai_yolo/ai_yolo_node.cpp             |  4 ++
 plugins/alarm/actions/clip_action.cpp        | 42 +++++++++++++++++++-
 src/ai_scheduler.cpp                         | 39 +++++++++++++++---
 5 files changed, 99 insertions(+), 10 deletions(-)
diff --git a/plugins/ai_face_det/ai_face_det_node.cpp b/plugins/ai_face_det/ai_face_det_node.cpp
index 185f944..0f3a288 100644
--- a/plugins/ai_face_det/ai_face_det_node.cpp
+++ b/plugins/ai_face_det/ai_face_det_node.cpp
@@ -14,6 +14,7 @@
 #include "ai_scheduler.h"
 #include "face/face_result.h"
 #include "node.h"
+#include "utils/dma_alloc.h"
 #include "utils/logger.h"
 
 namespace rk3588 {
@@ -557,6 +558,9 @@ private:
         const bool need_swap = (frame->format == PixelFormat::BGR && cfg->input_format == "rgb") ||
                                (frame->format == PixelFormat::RGB && cfg->input_format == "bgr");
 
+        const bool want_float_input = (cfg->input_dtype == "float" || cfg->input_dtype == "f32" ||
+                                       cfg->input_dtype == "float32");
+
         const int in_w = model_w_ > 0 ? model_w_ : src_w;
         const int in_h = model_h_ > 0 ? model_h_ : src_h;
         const size_t in_size = static_cast<size_t>(in_w) * static_cast<size_t>(in_h) * 3;
@@ -564,8 +568,14 @@ private:
         const uint8_t* input_ptr = nullptr;
 
         // Fast path: already packed, correct size, no channel swap.
-        if (!need_swap && src_w == in_w && src_h == in_h &&
-            static_cast<size_t>(src_stride) == src_row && frame->data_size >= src_row * static_cast<size_t>(src_h)) {
+        const bool fast_path = (!need_swap && src_w == in_w && src_h == in_h &&
+                                static_cast<size_t>(src_stride) == src_row &&
+                                frame->data_size >= src_row * static_cast<size_t>(src_h));
+
+        const bool sync_src = (frame->dma_fd >= 0) && (want_float_input || !fast_path);
+        if (sync_src) DmaSyncStartFd(frame->dma_fd);
+
+        if (fast_path) {
             input_ptr = src;
         } else {
             input_buf_.resize(in_size);
@@ -588,7 +598,7 @@ private:
         input.is_nhwc = true;
 
         // Default: keep existing UINT8 behavior.
-        if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") {
+        if (want_float_input) {
             float_input_buf_.resize(static_cast<size_t>(in_w) * static_cast<size_t>(in_h) * 3);
             const size_t pix = static_cast<size_t>(in_w) * static_cast<size_t>(in_h);
             const uint8_t* p = reinterpret_cast<const uint8_t*>(input_ptr);
@@ -615,6 +625,8 @@ private:
             input.type = RKNN_TENSOR_UINT8;
         }
 
+        if (sync_src) DmaSyncEndFd(frame->dma_fd);
+
         auto r = AiScheduler::Instance().InferBorrowed(model_handle_, input);
         if (!r.success) {
             LogWarn("[ai_face_det] inference failed: " + r.error);
diff --git a/plugins/ai_face_recog/ai_face_recog_node.cpp b/plugins/ai_face_recog/ai_face_recog_node.cpp
index 3754b0b..10285f7 100644
--- a/plugins/ai_face_recog/ai_face_recog_node.cpp
+++ b/plugins/ai_face_recog/ai_face_recog_node.cpp
@@ -15,6 +15,7 @@
 #include "ai_scheduler.h"
 #include "face/face_result.h"
 #include "node.h"
+#include "utils/dma_alloc.h"
 #include "utils/logger.h"
 
 #if defined(RK3588_ENABLE_SQLITE3)
@@ -826,6 +827,9 @@ private:
         const bool need_swap = (frame->format == PixelFormat::BGR && cfg->model_input_format == "rgb") ||
                                (frame->format == PixelFormat::RGB && cfg->model_input_format == "bgr");
 
+        const bool sync_src = (frame->dma_fd >= 0);
+        if (sync_src) DmaSyncStartFd(frame->dma_fd);
+
         FaceRecogResult rr;
         rr.img_w = w;
         rr.img_h = h;
@@ -925,6 +929,8 @@ private:
             rr.items.push_back(std::move(item));
         }
 
+        if (sync_src) DmaSyncEndFd(frame->dma_fd);
+
         frame->face_recog = std::make_shared<FaceRecogResult>(std::move(rr));
     }
 #endif
diff --git a/plugins/ai_yolo/ai_yolo_node.cpp b/plugins/ai_yolo/ai_yolo_node.cpp
index 337a9b9..66eaad9 100644
--- a/plugins/ai_yolo/ai_yolo_node.cpp
+++ b/plugins/ai_yolo/ai_yolo_node.cpp
@@ -11,6 +11,7 @@
 
 #include "ai_scheduler.h"
 #include "node.h"
+#include "utils/dma_alloc.h"
 #include "utils/logger.h"
 
 #if defined(RK3588_ENABLE_RKNN)
@@ -443,12 +444,15 @@ private:
                         ", h=" + std::to_string(h) + ")");
                 return;
             }
+
+            if (frame->dma_fd >= 0) DmaSyncStartFd(frame->dma_fd);
             rgb_tmp_.resize(packed_size);
             for (int y = 0; y < h; ++y) {
                 memcpy(rgb_tmp_.data() + static_cast<size_t>(y) * packed_row,
                        src + static_cast<size_t>(y) * static_cast<size_t>(src_stride),
                        packed_row);
             }
+            if (frame->dma_fd >= 0) DmaSyncEndFd(frame->dma_fd);
             input.data = rgb_tmp_.data();
             input.size = packed_size;
         }
diff --git a/plugins/alarm/actions/clip_action.cpp b/plugins/alarm/actions/clip_action.cpp
index 70d64c0..dc5be41 100644
--- a/plugins/alarm/actions/clip_action.cpp
+++ b/plugins/alarm/actions/clip_action.cpp
@@ -2,6 +2,7 @@
 
 #include <algorithm>
 #include <chrono>
+#include <cstdlib>
 #include <cstring>
 #include <ctime>
 #include <filesystem>
@@ -11,6 +12,12 @@
 #include <sstream>
 #include <thread>
 
+#if defined(_WIN32)
+#include <windows.h>
+#elif defined(__unix__) || defined(__APPLE__)
+#include <unistd.h>
+#endif
+
 #include "utils/logger.h"
 
 #if defined(RK3588_ENABLE_FFMPEG)
@@ -43,6 +50,39 @@ bool SafeLocalTime(std::time_t t, std::tm& out) {
 #endif
 }
 
+// Reserves a uniquely named empty file for `prefix` inside `dir` ("." when empty) and
+// returns its path. If the OS call fails, returns the non-unique `<dir>/<prefix>_tmp`
+// without creating anything.
+static std::filesystem::path CreateTempFilePath(const std::filesystem::path& dir, const std::string& prefix) {
+    const std::filesystem::path base = dir.empty() ? std::filesystem::path(".") : dir;
+
+#if defined(_WIN32)
+    // Honor the caller's directory rather than re-querying the system temp path.
+    // GetTempFileNameW creates the file and fails (returns 0) when the directory is
+    // longer than MAX_PATH - 14 characters.
+    wchar_t tmp_file[MAX_PATH];
+    if (GetTempFileNameW(base.c_str(), L"clp", 0, tmp_file) == 0) {
+        return base / (prefix + "_tmp");
+    }
+    return std::filesystem::path(tmp_file);
+
+#elif defined(__unix__) || defined(__APPLE__)
+    // mkstemp replaces the trailing XXXXXX in place and creates the file.
+    std::string tmpl = (base / (prefix + "_XXXXXX")).string();
+    const int fd = mkstemp(tmpl.data());
+    if (fd < 0) {
+        return base / (prefix + "_tmp");
+    }
+    close(fd);  // Only the path is returned; the descriptor is not needed.
+    return std::filesystem::path(tmpl);
+
+#else
+    // No file is created here; the name is only made unlikely to collide via the clock.
+    const auto now = std::chrono::high_resolution_clock::now().time_since_epoch().count();
+    return base / (prefix + "_" + std::to_string(static_cast<long long>(now)));
+#endif
+}
+
 static bool HasAnnexBStartCode(const uint8_t* d, size_t n) {
     if (!d || n < 4) return false;
     for (size_t i = 0; i + 3 < n; ++i) {
@@ -256,7 +296,7 @@ std::string ClipAction::ProcessClipFromPackets(const std::vector<std::shared_ptr
         LogWarn("[ClipAction] temp_directory_path failed; falling back to current directory");
         tmp_dir = ".";
     }
-    std::filesystem::path tmp_path = tmp_dir / ("clip_" + std::to_string(event.timestamp_ms) + "." + format_);
+    std::filesystem::path tmp_path = CreateTempFilePath(tmp_dir, "clip_" + std::to_string(event.timestamp_ms));
 
     AVFormatContext* fmt_ctx = nullptr;
     if (avformat_alloc_output_context2(&fmt_ctx, nullptr, format_.c_str(), tmp_path.string().c_str()) < 0 || !fmt_ctx) {
diff --git a/src/ai_scheduler.cpp b/src/ai_scheduler.cpp
index 78ba9dc..be81181 100644
--- a/src/ai_scheduler.cpp
+++ b/src/ai_scheduler.cpp
@@ -340,10 +340,24 @@ InferResult AiScheduler::Infer(ModelHandle handle, const InferInput& input) {
                                                static_cast<uint32_t>(input.size),
                                                offset);
         if (input_mem.mem) {
-            rknn_tensor_attr attr = ctx->input_attrs.empty() ? rknn_tensor_attr{} : ctx->input_attrs[0];
+            const rknn_tensor_attr model_attr = ctx->input_attrs.empty() ? rknn_tensor_attr{} : ctx->input_attrs[0];
+            const uint32_t required_size = (model_attr.size_with_stride > 0)
+                                               ? model_attr.size_with_stride
+                                               : model_attr.size;
+            const bool fmt_match = (ctx->input_attrs.empty()) ? false
+                                  : ((input.is_nhwc && model_attr.fmt == RKNN_TENSOR_NHWC) ||
+                                     (!input.is_nhwc && model_attr.fmt == RKNN_TENSOR_NCHW));
+            const bool type_match = (ctx->input_attrs.empty()) ? false : (model_attr.type == input.type);
+            const bool can_passthrough = fmt_match && type_match && required_size > 0 && input.size >= required_size;
+
+            rknn_tensor_attr attr = model_attr;
             attr.index = 0;
-            attr.type = input.type;
-            attr.fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
+            attr.pass_through = can_passthrough ? 1 : 0;
+            if (!can_passthrough) {
+                // Allow RKNN driver to convert/pack to model input when formats/types differ.
+                attr.type = input.type;
+                attr.fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
+            }
 
             const int mem_ret = rknn_set_io_mem(ctx->ctx, input_mem.mem, &attr);
             if (mem_ret == 0) {
@@ -496,10 +510,23 @@ AiScheduler::BorrowedInferResult AiScheduler::InferBorrowed(ModelHandle handle,
                                                static_cast<uint32_t>(input.size),
                                                offset);
         if (input_mem.mem) {
-            rknn_tensor_attr attr = ctx->input_attrs.empty() ? rknn_tensor_attr{} : ctx->input_attrs[0];
+            const rknn_tensor_attr model_attr = ctx->input_attrs.empty() ? rknn_tensor_attr{} : ctx->input_attrs[0];
+            const uint32_t required_size = (model_attr.size_with_stride > 0)
+                                               ? model_attr.size_with_stride
+                                               : model_attr.size;
+            const bool fmt_match = (ctx->input_attrs.empty()) ? false
+                                  : ((input.is_nhwc && model_attr.fmt == RKNN_TENSOR_NHWC) ||
+                                     (!input.is_nhwc && model_attr.fmt == RKNN_TENSOR_NCHW));
+            const bool type_match = (ctx->input_attrs.empty()) ? false : (model_attr.type == input.type);
+            const bool can_passthrough = fmt_match && type_match && required_size > 0 && input.size >= required_size;
+
+            rknn_tensor_attr attr = model_attr;
             attr.index = 0;
-            attr.type = input.type;
-            attr.fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
+            attr.pass_through = can_passthrough ? 1 : 0;
+            if (!can_passthrough) {
+                attr.type = input.type;
+                attr.fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
+            }
             const int mem_ret = rknn_set_io_mem(ctx->ctx, input_mem.mem, &attr);
             if (mem_ret == 0) {
                 used_io_mem = true;