规范优化5

This commit is contained in:
sladro 2026-01-13 19:52:26 +08:00
parent 1e5b964a7c
commit 1c19ca1093
5 changed files with 99 additions and 10 deletions

View File

@@ -14,6 +14,7 @@
#include "ai_scheduler.h"
#include "face/face_result.h"
#include "node.h"
#include "utils/dma_alloc.h"
#include "utils/logger.h"
namespace rk3588 {
@@ -557,6 +558,9 @@ private:
const bool need_swap = (frame->format == PixelFormat::BGR && cfg->input_format == "rgb") ||
(frame->format == PixelFormat::RGB && cfg->input_format == "bgr");
const bool want_float_input = (cfg->input_dtype == "float" || cfg->input_dtype == "f32" ||
cfg->input_dtype == "float32");
const int in_w = model_w_ > 0 ? model_w_ : src_w;
const int in_h = model_h_ > 0 ? model_h_ : src_h;
const size_t in_size = static_cast<size_t>(in_w) * static_cast<size_t>(in_h) * 3;
@@ -564,8 +568,14 @@ private:
const uint8_t* input_ptr = nullptr;
// Fast path: already packed, correct size, no channel swap.
if (!need_swap && src_w == in_w && src_h == in_h &&
static_cast<size_t>(src_stride) == src_row && frame->data_size >= src_row * static_cast<size_t>(src_h)) {
const bool fast_path = (!need_swap && src_w == in_w && src_h == in_h &&
static_cast<size_t>(src_stride) == src_row &&
frame->data_size >= src_row * static_cast<size_t>(src_h));
const bool sync_src = (frame->dma_fd >= 0) && (want_float_input || !fast_path);
if (sync_src) DmaSyncStartFd(frame->dma_fd);
if (fast_path) {
input_ptr = src;
} else {
input_buf_.resize(in_size);
@@ -588,7 +598,7 @@ private:
input.is_nhwc = true;
// Default: keep existing UINT8 behavior.
if (cfg->input_dtype == "float" || cfg->input_dtype == "f32" || cfg->input_dtype == "float32") {
if (want_float_input) {
float_input_buf_.resize(static_cast<size_t>(in_w) * static_cast<size_t>(in_h) * 3);
const size_t pix = static_cast<size_t>(in_w) * static_cast<size_t>(in_h);
const uint8_t* p = reinterpret_cast<const uint8_t*>(input_ptr);
@@ -615,6 +625,8 @@ private:
input.type = RKNN_TENSOR_UINT8;
}
if (sync_src) DmaSyncEndFd(frame->dma_fd);
auto r = AiScheduler::Instance().InferBorrowed(model_handle_, input);
if (!r.success) {
LogWarn("[ai_face_det] inference failed: " + r.error);

View File

@@ -15,6 +15,7 @@
#include "ai_scheduler.h"
#include "face/face_result.h"
#include "node.h"
#include "utils/dma_alloc.h"
#include "utils/logger.h"
#if defined(RK3588_ENABLE_SQLITE3)
@@ -826,6 +827,9 @@ private:
const bool need_swap = (frame->format == PixelFormat::BGR && cfg->model_input_format == "rgb") ||
(frame->format == PixelFormat::RGB && cfg->model_input_format == "bgr");
const bool sync_src = (frame->dma_fd >= 0);
if (sync_src) DmaSyncStartFd(frame->dma_fd);
FaceRecogResult rr;
rr.img_w = w;
rr.img_h = h;
@@ -925,6 +929,8 @@ private:
rr.items.push_back(std::move(item));
}
if (sync_src) DmaSyncEndFd(frame->dma_fd);
frame->face_recog = std::make_shared<FaceRecogResult>(std::move(rr));
}
#endif

View File

@@ -11,6 +11,7 @@
#include "ai_scheduler.h"
#include "node.h"
#include "utils/dma_alloc.h"
#include "utils/logger.h"
#if defined(RK3588_ENABLE_RKNN)
@@ -443,12 +444,15 @@ private:
", h=" + std::to_string(h) + ")");
return;
}
if (frame->dma_fd >= 0) DmaSyncStartFd(frame->dma_fd);
rgb_tmp_.resize(packed_size);
for (int y = 0; y < h; ++y) {
memcpy(rgb_tmp_.data() + static_cast<size_t>(y) * packed_row,
src + static_cast<size_t>(y) * static_cast<size_t>(src_stride),
packed_row);
}
if (frame->dma_fd >= 0) DmaSyncEndFd(frame->dma_fd);
input.data = rgb_tmp_.data();
input.size = packed_size;
}

View File

@@ -2,6 +2,7 @@
#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <filesystem>
@@ -11,6 +12,12 @@
#include <sstream>
#include <thread>
#if defined(_WIN32)
#include <windows.h>
#elif defined(__unix__) || defined(__APPLE__)
#include <unistd.h>
#endif
#include "utils/logger.h"
#if defined(RK3588_ENABLE_FFMPEG)
@@ -43,6 +50,39 @@ bool SafeLocalTime(std::time_t t, std::tm& out) {
#endif
}
// Create a unique temporary file path inside `dir` (or "." when `dir` is
// empty). On the OS-backed branches the file itself is created on disk,
// reserving the name against races; the caller is expected to reopen/replace
// it by path. On any failure a deterministic "<prefix>_tmp" fallback inside
// `dir` is returned WITHOUT creating a file.
// NOTE: on Windows the API truncates the name prefix to 3 chars ("clp").
static std::filesystem::path CreateTempFilePath(const std::filesystem::path& dir, const std::string& prefix) {
  std::filesystem::path base = dir;
  if (base.empty()) base = ".";
#if defined(_WIN32)
  // Fix: create the unique file in the *requested* directory rather than the
  // system temp dir (previously GetTempPathW was used, which silently ignored
  // `dir` and diverged from the POSIX branch below).
  const std::wstring dir_w = base.wstring();
  wchar_t tmp_file[MAX_PATH];
  if (dir_w.size() >= MAX_PATH ||
      GetTempFileNameW(dir_w.c_str(), L"clp", 0, tmp_file) == 0) {
    return base / (prefix + "_tmp");
  }
  return std::filesystem::path(tmp_file);
#elif defined(__unix__) || defined(__APPLE__)
  // mkstemp requires a mutable, NUL-terminated template ending in "XXXXXX".
  std::string tmpl = (base / (prefix + "_XXXXXX")).string();
  std::vector<char> buf(tmpl.begin(), tmpl.end());
  buf.push_back('\0');
  const int fd = mkstemp(buf.data());
  if (fd < 0) {
    return base / (prefix + "_tmp");
  }
  // Keep the file (name stays reserved) but release the descriptor — the
  // caller reopens the path itself.
  close(fd);
  return std::filesystem::path(buf.data());
#else
  // No OS facility available: timestamp-based name. Not guaranteed unique
  // under concurrent callers; best effort only.
  const auto now = std::chrono::high_resolution_clock::now().time_since_epoch().count();
  return base / (prefix + "_" + std::to_string(static_cast<long long>(now)));
#endif
}
static bool HasAnnexBStartCode(const uint8_t* d, size_t n) {
if (!d || n < 4) return false;
for (size_t i = 0; i + 3 < n; ++i) {
@@ -256,7 +296,7 @@ std::string ClipAction::ProcessClipFromPackets(const std::vector<std::shared_ptr
LogWarn("[ClipAction] temp_directory_path failed; falling back to current directory");
tmp_dir = ".";
}
std::filesystem::path tmp_path = tmp_dir / ("clip_" + std::to_string(event.timestamp_ms) + "." + format_);
std::filesystem::path tmp_path = CreateTempFilePath(tmp_dir, "clip_" + std::to_string(event.timestamp_ms));
AVFormatContext* fmt_ctx = nullptr;
if (avformat_alloc_output_context2(&fmt_ctx, nullptr, format_.c_str(), tmp_path.string().c_str()) < 0 || !fmt_ctx) {

View File

@@ -340,10 +340,24 @@ InferResult AiScheduler::Infer(ModelHandle handle, const InferInput& input) {
static_cast<uint32_t>(input.size),
offset);
if (input_mem.mem) {
rknn_tensor_attr attr = ctx->input_attrs.empty() ? rknn_tensor_attr{} : ctx->input_attrs[0];
const rknn_tensor_attr model_attr = ctx->input_attrs.empty() ? rknn_tensor_attr{} : ctx->input_attrs[0];
const uint32_t required_size = (model_attr.size_with_stride > 0)
? model_attr.size_with_stride
: model_attr.size;
const bool fmt_match = (ctx->input_attrs.empty()) ? false
: ((input.is_nhwc && model_attr.fmt == RKNN_TENSOR_NHWC) ||
(!input.is_nhwc && model_attr.fmt == RKNN_TENSOR_NCHW));
const bool type_match = (ctx->input_attrs.empty()) ? false : (model_attr.type == input.type);
const bool can_passthrough = fmt_match && type_match && required_size > 0 && input.size >= required_size;
rknn_tensor_attr attr = model_attr;
attr.index = 0;
attr.type = input.type;
attr.fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
attr.pass_through = can_passthrough ? 1 : 0;
if (!can_passthrough) {
// Allow RKNN driver to convert/pack to model input when formats/types differ.
attr.type = input.type;
attr.fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
}
const int mem_ret = rknn_set_io_mem(ctx->ctx, input_mem.mem, &attr);
if (mem_ret == 0) {
@@ -496,10 +510,23 @@ AiScheduler::BorrowedInferResult AiScheduler::InferBorrowed(ModelHandle handle,
static_cast<uint32_t>(input.size),
offset);
if (input_mem.mem) {
rknn_tensor_attr attr = ctx->input_attrs.empty() ? rknn_tensor_attr{} : ctx->input_attrs[0];
const rknn_tensor_attr model_attr = ctx->input_attrs.empty() ? rknn_tensor_attr{} : ctx->input_attrs[0];
const uint32_t required_size = (model_attr.size_with_stride > 0)
? model_attr.size_with_stride
: model_attr.size;
const bool fmt_match = (ctx->input_attrs.empty()) ? false
: ((input.is_nhwc && model_attr.fmt == RKNN_TENSOR_NHWC) ||
(!input.is_nhwc && model_attr.fmt == RKNN_TENSOR_NCHW));
const bool type_match = (ctx->input_attrs.empty()) ? false : (model_attr.type == input.type);
const bool can_passthrough = fmt_match && type_match && required_size > 0 && input.size >= required_size;
rknn_tensor_attr attr = model_attr;
attr.index = 0;
attr.type = input.type;
attr.fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
attr.pass_through = can_passthrough ? 1 : 0;
if (!can_passthrough) {
attr.type = input.type;
attr.fmt = input.is_nhwc ? RKNN_TENSOR_NHWC : RKNN_TENSOR_NCHW;
}
const int mem_ret = rknn_set_io_mem(ctx->ctx, input_mem.mem, &attr);
if (mem_ret == 0) {
used_io_mem = true;