性能优化3
Some checks failed
CI / host-build (push) Has been cancelled
CI / rk3588-cross-build (push) Has been cancelled

This commit is contained in:
sladro 2026-01-12 20:12:24 +08:00
parent de3b8a47eb
commit 00f254ae71
7 changed files with 97 additions and 19 deletions

View File

@ -30,7 +30,9 @@
"dst_w": 640,
"dst_h": 640,
"dst_format": "rgb",
"dst_packed": true,
"keep_ratio": false,
"rga_gate": "${name}",
"use_rga": false
},
{
@ -66,6 +68,7 @@
"dst_h": "${src_h}",
"dst_format": "nv12",
"keep_ratio": false,
"rga_gate": "${name}",
"use_rga": false
},
{

View File

@ -27,7 +27,9 @@
"dst_w": 640,
"dst_h": 640,
"dst_format": "rgb",
"dst_packed": true,
"keep_ratio": false,
"rga_gate": "cam1_sample_full_pipeline",
"use_rga": true
},
{
@ -114,6 +116,7 @@
"dst_h": 720,
"dst_format": "nv12",
"keep_ratio": false,
"rga_gate": "cam1_sample_full_pipeline",
"use_rga": true
},
{
@ -230,7 +233,9 @@
"dst_w": 640,
"dst_h": 640,
"dst_format": "rgb",
"dst_packed": true,
"keep_ratio": false,
"rga_gate": "cam2_sample_full_pipeline",
"use_rga": true
},
{
@ -317,6 +322,7 @@
"dst_h": 720,
"dst_format": "nv12",
"keep_ratio": false,
"rga_gate": "cam2_sample_full_pipeline",
"use_rga": true
},
{

View File

@ -31,7 +31,9 @@
"dst_w": 640,
"dst_h": 640,
"dst_format": "rgb",
"dst_packed": true,
"keep_ratio": false,
"rga_gate": "${name}",
"use_rga": true
},
{
@ -68,6 +70,7 @@
"dst_h": "${src_h}",
"dst_format": "nv12",
"keep_ratio": false,
"rga_gate": "${name}",
"use_rga": true
},
{

View File

@ -27,7 +27,9 @@
"dst_w": 1280,
"dst_h": 720,
"dst_format": "rgb",
"dst_packed": true,
"keep_ratio": false,
"rga_gate": "cam1_face_det_recog_test",
"use_rga": true
},
{
@ -83,6 +85,7 @@
"dst_h": 720,
"dst_format": "nv12",
"keep_ratio": false,
"rga_gate": "cam1_face_det_recog_test",
"use_rga": true
},
{

View File

@ -27,7 +27,9 @@
"dst_w": 320,
"dst_h": 320,
"dst_format": "rgb",
"dst_packed": true,
"keep_ratio": false,
"rga_gate": "cam1_face_det_test",
"use_rga": true
},
{
@ -62,6 +64,7 @@
"dst_h": 720,
"dst_format": "nv12",
"keep_ratio": false,
"rga_gate": "cam1_face_det_test",
"use_rga": true
},
{

View File

@ -27,7 +27,9 @@
"dst_w": 640,
"dst_h": 640,
"dst_format": "rgb",
"dst_packed": true,
"keep_ratio": false,
"rga_gate": "cam1_strict_minio_alarm",
"use_rga": true
},
{
@ -63,6 +65,7 @@
"dst_h": 720,
"dst_format": "nv12",
"keep_ratio": false,
"rga_gate": "cam1_strict_minio_alarm",
"use_rga": true
},
{

View File

@ -8,6 +8,7 @@
#include <memory>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <vector>
#include "node.h"
@ -64,18 +65,13 @@ int GlobalRgaMaxInflight() {
return v > 0 ? v : 1;
}
// Stores a new value into the limit exposed by GlobalRgaMaxInflightRef().
// Non-positive requests are ignored; requests above 32 are clamped to 32.
void SetGlobalRgaMaxInflight(int v) {
    if (v > 0) {
        const int capped = (v > 32) ? 32 : v;
        GlobalRgaMaxInflightRef().store(capped);
    }
}
class RgaGate {
public:
// Builds a gate whose limit is `max_inflight`; a non-positive argument falls back to 1.
explicit RgaGate(int max_inflight) : max_inflight_(max_inflight > 0 ? max_inflight : 1) {}
void Acquire() {
const int max_inflight = GlobalRgaMaxInflight();
std::unique_lock<std::mutex> lock(mu_);
cv_.wait(lock, [&]() { return in_flight_ < max_inflight; });
cv_.wait(lock, [&]() { return in_flight_ < max_inflight_; });
++in_flight_;
}
@ -87,23 +83,64 @@ public:
cv_.notify_one();
}
// Changes this gate's concurrency limit.
// Requests <= 0 are ignored and requests above 32 are clamped to 32. Every waiter is
// notified afterwards so its wait predicate is re-checked against the new limit.
void SetMaxInflight(int v) {
    if (v <= 0) {
        return;
    }
    const int limit = (v > 32) ? 32 : v;
    {
        // Keep the critical section to the single assignment.
        std::lock_guard<std::mutex> guard(mu_);
        max_inflight_ = limit;
    }
    cv_.notify_all();
}
// Returns the gate's current limit, read while holding mu_.
int MaxInflight() const {
    std::lock_guard<std::mutex> guard(mu_);
    const int limit = max_inflight_;
    return limit;
}
private:
std::mutex mu_;
mutable std::mutex mu_;
std::condition_variable cv_;
int in_flight_ = 0;
int max_inflight_ = 1;
};
RgaGate& GlobalRgaGate() {
static RgaGate* g = new RgaGate();
return *g;
// Registry that maps a string key to its own RgaGate.
// Gates are created lazily on first request and this class never erases them; each
// gate lives behind a unique_ptr, so the reference returned by Get() is not affected
// by later insertions into the map.
class RgaGateRegistry {
public:
    // Returns the single registry instance. It is heap-allocated on first use and
    // never deleted.
    static RgaGateRegistry& Instance() {
        static RgaGateRegistry* const registry = new RgaGateRegistry();
        return *registry;
    }

    // Returns the gate stored under `key`. On first use of a key the gate is created
    // with the value GlobalRgaMaxInflight() reports at that moment.
    RgaGate& Get(const std::string& key) {
        std::lock_guard<std::mutex> guard(mu_);
        std::unique_ptr<RgaGate>& slot = gates_[key];
        if (!slot) {
            slot = std::make_unique<RgaGate>(GlobalRgaMaxInflight());
        }
        return *slot;
    }

private:
    std::mutex mu_;  // guards gates_
    std::unordered_map<std::string, std::unique_ptr<RgaGate>> gates_;
};
// Returns the gate registered under `key`, creating it if needed.
// An empty key is mapped to the gate named "global".
RgaGate& GetRgaGate(const std::string& key) {
    RgaGateRegistry& registry = RgaGateRegistry::Instance();
    if (key.empty()) {
        return registry.Get("global");
    }
    return registry.Get(key);
}
// Scope guard that holds one slot of an RGA gate for as long as the object lives.
// The gate is resolved once at construction, and the destructor releases the slot
// on that same gate. Non-copyable, so a slot is released exactly once.
class ScopedRgaGate {
public:
    // Keyless form kept for existing call sites: an empty key is mapped to the
    // "global" gate by GetRgaGate().
    ScopedRgaGate() : ScopedRgaGate(std::string()) {}

    // Resolves the gate registered under `key` and acquires a slot on it.
    // Acquire() blocks until the gate has capacity.
    explicit ScopedRgaGate(const std::string& key) : gate_(&GetRgaGate(key)) { gate_->Acquire(); }

    // Returns the slot to the gate acquired in the constructor.
    ~ScopedRgaGate() { gate_->Release(); }

    ScopedRgaGate(const ScopedRgaGate&) = delete;
    ScopedRgaGate& operator=(const ScopedRgaGate&) = delete;

private:
    RgaGate* gate_ = nullptr;  // non-owning; points at a gate returned by GetRgaGate()
};
void EnsureRgaInitializedOnce() {
@ -276,16 +313,25 @@ public:
dst_w_ = config.ValueOr<int>("dst_w", 640);
dst_h_ = config.ValueOr<int>("dst_h", 640);
keep_ratio_ = config.ValueOr<bool>("keep_ratio", false);
if (config.Find("dst_packed")) {
dst_packed_ = config.ValueOr<bool>("dst_packed", false);
dst_packed_explicit_ = true;
} else {
dst_packed_ = false;
dst_packed_explicit_ = false;
}
std::string fmt_str = config.ValueOr<std::string>("dst_format", "");
if (!fmt_str.empty()) {
dst_fmt_ = ParseFormat(fmt_str);
}
#if defined(RK3588_ENABLE_RGA)
rga_gate_ = config.ValueOr<std::string>("rga_gate", "global");
const int rga_max_inflight = config.ValueOr<int>("rga_max_inflight", 0);
if (rga_max_inflight > 0) {
SetGlobalRgaMaxInflight(rga_max_inflight);
GetRgaGate(rga_gate_).SetMaxInflight(rga_max_inflight);
}
#endif
const bool requested_use_rga = config.ValueOr<bool>("use_rga", true);
@ -324,8 +370,14 @@ public:
}
// Logs this node's start-up configuration and returns true.
//
// The log line always carries the node id, the destination size and whether the
// RGA or swscale path is selected. When built with RK3588_ENABLE_RGA and use_rga_
// is set, it also carries the gate name and that gate's current max-inflight limit.
bool Start() override {
    std::string extra;
#if defined(RK3588_ENABLE_RGA)
    if (use_rga_) {
        extra = " gate=" + rga_gate_ + " max_inflight=" + std::to_string(GetRgaGate(rga_gate_).MaxInflight());
    }
#endif
    // Single LogInfo call: the backend tag is followed by the optional RGA details.
    LogInfo("[preprocess] start id=" + id_ + " dst=" + std::to_string(dst_w_) + "x" + std::to_string(dst_h_) +
            (use_rga_ ? " (rga)" : " (swscale)") + extra);
    return true;
}
@ -447,7 +499,9 @@ private:
int dst_wstride = Align16(out_w);
// For AI input (RGB/BGR), allow a tightly packed output to avoid an extra per-frame memcpy
// in downstream nodes (e.g. ai_yolo).
if (dst_packed_ && (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR)) {
const bool want_packed_rgb = (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR) &&
(!dst_packed_explicit_ || dst_packed_);
if (want_packed_rgb) {
dst_wstride = out_w;
}
int dst_hstride = Align16(out_h);
@ -541,7 +595,7 @@ private:
auto RunRgaOnce = [&](int src_fd, std::string& err) -> bool {
// Serialize/limit librga/im2d usage; multiple pipelines call RGA concurrently.
ScopedRgaGate guard;
ScopedRgaGate guard(rga_gate_);
src_buf = wrapbuffer_fd_t(src_fd, frame->width, frame->height,
src_wstride, src_hstride, src_fmt_rga);
@ -854,8 +908,11 @@ private:
bool keep_ratio_ = false;
PixelFormat dst_fmt_ = PixelFormat::UNKNOWN;
bool dst_packed_ = false;
bool dst_packed_explicit_ = false;
bool use_rga_ = true;
std::string rga_gate_ = "global";
bool stats_log_ = false;
uint64_t stats_interval_ = 100;