性能优化3
This commit is contained in:
parent
de3b8a47eb
commit
00f254ae71
@ -30,7 +30,9 @@
|
||||
"dst_w": 640,
|
||||
"dst_h": 640,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "${name}",
|
||||
"use_rga": false
|
||||
},
|
||||
{
|
||||
@ -66,6 +68,7 @@
|
||||
"dst_h": "${src_h}",
|
||||
"dst_format": "nv12",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "${name}",
|
||||
"use_rga": false
|
||||
},
|
||||
{
|
||||
|
||||
@ -27,7 +27,9 @@
|
||||
"dst_w": 640,
|
||||
"dst_h": 640,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam1_sample_full_pipeline",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -114,6 +116,7 @@
|
||||
"dst_h": 720,
|
||||
"dst_format": "nv12",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam1_sample_full_pipeline",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -230,7 +233,9 @@
|
||||
"dst_w": 640,
|
||||
"dst_h": 640,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam2_sample_full_pipeline",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -317,6 +322,7 @@
|
||||
"dst_h": 720,
|
||||
"dst_format": "nv12",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam2_sample_full_pipeline",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
|
||||
@ -31,7 +31,9 @@
|
||||
"dst_w": 640,
|
||||
"dst_h": 640,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "${name}",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -68,6 +70,7 @@
|
||||
"dst_h": "${src_h}",
|
||||
"dst_format": "nv12",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "${name}",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
|
||||
@ -27,7 +27,9 @@
|
||||
"dst_w": 1280,
|
||||
"dst_h": 720,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam1_face_det_recog_test",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -83,6 +85,7 @@
|
||||
"dst_h": 720,
|
||||
"dst_format": "nv12",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam1_face_det_recog_test",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
|
||||
@ -27,7 +27,9 @@
|
||||
"dst_w": 320,
|
||||
"dst_h": 320,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam1_face_det_test",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -62,6 +64,7 @@
|
||||
"dst_h": 720,
|
||||
"dst_format": "nv12",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam1_face_det_test",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
|
||||
@ -27,7 +27,9 @@
|
||||
"dst_w": 640,
|
||||
"dst_h": 640,
|
||||
"dst_format": "rgb",
|
||||
"dst_packed": true,
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam1_strict_minio_alarm",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
@ -63,6 +65,7 @@
|
||||
"dst_h": 720,
|
||||
"dst_format": "nv12",
|
||||
"keep_ratio": false,
|
||||
"rga_gate": "cam1_strict_minio_alarm",
|
||||
"use_rga": true
|
||||
},
|
||||
{
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "node.h"
|
||||
@ -64,18 +65,13 @@ int GlobalRgaMaxInflight() {
|
||||
return v > 0 ? v : 1;
|
||||
}
|
||||
|
||||
void SetGlobalRgaMaxInflight(int v) {
|
||||
if (v <= 0) return;
|
||||
if (v > 32) v = 32;
|
||||
GlobalRgaMaxInflightRef().store(v);
|
||||
}
|
||||
|
||||
class RgaGate {
|
||||
public:
|
||||
explicit RgaGate(int max_inflight) : max_inflight_(max_inflight > 0 ? max_inflight : 1) {}
|
||||
|
||||
void Acquire() {
|
||||
const int max_inflight = GlobalRgaMaxInflight();
|
||||
std::unique_lock<std::mutex> lock(mu_);
|
||||
cv_.wait(lock, [&]() { return in_flight_ < max_inflight; });
|
||||
cv_.wait(lock, [&]() { return in_flight_ < max_inflight_; });
|
||||
++in_flight_;
|
||||
}
|
||||
|
||||
@ -87,23 +83,64 @@ public:
|
||||
cv_.notify_one();
|
||||
}
|
||||
|
||||
void SetMaxInflight(int v) {
|
||||
if (v <= 0) return;
|
||||
if (v > 32) v = 32;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
max_inflight_ = v;
|
||||
}
|
||||
cv_.notify_all();
|
||||
}
|
||||
|
||||
int MaxInflight() const {
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
return max_inflight_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mu_;
|
||||
mutable std::mutex mu_;
|
||||
std::condition_variable cv_;
|
||||
int in_flight_ = 0;
|
||||
int max_inflight_ = 1;
|
||||
};
|
||||
|
||||
RgaGate& GlobalRgaGate() {
|
||||
static RgaGate* g = new RgaGate();
|
||||
return *g;
|
||||
class RgaGateRegistry {
|
||||
public:
|
||||
static RgaGateRegistry& Instance() {
|
||||
static RgaGateRegistry* inst = new RgaGateRegistry();
|
||||
return *inst;
|
||||
}
|
||||
|
||||
RgaGate& Get(const std::string& key) {
|
||||
std::lock_guard<std::mutex> lock(mu_);
|
||||
auto it = gates_.find(key);
|
||||
if (it != gates_.end()) return *it->second;
|
||||
auto gate = std::make_unique<RgaGate>(GlobalRgaMaxInflight());
|
||||
RgaGate& ref = *gate;
|
||||
gates_.emplace(key, std::move(gate));
|
||||
return ref;
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mu_;
|
||||
std::unordered_map<std::string, std::unique_ptr<RgaGate>> gates_;
|
||||
};
|
||||
|
||||
RgaGate& GetRgaGate(const std::string& key) {
|
||||
const std::string k = key.empty() ? "global" : key;
|
||||
return RgaGateRegistry::Instance().Get(k);
|
||||
}
|
||||
|
||||
class ScopedRgaGate {
|
||||
public:
|
||||
ScopedRgaGate() { GlobalRgaGate().Acquire(); }
|
||||
~ScopedRgaGate() { GlobalRgaGate().Release(); }
|
||||
explicit ScopedRgaGate(const std::string& key) : gate_(&GetRgaGate(key)) { gate_->Acquire(); }
|
||||
~ScopedRgaGate() { gate_->Release(); }
|
||||
ScopedRgaGate(const ScopedRgaGate&) = delete;
|
||||
ScopedRgaGate& operator=(const ScopedRgaGate&) = delete;
|
||||
|
||||
private:
|
||||
RgaGate* gate_ = nullptr;
|
||||
};
|
||||
|
||||
void EnsureRgaInitializedOnce() {
|
||||
@ -276,16 +313,25 @@ public:
|
||||
dst_w_ = config.ValueOr<int>("dst_w", 640);
|
||||
dst_h_ = config.ValueOr<int>("dst_h", 640);
|
||||
keep_ratio_ = config.ValueOr<bool>("keep_ratio", false);
|
||||
dst_packed_ = config.ValueOr<bool>("dst_packed", false);
|
||||
|
||||
if (config.Find("dst_packed")) {
|
||||
dst_packed_ = config.ValueOr<bool>("dst_packed", false);
|
||||
dst_packed_explicit_ = true;
|
||||
} else {
|
||||
dst_packed_ = false;
|
||||
dst_packed_explicit_ = false;
|
||||
}
|
||||
|
||||
std::string fmt_str = config.ValueOr<std::string>("dst_format", "");
|
||||
if (!fmt_str.empty()) {
|
||||
dst_fmt_ = ParseFormat(fmt_str);
|
||||
}
|
||||
|
||||
#if defined(RK3588_ENABLE_RGA)
|
||||
rga_gate_ = config.ValueOr<std::string>("rga_gate", "global");
|
||||
const int rga_max_inflight = config.ValueOr<int>("rga_max_inflight", 0);
|
||||
if (rga_max_inflight > 0) {
|
||||
SetGlobalRgaMaxInflight(rga_max_inflight);
|
||||
GetRgaGate(rga_gate_).SetMaxInflight(rga_max_inflight);
|
||||
}
|
||||
#endif
|
||||
const bool requested_use_rga = config.ValueOr<bool>("use_rga", true);
|
||||
@ -324,8 +370,14 @@ public:
|
||||
}
|
||||
|
||||
bool Start() override {
|
||||
std::string extra;
|
||||
#if defined(RK3588_ENABLE_RGA)
|
||||
if (use_rga_) {
|
||||
extra = " gate=" + rga_gate_ + " max_inflight=" + std::to_string(GetRgaGate(rga_gate_).MaxInflight());
|
||||
}
|
||||
#endif
|
||||
LogInfo("[preprocess] start id=" + id_ + " dst=" + std::to_string(dst_w_) + "x" + std::to_string(dst_h_) +
|
||||
(use_rga_ ? " (rga)" : " (swscale)"));
|
||||
(use_rga_ ? " (rga)" : " (swscale)") + extra);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -447,7 +499,9 @@ private:
|
||||
int dst_wstride = Align16(out_w);
|
||||
// For AI input (RGB/BGR), allow a tightly packed output to avoid an extra per-frame memcpy
|
||||
// in downstream nodes (e.g. ai_yolo).
|
||||
if (dst_packed_ && (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR)) {
|
||||
const bool want_packed_rgb = (out_fmt == PixelFormat::RGB || out_fmt == PixelFormat::BGR) &&
|
||||
(!dst_packed_explicit_ || dst_packed_);
|
||||
if (want_packed_rgb) {
|
||||
dst_wstride = out_w;
|
||||
}
|
||||
int dst_hstride = Align16(out_h);
|
||||
@ -541,7 +595,7 @@ private:
|
||||
|
||||
auto RunRgaOnce = [&](int src_fd, std::string& err) -> bool {
|
||||
// Serialize/limit librga/im2d usage; multiple pipelines call RGA concurrently.
|
||||
ScopedRgaGate guard;
|
||||
ScopedRgaGate guard(rga_gate_);
|
||||
|
||||
src_buf = wrapbuffer_fd_t(src_fd, frame->width, frame->height,
|
||||
src_wstride, src_hstride, src_fmt_rga);
|
||||
@ -854,8 +908,11 @@ private:
|
||||
bool keep_ratio_ = false;
|
||||
PixelFormat dst_fmt_ = PixelFormat::UNKNOWN;
|
||||
bool dst_packed_ = false;
|
||||
bool dst_packed_explicit_ = false;
|
||||
bool use_rga_ = true;
|
||||
|
||||
std::string rga_gate_ = "global";
|
||||
|
||||
bool stats_log_ = false;
|
||||
uint64_t stats_interval_ = 100;
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user