// rtsp_tensorrt/ref/tensorrt_engine.cpp
#include "inference/tensorrt_engine.hpp"
#include "common/logger.hpp"
#include <fstream>
#include <cuda_runtime.h>
#include <NvOnnxParser.h>
#include <dlfcn.h> // 用于动态库加载检查
#include <filesystem>
#include "common/cuda_helper.hpp"
// Named TRTLogger to avoid clashing with the project-wide Logger
class TRTLogger : public nvinfer1::ILogger {
    void log(Severity severity, const char* msg) noexcept override {
        switch (severity) {
            case Severity::kINTERNAL_ERROR:
                Logger::error(std::string("TensorRT Internal Error: ") + msg);
                break;
            case Severity::kERROR:
                Logger::error(std::string("TensorRT Error: ") + msg);
                break;
            case Severity::kWARNING:
                Logger::info(std::string("TensorRT Warning: ") + msg);
                break;
            default:
                Logger::info(std::string("TensorRT Info: ") + msg);
                break;
        }
    }
};
static TRTLogger gLogger; // global logger instance
class TensorRTEngine::Impl {
public:
    nvinfer1::IRuntime* runtime = nullptr;
    nvinfer1::ICudaEngine* engine = nullptr;
    nvinfer1::IExecutionContext* context = nullptr;
    cudaStream_t stream = nullptr;
    void* buffers[2] = {nullptr, nullptr}; // input and output device bindings
    int inputIndex = 0;
    int outputIndex = 1;
    int inputH = 640;
    int inputW = 640;
    int maxBatchSize = 1;
    float* hostInput = nullptr;
    float* hostOutput = nullptr;
    // GPU buffers
    void* input_buffer = nullptr;  // GPU input buffer
    void* output_buffer = nullptr; // GPU output buffer
    ~Impl() {
        // destroy in reverse order of creation: context before engine before runtime
        delete context;
        delete engine;
        delete runtime;
        if (stream) cudaStreamDestroy(stream);
        delete[] hostInput;
        delete[] hostOutput;
        if (input_buffer) cudaFree(input_buffer);
        if (output_buffer) cudaFree(output_buffer);
    }
};
TensorRTEngine::TensorRTEngine(const std::string& model_path, int gpu_id) {
    try {
        Logger::info("TensorRTEngine constructor start");
        // validate arguments
        if (model_path.empty()) {
            throw std::runtime_error("Model path is empty");
        }
        if (gpu_id < 0) {
            throw std::runtime_error("Invalid GPU ID: " + std::to_string(gpu_id));
        }
        // keep a local copy of the model path
        model_path_ = model_path;
        Logger::info("Parameters:");
        Logger::info("  Model path: " + model_path_);
        Logger::info("  GPU ID: " + std::to_string(gpu_id));
        // create the implementation
        pImpl = std::make_unique<Impl>();
        // check the model file
        if (!std::filesystem::exists(model_path_)) {
            throw std::runtime_error("Model file not found: " + model_path_);
        }
        auto file_size = std::filesystem::file_size(model_path_);
        Logger::info("Model file exists, size: " + std::to_string(file_size) + " bytes");
        // initialize CUDA
        cudaError_t error = cudaSetDevice(gpu_id);
        if (error != cudaSuccess) {
            throw std::runtime_error("Failed to set CUDA device: " +
                                     std::string(cudaGetErrorString(error)));
        }
        // create the CUDA stream
        error = cudaStreamCreate(&pImpl->stream);
        if (error != cudaSuccess) {
            throw std::runtime_error("Failed to create CUDA stream: " +
                                     std::string(cudaGetErrorString(error)));
        }
        // load (and, if necessary, build) the model
        if (!loadModel()) {
            throw std::runtime_error("Failed to load model");
        }
        Logger::info("TensorRTEngine constructor completed successfully");
    }
    catch (const std::exception& e) {
        Logger::error("Error in TensorRTEngine constructor: " + std::string(e.what()));
        throw;
    }
}
TensorRTEngine::~TensorRTEngine() = default;
bool TensorRTEngine::convertONNX2TRT(const std::string& onnx_file) {
    try {
        Logger::info("=== Starting ONNX to TensorRT Conversion ===");
        Logger::info("ONNX file: " + onnx_file);
        // use the model file's directory as the base directory
        std::filesystem::path model_dir = std::filesystem::path(onnx_file).parent_path();
        std::filesystem::path engine_path = model_dir / "model.engine";
        // skip the conversion if an engine file already exists
        if (std::filesystem::exists(engine_path)) {
            Logger::info("Found existing engine file: " + engine_path.string());
            Logger::info("Size: " + std::to_string(std::filesystem::file_size(engine_path)) + " bytes");
            return true;
        }
        Logger::info("Converting ONNX to TensorRT engine...");
        // make sure the ONNX file exists
        if (!std::filesystem::exists(onnx_file)) {
            Logger::error("ONNX file does not exist: " + onnx_file);
            return false;
        }
        // report the file size
        auto file_size = std::filesystem::file_size(onnx_file);
        Logger::info("ONNX file size: " + std::to_string(file_size) + " bytes");
        // create the builder
        nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
        if (!builder) {
            throw std::runtime_error("Failed to create builder");
        }
        // create the network definition with explicit batch enabled
        const auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
        nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);
        if (!network) {
            throw std::runtime_error("Failed to create network");
        }
        // create the ONNX parser
        auto parser = nvonnxparser::createParser(*network, gLogger);
        if (!parser) {
            throw std::runtime_error("Failed to create parser");
        }
        // parse the ONNX file
        if (!parser->parseFromFile(onnx_file.c_str(),
                                   static_cast<int>(nvinfer1::ILogger::Severity::kWARNING))) {
            throw std::runtime_error("Failed to parse ONNX file");
        }
        // the network must have at least one input
        if (network->getNbInputs() == 0) {
            throw std::runtime_error("Network has no inputs");
        }
        // fetch the input tensor
        nvinfer1::ITensor* input = network->getInput(0);
        if (!input) {
            throw std::runtime_error("Failed to get input tensor");
        }
        // input name and dimensions
        std::string inputName = input->getName();
        Logger::info("Input tensor name: " + inputName);
        nvinfer1::Dims inputDims = input->getDimensions();
        std::string dimStr = "Input dimensions: (";
        for (int i = 0; i < inputDims.nbDims; i++) {
            dimStr += std::to_string(inputDims.d[i]);
            if (i < inputDims.nbDims - 1) dimStr += ", ";
        }
        dimStr += ")";
        Logger::info(dimStr);
        // log the network layers and the input dimensions of each layer
        Logger::info("Network layers:");
        for (int i = 0; i < network->getNbLayers(); i++) {
            auto layer = network->getLayer(i);
            Logger::info("Layer " + std::to_string(i) + ": " + layer->getName());
            for (int j = 0; j < layer->getNbInputs(); j++) {
                auto layerInput = layer->getInput(j);
                if (layerInput) {
                    auto dims = layerInput->getDimensions();
                    std::string layerDimStr = "  Input " + std::to_string(j) + " dims: (";
                    for (int k = 0; k < dims.nbDims; k++) {
                        layerDimStr += std::to_string(dims.d[k]);
                        if (k < dims.nbDims - 1) layerDimStr += ", ";
                    }
                    layerDimStr += ")";
                    Logger::info(layerDimStr);
                }
            }
        }
        // create the builder config
        nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
        if (!config) {
            throw std::runtime_error("Failed to create builder config");
        }
        // TensorRT build settings
        config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1 << 30); // 1 GiB workspace
        config->setFlag(nvinfer1::BuilderFlag::kFP16); // enable FP16 precision
        // add an optimization profile
        nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
        if (!profile) {
            throw std::runtime_error("Failed to create optimization profile");
        }
        // resolve dynamic dimensions into concrete min/opt/max values
        nvinfer1::Dims minDims = inputDims;
        nvinfer1::Dims optDims = inputDims;
        nvinfer1::Dims maxDims = inputDims;
        // log the original dimensions
        Logger::info("Original dimensions:");
        for (int i = 0; i < inputDims.nbDims; i++) {
            Logger::info("  dim[" + std::to_string(i) + "] = " + std::to_string(inputDims.d[i]));
        }
        // make sure every dimension is positive
        for (int i = 0; i < inputDims.nbDims; i++) {
            // a -1 (dynamic) dimension gets a suitable concrete value
            if (inputDims.d[i] == -1) {
                if (i == 0) { // batch dimension
                    minDims.d[i] = 1;
                    optDims.d[i] = pImpl->maxBatchSize;
                    maxDims.d[i] = pImpl->maxBatchSize;
                } else if (i == 1) { // channel dimension
                    minDims.d[i] = 3; // RGB
                    optDims.d[i] = 3;
                    maxDims.d[i] = 3;
                } else if (i == 2) { // height dimension
                    minDims.d[i] = pImpl->inputH;
                    optDims.d[i] = pImpl->inputH;
                    maxDims.d[i] = pImpl->inputH;
                } else if (i == 3) { // width dimension
                    minDims.d[i] = pImpl->inputW;
                    optDims.d[i] = pImpl->inputW;
                    maxDims.d[i] = pImpl->inputW;
                } else {
                    minDims.d[i] = 1;
                    optDims.d[i] = 1;
                    maxDims.d[i] = 1;
                }
            } else {
                // static dimension: keep the original value
                minDims.d[i] = inputDims.d[i];
                optDims.d[i] = inputDims.d[i];
                maxDims.d[i] = inputDims.d[i];
            }
        }
        // log the profile dimensions
        Logger::info("Setting optimization profile dimensions:");
        Logger::info("Min dimensions:");
        for (int i = 0; i < minDims.nbDims; i++) {
            Logger::info("  dim[" + std::to_string(i) + "] = " + std::to_string(minDims.d[i]));
        }
        Logger::info("Opt dimensions:");
        for (int i = 0; i < optDims.nbDims; i++) {
            Logger::info("  dim[" + std::to_string(i) + "] = " + std::to_string(optDims.d[i]));
        }
        Logger::info("Max dimensions:");
        for (int i = 0; i < maxDims.nbDims; i++) {
            Logger::info("  dim[" + std::to_string(i) + "] = " + std::to_string(maxDims.d[i]));
        }
        // apply the profile dimensions
        if (!profile->setDimensions(inputName.c_str(), nvinfer1::OptProfileSelector::kMIN, minDims)) {
            throw std::runtime_error("Failed to set minimum dimensions");
        }
        if (!profile->setDimensions(inputName.c_str(), nvinfer1::OptProfileSelector::kOPT, optDims)) {
            throw std::runtime_error("Failed to set optimal dimensions");
        }
        if (!profile->setDimensions(inputName.c_str(), nvinfer1::OptProfileSelector::kMAX, maxDims)) {
            throw std::runtime_error("Failed to set maximum dimensions");
        }
        config->addOptimizationProfile(profile);
        // build the engine
        Logger::info("Building TensorRT engine...");
        nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
        if (!engine) {
            throw std::runtime_error("Failed to build TensorRT engine");
        }
        // serialize the engine to disk
        nvinfer1::IHostMemory* serializedEngine = engine->serialize();
        std::ofstream engine_file(engine_path, std::ios::binary);
        engine_file.write(static_cast<const char*>(serializedEngine->data()),
                          serializedEngine->size());
        if (!engine_file) {
            throw std::runtime_error("Failed to write engine file: " + engine_path.string());
        }
        // release build-time resources
        delete serializedEngine;
        delete engine;
        delete config;
        delete network;
        delete parser;
        delete builder;
        Logger::info("Successfully converted ONNX to TensorRT engine");
        Logger::info("=== Engine Conversion Completed ===");
        Logger::info("Engine file saved to: " + engine_path.string());
        Logger::info("Engine file size: " + std::to_string(std::filesystem::file_size(engine_path)) + " bytes");
        return true;
    }
    catch (const std::exception& e) {
        Logger::error("Error in convertONNX2TRT: " + std::string(e.what()));
        return false;
    }
}
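// Aside: the same conversion can be done offline with NVIDIA's trtexec tool,
// which avoids paying the build cost at application startup. A sketch,
// assuming trtexec from the TensorRT distribution is on PATH:
//
//     trtexec --onnx=model.onnx --saveEngine=model.engine --fp16
//
// An engine produced either way is tied to the GPU and TensorRT version it
// was built with, so it must be rebuilt when either changes.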
bool TensorRTEngine::loadModel() {
    try {
        if (!pImpl) {
            throw std::runtime_error("Implementation is null");
        }
        Logger::info("Loading model...");
        // use the model file's directory as the base directory
        std::filesystem::path model_dir = std::filesystem::path(model_path_).parent_path();
        std::filesystem::path engine_path = model_dir / "model.engine";
        // build the engine from ONNX if it does not exist yet
        if (!std::filesystem::exists(engine_path)) {
            Logger::info("Engine file not found, converting from ONNX...");
            if (!convertONNX2TRT(model_path_)) {
                throw std::runtime_error("Failed to convert ONNX model");
            }
        }
        // read the serialized engine file
        std::ifstream file(engine_path, std::ios::binary);
        if (!file.good()) {
            throw std::runtime_error("Cannot open engine file: " + engine_path.string());
        }
        file.seekg(0, std::ios::end);
        size_t size = file.tellg();
        file.seekg(0, std::ios::beg);
        std::vector<char> engineData(size);
        file.read(engineData.data(), size);
        // create the TensorRT runtime
        Logger::info("Creating TensorRT runtime...");
        pImpl->runtime = nvinfer1::createInferRuntime(gLogger);
        if (!pImpl->runtime) {
            Logger::error("Failed to create TensorRT runtime");
            return false;
        }
        Logger::info("Deserializing CUDA engine...");
        pImpl->engine = pImpl->runtime->deserializeCudaEngine(engineData.data(), size);
        if (!pImpl->engine) {
            Logger::error("Failed to deserialize CUDA engine");
            return false;
        }
        Logger::info("Creating execution context...");
        pImpl->context = pImpl->engine->createExecutionContext();
        if (!pImpl->context) {
            Logger::error("Failed to create execution context");
            return false;
        }
        // query the I/O tensors
        Logger::info("Getting tensor information...");
        int32_t nbIOTensors = pImpl->engine->getNbIOTensors();
        Logger::info("Number of I/O tensors: " + std::to_string(nbIOTensors));
        // this engine is assumed to expose tensor 0 as the input and tensor 1 as the output
        pImpl->inputIndex = 0;
        pImpl->outputIndex = 1;
        // input tensor
        const char* inputName = pImpl->engine->getIOTensorName(pImpl->inputIndex);
        if (!inputName) {
            Logger::error("Failed to get input tensor name");
            return false;
        }
        Logger::info("Input tensor name: " + std::string(inputName));
        auto dims = pImpl->engine->getTensorShape(inputName);
        Logger::info("Input tensor dimensions: " + std::to_string(dims.nbDims) + " dimensions");
        // the input is expected to be NCHW
        if (dims.nbDims < 4) {
            Logger::error("Invalid input dimensions: expected at least 4, got " +
                          std::to_string(dims.nbDims));
            return false;
        }
        std::string dimStr = "Input dimensions: (";
        for (int i = 0; i < dims.nbDims; i++) {
            dimStr += std::to_string(dims.d[i]);
            if (i < dims.nbDims - 1) dimStr += ", ";
        }
        dimStr += ")";
        Logger::info(dimStr);
        // record the network input size
        pImpl->inputH = dims.d[2];
        pImpl->inputW = dims.d[3];
        Logger::info("Input HxW: " + std::to_string(pImpl->inputH) + "x" +
                     std::to_string(pImpl->inputW));
        // output tensor
        const char* outputName = pImpl->engine->getIOTensorName(pImpl->outputIndex);
        if (!outputName) {
            Logger::error("Failed to get output tensor name");
            return false;
        }
        Logger::info("Output tensor name: " + std::string(outputName));
        auto outputDims = pImpl->engine->getTensorShape(outputName);
        Logger::info("Output tensor dimensions: " + std::to_string(outputDims.nbDims) +
                     " dimensions");
        std::string outDimStr = "Output dimensions: (";
        for (int i = 0; i < outputDims.nbDims; i++) {
            outDimStr += std::to_string(outputDims.d[i]);
            if (i < outputDims.nbDims - 1) outDimStr += ", ";
        }
        outDimStr += ")";
        Logger::info(outDimStr);
        // compute buffer sizes (a non-positive, dynamic dimension counts as 1)
        size_t inputSize = sizeof(float);
        for (int i = 0; i < dims.nbDims; i++) {
            inputSize *= (dims.d[i] > 0) ? static_cast<size_t>(dims.d[i]) : 1;
        }
        size_t outputSize = sizeof(float);
        size_t total_output_size = 1;
        for (int i = 0; i < outputDims.nbDims; i++) {
            size_t d = (outputDims.d[i] > 0) ? static_cast<size_t>(outputDims.d[i]) : 1;
            outputSize *= d;
            total_output_size *= d;
        }
        Logger::info("Total output size: " + std::to_string(total_output_size) + " elements");
        Logger::info("Allocating device memory...");
        Logger::info("Input buffer size: " + std::to_string(inputSize) + " bytes");
        Logger::info("Output buffer size: " + std::to_string(outputSize) + " bytes");
        // allocate the host output buffer
        pImpl->hostOutput = new float[total_output_size];
        // allocate the GPU buffers
        cudaError_t error = cudaMalloc(&pImpl->input_buffer, inputSize);
        if (error != cudaSuccess) {
            Logger::error("Failed to allocate input buffer: " +
                          std::string(cudaGetErrorString(error)));
            return false;
        }
        error = cudaMalloc(&pImpl->output_buffer, outputSize);
        if (error != cudaSuccess) {
            Logger::error("Failed to allocate output buffer: " +
                          std::string(cudaGetErrorString(error)));
            cudaFree(pImpl->input_buffer);
            pImpl->input_buffer = nullptr;
            return false;
        }
        // point the binding array at the GPU buffers
        pImpl->buffers[pImpl->inputIndex] = pImpl->input_buffer;
        pImpl->buffers[pImpl->outputIndex] = pImpl->output_buffer;
        Logger::info("Model loaded successfully");
        return true;
    }
    catch (const std::exception& e) {
        Logger::error("Error in loadModel: " + std::string(e.what()));
        return false;
    }
}
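// Preprocessing: resize to the network input size, convert BGR -> RGB, scale
// to [0, 1], and repack OpenCV's interleaved HWC layout into the planar CHW
// layout the network expects (one contiguous plane per channel on the GPU).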
void TensorRTEngine::preprocess(const cv::Mat& input_image, float* gpu_input) {
    try {
        Logger::info("Starting preprocessing...");
        // validate the inputs
        if (input_image.empty()) {
            throw std::runtime_error("Input image is empty");
        }
        if (!gpu_input) {
            throw std::runtime_error("GPU input buffer is null");
        }
        // resize to the network input size
        cv::Mat resized;
        cv::resize(input_image, resized, cv::Size(pImpl->inputW, pImpl->inputH));
        // BGR to RGB
        cv::Mat rgb;
        cv::cvtColor(resized, rgb, cv::COLOR_BGR2RGB);
        // convert to float and normalize to [0, 1]
        cv::Mat float_img;
        rgb.convertTo(float_img, CV_32F, 1.0 / 255.0);
        // split into planar channels
        std::vector<cv::Mat> channels;
        cv::split(float_img, channels);
        if (channels.size() != 3) {
            throw std::runtime_error("Expected 3 channels, got " +
                                     std::to_string(channels.size()));
        }
        // bytes per channel plane
        size_t channel_size = pImpl->inputH * pImpl->inputW * sizeof(float);
        // copy each plane to the GPU
        for (int i = 0; i < 3; i++) {
            cudaError_t error = cudaMemcpyAsync(
                gpu_input + i * pImpl->inputH * pImpl->inputW,
                channels[i].data,
                channel_size,
                cudaMemcpyHostToDevice,
                pImpl->stream
            );
            if (error != cudaSuccess) {
                throw std::runtime_error("Failed to copy channel " + std::to_string(i) +
                                         " to GPU: " + cudaGetErrorString(error));
            }
        }
        // synchronize to make sure the copies have completed
        cudaError_t error = cudaStreamSynchronize(pImpl->stream);
        if (error != cudaSuccess) {
            throw std::runtime_error("Failed to synchronize CUDA stream: " +
                                     std::string(cudaGetErrorString(error)));
        }
        Logger::info("Preprocessing completed successfully");
    }
    catch (const std::exception& e) {
        Logger::error("Error in preprocessing: " + std::string(e.what()));
        throw;
    }
}
bool TensorRTEngine::infer(const cv::Mat& input_image, std::vector<DetectionResult>& detections) {
    try {
        Logger::info("=== Starting Inference ===");
        if (!pImpl->context || !pImpl->engine) {
            Logger::error("TensorRT engine or context is null");
            return false;
        }
        if (input_image.empty()) {
            Logger::error("Input image is empty");
            return false;
        }
        // log the input image properties
        Logger::info("Input image: " + std::to_string(input_image.cols) + "x" +
                     std::to_string(input_image.rows) + " channels: " +
                     std::to_string(input_image.channels()));
        // preprocessing
        try {
            preprocess(input_image, (float*)pImpl->buffers[pImpl->inputIndex]);
        } catch (const std::exception& e) {
            Logger::error("Error in preprocessing: " + std::string(e.what()));
            return false;
        }
        // run inference
        bool status = false;
        try {
            cudaStreamSynchronize(pImpl->stream); // make sure earlier work has finished
            status = pImpl->context->executeV2(pImpl->buffers);
            cudaStreamSynchronize(pImpl->stream); // wait for inference to complete
        } catch (const std::exception& e) {
            Logger::error("Error during inference execution: " + std::string(e.what()));
            return false;
        }
        if (!status) {
            Logger::error("Failed to execute inference");
            return false;
        }
        // compute the output element count
        nvinfer1::Dims output_dims = pImpl->engine->getTensorShape(
            pImpl->engine->getIOTensorName(pImpl->outputIndex));
        size_t output_size = 1;
        for (int i = 0; i < output_dims.nbDims; i++) {
            output_size *= output_dims.d[i];
        }
        // copy the result back to the host
        cudaError_t error = cudaMemcpyAsync(
            pImpl->hostOutput,
            pImpl->buffers[pImpl->outputIndex],
            output_size * sizeof(float), // use the computed size
            cudaMemcpyDeviceToHost,
            pImpl->stream
        );
        if (error != cudaSuccess) {
            Logger::error("Failed to copy output data: " + std::string(cudaGetErrorString(error)) +
                          " (size: " + std::to_string(output_size) + ")");
            return false;
        }
        // synchronize and wait for the copy
        error = cudaStreamSynchronize(pImpl->stream);
        if (error != cudaSuccess) {
            Logger::error("Failed to synchronize CUDA stream: " +
                          std::string(cudaGetErrorString(error)));
            return false;
        }
        // postprocessing
        try {
            detections = postprocess(pImpl->hostOutput, 1);
        } catch (const std::exception& e) {
            Logger::error("Error in postprocessing: " + std::string(e.what()));
            return false;
        }
        // log the detection results
        if (!detections.empty()) {
            Logger::info("=== Detection Results ===");
            Logger::info("Found " + std::to_string(detections.size()) + " objects");
            for (const auto& det : detections) {
                Logger::info("  Object: shoe");
                Logger::info("  Confidence: " + std::to_string(det.confidence));
                Logger::info("  Box: (" + std::to_string(det.x1) + ", " +
                             std::to_string(det.y1) + ", " +
                             std::to_string(det.x2) + ", " +
                             std::to_string(det.y2) + ")");
            }
        }
        // save a visualization of the detections
        if (!detections.empty()) {
            static int frame_count = 0;
            frame_count++;
            // save one result every 100 frames
            if (frame_count % 100 == 0) {
                cv::Mat output = input_image.clone();
                // draw the detection boxes
                for (const auto& det : detections) {
                    cv::rectangle(output,
                                  cv::Point(det.x1, det.y1),
                                  cv::Point(det.x2, det.y2),
                                  cv::Scalar(0, 255, 0), 2);
                    std::string label = "shoe: " + std::to_string(det.confidence);
                    cv::putText(output, label,
                                cv::Point(det.x1, det.y1 - 10),
                                cv::FONT_HERSHEY_SIMPLEX, 0.5,
                                cv::Scalar(0, 255, 0), 2);
                }
                // save the image (the results/ directory must already exist)
                std::string filename = "results/detection_" +
                                       std::to_string(frame_count) + ".jpg";
                cv::imwrite(filename, output);
                Logger::info("Saved detection result to: " + filename);
            }
        }
        Logger::info("=== Inference Completed ===");
        return true;
    }
    catch (const std::exception& e) {
        Logger::error("Error during inference: " + std::string(e.what()));
        return false;
    }
}
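// Postprocessing: the raw YOLOv5 output is a flat array of num_boxes rows,
// each laid out as [cx, cy, w, h, objectness, class_0, ...]. For a 640x640
// input, 25200 = (80*80 + 40*40 + 20*20) * 3 anchors. Boxes are filtered by
// score and size, then deduplicated with per-class NMS. Note that this code
// treats cx/cy/w/h as normalized to [0, 1]; some YOLOv5 exports emit them in
// input-pixel units, in which case the scaling below would have to go.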
std::vector<DetectionResult> TensorRTEngine::postprocess(float* output, int batch_size) {
    (void)batch_size; // only single-image batches are produced upstream
    std::vector<DetectionResult> results;
    // thresholds
    const float conf_threshold = 0.6f; // confidence threshold (raised)
    const float nms_threshold = 0.4f;  // NMS IoU threshold (lowered)
    const float min_box_size = 20.0f;  // minimum box size (pixels)
    const float max_box_size = 416.0f; // maximum box size (pixels)
    const int num_classes = 1;         // single class (shoe)
    const int num_boxes = 25200;       // number of boxes in the YOLOv5 output
    Logger::info("Post-processing parameters:");
    Logger::info("  Confidence threshold: " + std::to_string(conf_threshold));
    Logger::info("  NMS threshold: " + std::to_string(nms_threshold));
    Logger::info("  Min box size: " + std::to_string(min_box_size));
    Logger::info("  Max box size: " + std::to_string(max_box_size));
    // per-class detection buckets
    std::vector<std::vector<DetectionResult>> class_detections(num_classes);
    int total_boxes = 0;
    int filtered_by_conf = 0;
    int filtered_by_size = 0;
    int filtered_by_nms = 0;
    // walk every predicted box
    for (int i = 0; i < num_boxes; i++) {
        float* box = output + i * (5 + num_classes);
        float confidence = box[4];
        // objectness check
        if (confidence < conf_threshold) {
            filtered_by_conf++;
            continue;
        }
        float class_score = box[5];
        float final_score = confidence * class_score;
        if (final_score > conf_threshold) {
            DetectionResult det;
            float cx = box[0] * pImpl->inputW;
            float cy = box[1] * pImpl->inputH;
            float w = box[2] * pImpl->inputW;
            float h = box[3] * pImpl->inputH;
            // box size check
            if (w < min_box_size || h < min_box_size ||
                w > max_box_size || h > max_box_size) {
                filtered_by_size++;
                continue;
            }
            det.x1 = std::max(0.0f, cx - w / 2);
            det.y1 = std::max(0.0f, cy - h / 2);
            det.x2 = std::min(float(pImpl->inputW), cx + w / 2);
            det.y2 = std::min(float(pImpl->inputH), cy + h / 2);
            det.confidence = final_score;
            det.class_id = 0;
            class_detections[0].push_back(det);
            total_boxes++;
        }
    }
    Logger::info("Detection statistics:");
    Logger::info("  Total boxes processed: " + std::to_string(num_boxes));
    Logger::info("  Filtered by confidence: " + std::to_string(filtered_by_conf));
    Logger::info("  Filtered by size: " + std::to_string(filtered_by_size));
    Logger::info("  Remaining after initial filtering: " + std::to_string(total_boxes));
    // NMS
    for (int c = 0; c < num_classes; c++) {
        auto& dets = class_detections[c];
        if (dets.empty()) continue;
        std::sort(dets.begin(), dets.end(),
                  [](const DetectionResult& a, const DetectionResult& b) {
                      return a.confidence > b.confidence;
                  });
        std::vector<bool> keep(dets.size(), true);
        for (size_t i = 0; i < dets.size(); i++) {
            if (!keep[i]) continue;
            for (size_t j = i + 1; j < dets.size(); j++) {
                if (!keep[j]) continue;
                float iou = calculateIoU(dets[i], dets[j]);
                if (iou > nms_threshold) {
                    keep[j] = false;
                    filtered_by_nms++;
                }
            }
        }
        for (size_t i = 0; i < dets.size(); i++) {
            if (keep[i]) {
                results.push_back(dets[i]);
            }
        }
    }
    Logger::info("  Filtered by NMS: " + std::to_string(filtered_by_nms));
    Logger::info("  Final detection count: " + std::to_string(results.size()));
    return results;
}
// IoU between two axis-aligned boxes: intersection area over union area
float TensorRTEngine::calculateIoU(const DetectionResult& a, const DetectionResult& b) {
    float x1 = std::max(a.x1, b.x1);
    float y1 = std::max(a.y1, b.y1);
    float x2 = std::min(a.x2, b.x2);
    float y2 = std::min(a.y2, b.y2);
    if (x2 < x1 || y2 < y1) return 0.0f;
    float intersection = (x2 - x1) * (y2 - y1);
    float area_a = (a.x2 - a.x1) * (a.y2 - a.y1);
    float area_b = (b.x2 - b.x1) * (b.y2 - b.y1);
    return intersection / (area_a + area_b - intersection);
}
bool TensorRTEngine::inferGPU(float* gpu_input, std::vector<DetectionResult>& detections) {
    try {
        if (!pImpl->context || !pImpl->engine || !gpu_input) {
            Logger::error("TensorRT engine, context, or input buffer is null");
            return false;
        }
        // run inference directly on data that is already resident in GPU memory
        void* buffers[2] = {gpu_input, pImpl->output_buffer};
        bool status = false;
        try {
            cudaStreamSynchronize(pImpl->stream); // make sure earlier work has finished
            status = pImpl->context->executeV2(buffers);
            cudaStreamSynchronize(pImpl->stream); // wait for inference to complete
        } catch (const std::exception& e) {
            Logger::error("Error during inference execution: " + std::string(e.what()));
            return false;
        }
        if (!status) {
            Logger::error("Failed to execute inference");
            return false;
        }
        // compute the output element count
        nvinfer1::Dims output_dims = pImpl->engine->getTensorShape(
            pImpl->engine->getIOTensorName(pImpl->outputIndex));
        size_t output_size = 1;
        for (int i = 0; i < output_dims.nbDims; i++) {
            output_size *= output_dims.d[i];
        }
        // copy the result back to the host
        cudaError_t error = cudaMemcpyAsync(
            pImpl->hostOutput,
            pImpl->output_buffer,
            output_size * sizeof(float),
            cudaMemcpyDeviceToHost,
            pImpl->stream
        );
        if (error != cudaSuccess) {
            Logger::error("Failed to copy output data: " + std::string(cudaGetErrorString(error)));
            return false;
        }
        // synchronize and wait for the copy
        error = cudaStreamSynchronize(pImpl->stream);
        if (error != cudaSuccess) {
            Logger::error("Failed to synchronize CUDA stream: " +
                          std::string(cudaGetErrorString(error)));
            return false;
        }
        // postprocessing
        detections = postprocess(pImpl->hostOutput, 1);
        return true;
    }
    catch (const std::exception& e) {
        Logger::error("Error during GPU inference: " + std::string(e.what()));
        return false;
    }
}
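// Usage sketch (a hypothetical caller, not part of this translation unit):
// the model path and image name below are placeholders. The constructor
// builds or loads model.engine next to the ONNX file, and infer() runs the
// full preprocess -> execute -> postprocess pipeline on one frame.
//
//     #include "inference/tensorrt_engine.hpp"
//     #include <opencv2/opencv.hpp>
//     #include <cstdio>
//
//     int main() {
//         TensorRTEngine engine("models/yolov5s.onnx", /*gpu_id=*/0);
//         cv::Mat frame = cv::imread("test.jpg");
//         std::vector<DetectionResult> detections;
//         if (engine.infer(frame, detections)) {
//             for (const auto& det : detections) {
//                 std::printf("shoe %.2f at (%.0f, %.0f, %.0f, %.0f)\n",
//                             det.confidence, det.x1, det.y1, det.x2, det.y2);
//             }
//         }
//         return 0;
//     }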