// Change log:
//  - Create basic project structure and directories
//  - Add CMake build system
//  - Implement basic configuration-parsing functionality
//  - Add YOLO inference framework support
//  - Integrate RTSP and video-stream processing
//  - Add performance monitoring and logging system
#include <gtest/gtest.h>

#include "pipeline/inference/trt_inference.hpp"
#include "pipeline/common/yaml_config_parser.hpp"

#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <cuda_runtime.h>

#include <sys/stat.h>

#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

using namespace pipeline;
class TrtInferenceTest : public ::testing::Test {
|
|
protected:
|
|
static void SetUpTestSuite() {
|
|
// 初始化CUDA环境
|
|
cudaError_t err = cudaSetDevice(0);
|
|
if (err != cudaSuccess) {
|
|
throw std::runtime_error("Failed to set CUDA device: " +
|
|
std::string(cudaGetErrorString(err)));
|
|
}
|
|
|
|
// 重置设备以确保清洁状态
|
|
err = cudaDeviceReset();
|
|
if (err != cudaSuccess) {
|
|
throw std::runtime_error("Failed to reset CUDA device: " +
|
|
std::string(cudaGetErrorString(err)));
|
|
}
|
|
|
|
// 设置设备标志,启用内存池
|
|
err = cudaSetDeviceFlags(cudaDeviceScheduleAuto |
|
|
cudaDeviceLmemResizeToMax |
|
|
cudaDeviceMapHost);
|
|
if (err != cudaSuccess) {
|
|
throw std::runtime_error("Failed to set device flags: " +
|
|
std::string(cudaGetErrorString(err)));
|
|
}
|
|
|
|
// 设置缓存限制
|
|
size_t limit = 128 * 1024 * 1024; // 128MB
|
|
err = cudaDeviceSetLimit(cudaLimitMallocHeapSize, limit);
|
|
if (err != cudaSuccess) {
|
|
throw std::runtime_error("Failed to set malloc heap size limit: " +
|
|
std::string(cudaGetErrorString(err)));
|
|
}
|
|
}
|
|
|
|
static void TearDownTestSuite() {
|
|
// 确保所有CUDA操作完成
|
|
cudaDeviceSynchronize();
|
|
// 清理缓存
|
|
cudaDeviceReset();
|
|
}
|
|
|
|
void SetUp() override {
|
|
// 配置测试参数
|
|
config_.model.input_shape = {3, 640, 640}; // 修改为引擎期望的尺寸
|
|
config_.model.engine_path = "/app/models/yolov8n.engine";
|
|
config_.model.precision = "FP16";
|
|
config_.max_batch_size = 1;
|
|
config_.threshold.conf = 0.5f;
|
|
config_.threshold.nms = 0.45f;
|
|
config_.gpu_id = 0;
|
|
config_.workspace_size = 1ULL << 26; // 64MB
|
|
|
|
// 创建CUDA流
|
|
cudaError_t err = cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking);
|
|
if (err != cudaSuccess) {
|
|
throw std::runtime_error("Failed to create CUDA stream in SetUp: " +
|
|
std::string(cudaGetErrorString(err)));
|
|
}
|
|
|
|
// 准备测试图像(使用正确的输入尺寸)
|
|
test_image_ = cv::Mat(640, 640, CV_8UC3, cv::Scalar(0, 0, 0));
|
|
cv::randu(test_image_, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255));
|
|
|
|
// 清空CUDA错误状态
|
|
err = cudaGetLastError();
|
|
if (err != cudaSuccess) {
|
|
throw std::runtime_error("CUDA error in SetUp: " +
|
|
std::string(cudaGetErrorString(err)));
|
|
}
|
|
}
|
|
|
|
void TearDown() override {
|
|
try {
|
|
// 同步并销毁CUDA流
|
|
if (stream_) {
|
|
cudaStreamSynchronize(stream_);
|
|
cudaStreamDestroy(stream_);
|
|
stream_ = nullptr;
|
|
}
|
|
|
|
// 释放图像资源
|
|
test_image_.release();
|
|
|
|
// 清理CUDA缓存
|
|
cudaDeviceSynchronize();
|
|
cudaMemPool_t mempool;
|
|
if (cudaDeviceGetDefaultMemPool(&mempool, 0) == cudaSuccess) {
|
|
cudaMemPoolTrimTo(mempool, 0);
|
|
}
|
|
|
|
// 清理CUDA错误状态
|
|
cudaGetLastError();
|
|
} catch (const std::exception& e) {
|
|
std::cerr << "Exception in TearDown: " << e.what() << std::endl;
|
|
}
|
|
}
|
|
|
|
void checkCudaError(const char* message) {
|
|
// 首先同步流
|
|
if (stream_) {
|
|
cudaError_t sync_err = cudaStreamSynchronize(stream_);
|
|
if (sync_err != cudaSuccess) {
|
|
ADD_FAILURE() << "Stream synchronization failed: " << cudaGetErrorString(sync_err);
|
|
throw std::runtime_error("Stream synchronization failed: " +
|
|
std::string(cudaGetErrorString(sync_err)));
|
|
}
|
|
}
|
|
|
|
// 然后检查错误
|
|
cudaError_t err = cudaGetLastError();
|
|
if (err != cudaSuccess) {
|
|
ADD_FAILURE() << message << ": " << cudaGetErrorString(err);
|
|
throw std::runtime_error(std::string(message) + ": " + cudaGetErrorString(err));
|
|
}
|
|
}
|
|
|
|
InferenceConfig config_;
|
|
cv::Mat test_image_;
|
|
cudaStream_t stream_{nullptr};
|
|
};
|
|
|
|
// 基本功能测试
|
|
TEST_F(TrtInferenceTest, BasicInference) {
|
|
try {
|
|
// 创建推理对象
|
|
std::unique_ptr<TrtInference> inference;
|
|
ASSERT_NO_THROW(inference = std::make_unique<TrtInference>(config_));
|
|
checkCudaError("After creating TrtInference");
|
|
|
|
// 加载引擎
|
|
ASSERT_TRUE(inference->loadEngine()) << "Engine loading failed";
|
|
checkCudaError("After loading engine");
|
|
|
|
// 准备输入数据
|
|
std::vector<cv::Mat> images{test_image_};
|
|
|
|
// 执行推理
|
|
std::vector<DetectionResult> results;
|
|
ASSERT_TRUE(inference->infer(images, results)) << "Inference failed";
|
|
checkCudaError("After inference");
|
|
|
|
// 确保资源被正确释放
|
|
inference.reset();
|
|
cudaStreamSynchronize(stream_);
|
|
checkCudaError("After releasing inference object");
|
|
|
|
} catch (const std::exception& e) {
|
|
FAIL() << "Exception: " << e.what();
|
|
}
|
|
}
|
|
|
|
// 批处理测试
|
|
TEST_F(TrtInferenceTest, BatchProcessing) {
|
|
try {
|
|
std::unique_ptr<TrtInference> inference;
|
|
ASSERT_NO_THROW(inference = std::make_unique<TrtInference>(config_));
|
|
checkCudaError("After creating TrtInference");
|
|
|
|
ASSERT_TRUE(inference->loadEngine());
|
|
checkCudaError("After loading engine");
|
|
|
|
// 创建批处理输入(使用较小的批量)
|
|
std::vector<cv::Mat> images;
|
|
images.push_back(test_image_.clone()); // 只使用一个图像进行测试
|
|
|
|
std::vector<DetectionResult> results;
|
|
ASSERT_TRUE(inference->infer(images, results));
|
|
checkCudaError("After inference");
|
|
|
|
EXPECT_EQ(results.size(), images.size());
|
|
|
|
// 清理资源
|
|
images.clear();
|
|
inference.reset();
|
|
cudaStreamSynchronize(stream_);
|
|
checkCudaError("After batch processing");
|
|
|
|
} catch (const std::exception& e) {
|
|
FAIL() << "Exception: " << e.what();
|
|
}
|
|
}
|
|
|
|
// 错误处理测试
|
|
TEST_F(TrtInferenceTest, ErrorHandling) {
|
|
try {
|
|
std::unique_ptr<TrtInference> inference;
|
|
ASSERT_NO_THROW(inference = std::make_unique<TrtInference>(config_));
|
|
checkCudaError("After creating TrtInference");
|
|
|
|
ASSERT_TRUE(inference->loadEngine());
|
|
checkCudaError("After loading engine");
|
|
|
|
std::vector<DetectionResult> results;
|
|
|
|
// 测试空输入
|
|
{
|
|
std::vector<cv::Mat> empty_images;
|
|
EXPECT_FALSE(inference->infer(empty_images, results));
|
|
checkCudaError("After testing empty input");
|
|
}
|
|
|
|
// 测试超出批大小
|
|
{
|
|
std::vector<cv::Mat> too_many_images(2, test_image_); // 只使用2张图像
|
|
EXPECT_FALSE(inference->infer(too_many_images, results));
|
|
checkCudaError("After testing batch size limit");
|
|
}
|
|
|
|
// 清理资源
|
|
inference.reset();
|
|
cudaStreamSynchronize(stream_);
|
|
checkCudaError("After error handling tests");
|
|
|
|
} catch (const std::exception& e) {
|
|
FAIL() << "Exception: " << e.what();
|
|
}
|
|
}
|
|
|
|
// 简化的性能测试
|
|
TEST_F(TrtInferenceTest, SimplePerformance) {
|
|
try {
|
|
std::unique_ptr<TrtInference> inference;
|
|
ASSERT_NO_THROW(inference = std::make_unique<TrtInference>(config_));
|
|
checkCudaError("After creating TrtInference");
|
|
|
|
ASSERT_TRUE(inference->loadEngine());
|
|
checkCudaError("After loading engine");
|
|
|
|
std::vector<cv::Mat> images{test_image_};
|
|
std::vector<DetectionResult> results;
|
|
|
|
// 预热
|
|
ASSERT_TRUE(inference->infer(images, results));
|
|
cudaStreamSynchronize(stream_);
|
|
checkCudaError("After warmup");
|
|
|
|
// 简化的性能测试(减少迭代次数)
|
|
const int num_iterations = 2; // 进一步减少迭代次数
|
|
for (int i = 0; i < num_iterations; ++i) {
|
|
ASSERT_TRUE(inference->infer(images, results));
|
|
cudaStreamSynchronize(stream_);
|
|
checkCudaError(("During performance test iteration " + std::to_string(i)).c_str());
|
|
}
|
|
|
|
// 清理资源
|
|
inference.reset();
|
|
cudaStreamSynchronize(stream_);
|
|
checkCudaError("After performance test");
|
|
|
|
} catch (const std::exception& e) {
|
|
FAIL() << "Exception: " << e.what();
|
|
}
|
|
}
|