# convert-the-model-to-rknn/012验证yolov8ONNX模型.py
#
# 274 lines
# 9.8 KiB
# Python

import cv2
import numpy as np
import onnxruntime as ort
class YOLOv8ONNX:
    """Minimal YOLOv8 detector backed by an ONNX Runtime session.

    Performs letterbox preprocessing, a single forward pass, and
    confidence-filter + NMS postprocessing back to original-image
    coordinates.
    """

    def __init__(self, model_path, conf_threshold=0.5, iou_threshold=0.4):
        """
        Initialize the YOLOv8 ONNX model.

        Args:
            model_path: path to the ONNX model file
            conf_threshold: confidence threshold for keeping detections
            iou_threshold: IoU threshold used by NMS
        """
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold
        # Create the ONNX Runtime inference session.
        self.session = ort.InferenceSession(model_path)
        # Cache input/output tensor names.
        self.input_name = self.session.get_inputs()[0].name
        self.output_name = self.session.get_outputs()[0].name
        # Input layout assumed NCHW: [batch, channels, height, width].
        # NOTE(review): models exported with dynamic axes report symbolic
        # (non-integer) dims here — confirm the model has a fixed input size.
        input_shape = self.session.get_inputs()[0].shape
        self.input_height = input_shape[2]
        self.input_width = input_shape[3]

    def preprocess(self, image):
        """
        Letterbox-resize an image to the model input size.

        Args:
            image: input image (BGR, HxWx3 uint8)
        Returns:
            input_image: preprocessed tensor, shape (1, 3, H, W), float32 in [0, 1]
            scale: resize ratio applied to the original image
            pad_info: padding offsets (pad_x, pad_y)
        """
        h, w = image.shape[:2]
        # Uniform scale that fits the image inside the model input.
        scale = min(self.input_height / h, self.input_width / w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized_image = cv2.resize(image, (new_w, new_h))
        # Center the resized image; pad the borders.
        pad_x = (self.input_width - new_w) // 2
        pad_y = (self.input_height - new_h) // 2
        # Gray (114) padding matches the Ultralytics letterbox convention.
        padded_image = np.full((self.input_height, self.input_width, 3), 114, dtype=np.uint8)
        padded_image[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = resized_image
        # BGR -> RGB, HWC -> CHW, normalize to [0, 1].
        input_image = padded_image[:, :, ::-1].transpose(2, 0, 1).astype(np.float32) / 255.0
        input_image = np.expand_dims(input_image, axis=0)  # add batch dimension
        return input_image, scale, (pad_x, pad_y)

    def postprocess(self, outputs, scale, pad_info, original_shape):
        """
        Postprocess raw model output into detections in original-image space.

        Args:
            outputs: raw model outputs (list with one array)
            scale: resize ratio returned by preprocess
            pad_info: padding offsets (pad_x, pad_y) returned by preprocess
            original_shape: original image shape (h, w, ...)
        Returns:
            boxes: detection boxes [[x1, y1, x2, y2], ...]
            scores: confidence scores
            class_ids: class indices
        """
        # Typical shape: [1, 4+num_classes, num_boxes] (e.g. [1, 6, 8400]).
        predictions = outputs[0]
        # Transpose to [batch, num_boxes, 4+num_classes] for row-wise access.
        if len(predictions.shape) == 3:
            predictions = predictions.transpose(0, 2, 1)
        predictions = predictions[0]  # drop batch dim: [num_boxes, 4+num_classes]
        # Split box geometry from per-class confidences.
        boxes = predictions[:, :4]   # [x_center, y_center, width, height]
        scores = predictions[:, 4:]  # [num_boxes, num_classes]
        # Best class and its confidence per box.
        class_ids = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)
        # Drop low-confidence detections.
        valid_indices = confidences > self.conf_threshold
        valid_boxes = boxes[valid_indices]
        valid_confidences = confidences[valid_indices]
        valid_class_ids = class_ids[valid_indices]
        if len(valid_boxes) == 0:
            return [], [], []
        # Convert center/size to corner format [x1, y1, x2, y2].
        x_center, y_center, width, height = (
            valid_boxes[:, 0], valid_boxes[:, 1], valid_boxes[:, 2], valid_boxes[:, 3]
        )
        x1 = x_center - width / 2
        y1 = y_center - height / 2
        x2 = x_center + width / 2
        y2 = y_center + height / 2
        converted_boxes = np.stack([x1, y1, x2, y2], axis=1)
        # Undo letterbox padding/scaling to map back to the original image.
        pad_x, pad_y = pad_info
        converted_boxes[:, [0, 2]] = (converted_boxes[:, [0, 2]] - pad_x) / scale
        converted_boxes[:, [1, 3]] = (converted_boxes[:, [1, 3]] - pad_y) / scale
        # Clamp to image bounds.
        h, w = original_shape[:2]
        converted_boxes[:, [0, 2]] = np.clip(converted_boxes[:, [0, 2]], 0, w)
        converted_boxes[:, [1, 3]] = np.clip(converted_boxes[:, [1, 3]], 0, h)
        # Non-maximum suppression.
        # BUG FIX: cv2.dnn.NMSBoxes expects boxes as [x, y, w, h]; the
        # previous code passed [x1, y1, x2, y2], distorting IoU computation.
        nms_boxes = converted_boxes.copy()
        nms_boxes[:, 2] = nms_boxes[:, 2] - nms_boxes[:, 0]  # width
        nms_boxes[:, 3] = nms_boxes[:, 3] - nms_boxes[:, 1]  # height
        indices = cv2.dnn.NMSBoxes(
            nms_boxes.tolist(),
            valid_confidences.tolist(),
            self.conf_threshold,
            self.iou_threshold
        )
        if len(indices) > 0:
            indices = np.asarray(indices).flatten()
            return converted_boxes[indices], valid_confidences[indices], valid_class_ids[indices]
        return [], [], []

    def detect(self, image):
        """
        Run object detection on a single image.

        Args:
            image: input image (BGR)
        Returns:
            boxes: detection boxes [[x1, y1, x2, y2], ...]
            scores: confidence scores
            class_ids: class indices
        """
        # Preprocess -> inference -> postprocess.
        input_image, scale, pad_info = self.preprocess(image)
        outputs = self.session.run([self.output_name], {self.input_name: input_image})
        boxes, scores, class_ids = self.postprocess(outputs, scale, pad_info, image.shape)
        return boxes, scores, class_ids
def draw_detections(image, boxes, scores, class_ids, class_names=None):
    """
    Draw detection results onto a copy of the image.

    Args:
        image: input image (BGR)
        boxes: detection boxes [[x1, y1, x2, y2], ...]
        scores: confidence scores
        class_ids: class indices
        class_names: optional list of class names indexed by class id
    Returns:
        A copy of the image with boxes and labels drawn.
    """
    result_image = image.copy()
    # Note: unused enumerate index from the original loop removed.
    for box, score, class_id in zip(boxes, scores, class_ids):
        x1, y1, x2, y2 = map(int, box)
        # Bounding box.
        cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Label text: "name: score" if a name is available, else the raw id.
        if class_names and class_id < len(class_names):
            label = f"{class_names[class_id]}: {score:.2f}"
        else:
            label = f"Class {class_id}: {score:.2f}"
        # Filled background so the label stays readable on any image content.
        label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
        cv2.rectangle(result_image, (x1, y1 - label_size[1] - 10),
                      (x1 + label_size[0], y1), (0, 255, 0), -1)
        # Label text in black over the green background.
        cv2.putText(result_image, label, (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
    return result_image
# # 使用示例
# if __name__ == "__main__":
# # 1. 初始化检测器 (降低置信度阈值进行测试)
# detector = YOLOv8ONNX("path/to/your/model.onnx", conf_threshold=0.25, iou_threshold=0.45)
# # 2. 加载图像
# image = cv2.imread("path/to/your/image.jpg")
# # 3. 进行检测
# boxes, scores, class_ids = detector.detect(image)
# # 4. 定义你的两个类别名称 (根据训练时的类别顺序调整)
# class_names = ["class_0", "class_1"] # 替换为你的实际类别名称
# # 5. 打印检测结果
# print(f"检测到 {len(boxes)} 个目标:")
# for i, (box, score, class_id) in enumerate(zip(boxes, scores, class_ids)):
# class_name = class_names[class_id] if class_id < len(class_names) else f"unknown_{class_id}"
# print(f" {i+1}: {class_name} - 置信度: {score:.3f} - 位置: {box}")
# # 6. 绘制结果
# result_image = draw_detections(image, boxes, scores, class_ids, class_names)
# # 7. 显示或保存结果
# cv2.imshow("Detection Result", result_image)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# # 保存结果
# # cv2.imwrite("result.jpg", result_image)
# # 使用示例
if __name__ == "__main__":
    # 1. Initialize the detector.
    detector = YOLOv8ONNX("/home/admin-root/haotian/康达瑞贝斯机器狗/yolov8_20250820.onnx", conf_threshold=0.25, iou_threshold=0.45)
    # 2. Load the image.
    image = cv2.imread("/home/admin-root/haotian/康达瑞贝斯机器狗/YoloV8Obj/dataset_20250819/train/images/1e4c75b76e531606e2adc491a8f09ae8_frame_000140.jpg")
    # cv2.imread returns None (no exception) on a missing/unreadable file;
    # fail fast with a clear error instead of crashing inside detect().
    if image is None:
        raise FileNotFoundError("Failed to load input image; check the path.")
    # 3. Run detection.
    boxes, scores, class_ids = detector.detect(image)
    print(boxes, scores, class_ids)
    # 4. Optional: class names (adjust to match the model's training order).
    class_names = ["extinguish", "other"]
    # 5. Print detection results.
    print(f"检测到 {len(boxes)} 个目标:")
    for i, (box, score, class_id) in enumerate(zip(boxes, scores, class_ids)):
        class_name = class_names[class_id] if class_id < len(class_names) else f"unknown_{class_id}"
        print(f" {i+1}: {class_name} - 置信度: {score:.3f} - 位置: {box}")
    # 6. Draw results.
    result_image = draw_detections(image, boxes, scores, class_ids, class_names)
    # 7. Save the annotated image. (Display via cv2.imshow omitted for
    # headless environments.)
    cv2.imwrite("result_yolov8.jpg", result_image)