"""YOLOv8 object detection with ONNX Runtime.

Loads an exported YOLOv8 ONNX model, letterboxes a BGR image to the model
input size, runs inference, and decodes the raw output tensor into boxes,
scores and class ids expressed in original-image pixel coordinates.
"""

import cv2
import numpy as np
import onnxruntime as ort


class YOLOv8ONNX:
    """Thin inference wrapper around a YOLOv8 model exported to ONNX."""

    def __init__(self, model_path, conf_threshold=0.5, iou_threshold=0.4):
        """Initialize the YOLOv8 ONNX detector.

        Args:
            model_path: Path to the ONNX model file.
            conf_threshold: Minimum class confidence for a box to be kept.
            iou_threshold: IoU threshold used by non-maximum suppression.
        """
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

        # Create the ONNX Runtime session (default execution providers).
        self.session = ort.InferenceSession(model_path)

        # Cache input/output tensor names for session.run().
        self.input_name = self.session.get_inputs()[0].name
        self.output_name = self.session.get_outputs()[0].name

        # Input layout assumed NCHW: [batch, channels, height, width].
        # Models exported with dynamic axes report symbolic dims as strings;
        # fall back to the common YOLOv8 default of 640 in that case.
        input_shape = self.session.get_inputs()[0].shape
        self.input_height = input_shape[2] if isinstance(input_shape[2], int) else 640
        self.input_width = input_shape[3] if isinstance(input_shape[3], int) else 640

    def preprocess(self, image):
        """Letterbox-resize a BGR image to the model input size.

        Args:
            image: Input image in BGR order, shape (H, W, 3), uint8.

        Returns:
            Tuple of (input_tensor, scale, (pad_x, pad_y)) where
            input_tensor is a float32 NCHW array scaled to [0, 1],
            scale is the uniform resize ratio applied to the image, and
            (pad_x, pad_y) are the left/top letterbox offsets in pixels.
        """
        h, w = image.shape[:2]

        # Uniform scale that fits the whole image inside the model input.
        scale = min(self.input_height / h, self.input_width / w)
        new_h, new_w = int(h * scale), int(w * scale)

        # Aspect-preserving resize.
        resized_image = cv2.resize(image, (new_w, new_h))

        # Center the resized image; 114 is the conventional YOLO pad gray.
        pad_x = (self.input_width - new_w) // 2
        pad_y = (self.input_height - new_h) // 2
        padded_image = np.full(
            (self.input_height, self.input_width, 3), 114, dtype=np.uint8
        )
        padded_image[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = resized_image

        # BGR -> RGB, HWC -> CHW, normalize to [0, 1], add batch dimension.
        input_image = (
            padded_image[:, :, ::-1].transpose(2, 0, 1).astype(np.float32) / 255.0
        )
        input_image = np.expand_dims(input_image, axis=0)

        return input_image, scale, (pad_x, pad_y)

    def postprocess(self, outputs, scale, pad_info, original_shape):
        """Decode raw YOLOv8 output into boxes, scores and class ids.

        Args:
            outputs: Raw session outputs; outputs[0] is typically shaped
                [1, 4 + num_classes, num_boxes] (e.g. [1, 6, 8400]).
            scale: Resize ratio returned by preprocess().
            pad_info: (pad_x, pad_y) letterbox offsets from preprocess().
            original_shape: Original image shape (at least (H, W)).

        Returns:
            Tuple (boxes, scores, class_ids): boxes are [x1, y1, x2, y2]
            arrays in original-image pixels; all three are empty arrays
            when nothing survives confidence filtering or NMS.
        """
        predictions = outputs[0]

        # YOLOv8 emits [batch, 4+num_classes, num_boxes]; transpose to
        # [batch, num_boxes, 4+num_classes] for row-wise decoding.
        if len(predictions.shape) == 3:
            predictions = predictions.transpose(0, 2, 1)
        predictions = predictions[0]  # drop batch dimension

        # Split box geometry (cx, cy, w, h) from per-class confidences.
        boxes = predictions[:, :4]
        scores = predictions[:, 4:]

        # Best class per candidate box.
        class_ids = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)

        # Drop low-confidence candidates.
        keep = confidences > self.conf_threshold
        valid_boxes = boxes[keep]
        valid_confidences = confidences[keep]
        valid_class_ids = class_ids[keep]

        if len(valid_boxes) == 0:
            return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=np.int64)

        # (cx, cy, w, h) -> (x1, y1, x2, y2), still in letterbox coords.
        x_center = valid_boxes[:, 0]
        y_center = valid_boxes[:, 1]
        width = valid_boxes[:, 2]
        height = valid_boxes[:, 3]
        converted_boxes = np.stack(
            [
                x_center - width / 2,
                y_center - height / 2,
                x_center + width / 2,
                y_center + height / 2,
            ],
            axis=1,
        )

        # Undo letterbox padding and scaling back to original-image coords.
        pad_x, pad_y = pad_info
        converted_boxes[:, [0, 2]] = (converted_boxes[:, [0, 2]] - pad_x) / scale
        converted_boxes[:, [1, 3]] = (converted_boxes[:, [1, 3]] - pad_y) / scale

        # Clamp to image bounds.
        h, w = original_shape[:2]
        converted_boxes[:, [0, 2]] = np.clip(converted_boxes[:, [0, 2]], 0, w)
        converted_boxes[:, [1, 3]] = np.clip(converted_boxes[:, [1, 3]], 0, h)

        # cv2.dnn.NMSBoxes expects [x, y, w, h] rectangles, NOT corner
        # coordinates — convert before running NMS (bug fix: the previous
        # code passed [x1, y1, x2, y2], so IoU was computed incorrectly).
        nms_rects = converted_boxes.copy()
        nms_rects[:, 2] -= nms_rects[:, 0]
        nms_rects[:, 3] -= nms_rects[:, 1]

        indices = cv2.dnn.NMSBoxes(
            nms_rects.tolist(),
            valid_confidences.tolist(),
            self.conf_threshold,
            self.iou_threshold,
        )

        if len(indices) > 0:
            # Depending on the OpenCV version, indices may be an ndarray of
            # shape (k,) or (k, 1), or a tuple — normalize to a flat array.
            indices = np.asarray(indices).flatten()
            return (
                converted_boxes[indices],
                valid_confidences[indices],
                valid_class_ids[indices],
            )
        return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=np.int64)

    def detect(self, image):
        """Run the full detection pipeline on a single BGR image.

        Args:
            image: Input image in BGR order.

        Returns:
            Tuple (boxes, scores, class_ids) in original-image coordinates.
        """
        input_image, scale, pad_info = self.preprocess(image)
        outputs = self.session.run(
            [self.output_name], {self.input_name: input_image}
        )
        return self.postprocess(outputs, scale, pad_info, image.shape)


def draw_detections(image, boxes, scores, class_ids, class_names=None):
    """Draw detection boxes and labels on a copy of the image.

    Args:
        image: Input image (BGR).
        boxes: Detection boxes as [x1, y1, x2, y2].
        scores: Confidence score per box.
        class_ids: Class id per box.
        class_names: Optional list of class names indexed by class id.

    Returns:
        A copy of the image with detections drawn on it.
    """
    result_image = image.copy()

    for box, score, class_id in zip(boxes, scores, class_ids):
        x1, y1, x2, y2 = map(int, box)

        # Bounding box.
        cv2.rectangle(result_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Label text: named class when available, raw id otherwise.
        if class_names and class_id < len(class_names):
            label = f"{class_names[class_id]}: {score:.2f}"
        else:
            label = f"Class {class_id}: {score:.2f}"

        # Filled background behind the label for readability.
        label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
        cv2.rectangle(
            result_image,
            (x1, y1 - label_size[1] - 10),
            (x1 + label_size[0], y1),
            (0, 255, 0),
            -1,
        )

        # Label text in black on the green background.
        cv2.putText(
            result_image,
            label,
            (x1, y1 - 5),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 0),
            2,
        )

    return result_image


if __name__ == "__main__":
    # 1. Initialize the detector.
    detector = YOLOv8ONNX(
        "/home/admin-root/haotian/康达瑞贝斯机器狗/yolov8_20250820.onnx",
        conf_threshold=0.25,
        iou_threshold=0.45,
    )

    # 2. Load the image; cv2.imread returns None on failure, so fail loudly
    #    instead of crashing later inside preprocess().
    image_path = (
        "/home/admin-root/haotian/康达瑞贝斯机器狗/YoloV8Obj/dataset_20250819"
        "/train/images/1e4c75b76e531606e2adc491a8f09ae8_frame_000140.jpg"
    )
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Failed to read image: {image_path}")

    # 3. Run detection.
    boxes, scores, class_ids = detector.detect(image)
    print(boxes, scores, class_ids)

    # 4. Class names, in the order used at training time.
    class_names = ["extinguish", "other"]

    # 5. Print the detections.
    print(f"检测到 {len(boxes)} 个目标:")
    for i, (box, score, class_id) in enumerate(zip(boxes, scores, class_ids)):
        class_name = (
            class_names[class_id]
            if class_id < len(class_names)
            else f"unknown_{class_id}"
        )
        print(f" {i+1}: {class_name} - 置信度: {score:.3f} - 位置: {box}")

    # 6. Draw the detections.
    result_image = draw_detections(image, boxes, scores, class_ids, class_names)

    # 7. Save the result (display is disabled for headless environments).
    # cv2.imshow("Detection Result", result_image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()
    cv2.imwrite("result_yolov8.jpg", result_image)