"""Group VOC-style XML annotation files by the number of boxes they contain."""
import os.path

try:
    from lxml import etree
except ImportError:
    # lxml is optional: the stdlib parser provides the same ``fromstring`` and
    # element interface (len, iteration, .tag, .text) that this script uses.
    from xml.etree import ElementTree as etree

# Report file written by sal_dataset() when no explicit output path is given.
DEFAULT_OUTPUT_FILE = 'sal_dat_安全帽手套数据集a.txt'


def parse_xml_to_dict(xml):
    """Recursively convert an XML element into nested dictionaries.

    Args:
        xml: An element (lxml or ``xml.etree.ElementTree``) to convert.

    Returns:
        A one-entry dict ``{xml.tag: value}``. For a leaf element ``value`` is
        the element text; otherwise it is a dict keyed by child tag. Children
        tagged ``object`` are collected into a list because an annotation may
        contain several of them; for any other repeated tag the last child
        wins.
    """
    # Use an explicit length test: truth-testing an Element is deprecated.
    if len(xml) == 0:
        # Reached a leaf: return the text stored under this tag.
        return {xml.tag: xml.text}

    result = {}
    for child in xml:
        child_result = parse_xml_to_dict(child)  # recurse into the child tag
        if child.tag != 'object':
            result[child.tag] = child_result[child.tag]
        else:
            # There may be several objects, so they are gathered in a list.
            result.setdefault(child.tag, []).append(child_result[child.tag])
    return {xml.tag: result}


def _load_annotation(xml_path):
    """Read one annotation file and return the parsed ``annotation`` content."""
    # errors='ignore' keeps illegal characters from aborting the read.
    with open(xml_path, encoding='gb18030', errors='ignore') as fid:
        xml_str = fid.read()
    # NOTE(review): lxml rejects str input that carries an XML encoding
    # declaration; confirm the annotation files do not declare one.
    xml = etree.fromstring(xml_str)
    return parse_xml_to_dict(xml)["annotation"]


def sal_dataset(path, output_path=DEFAULT_OUTPUT_FILE):
    """Sort the annotation files in ``path`` by box count and write a report.

    Every entry of ``path`` is parsed as an annotation XML file and assigned
    to one of four groups: no boxes, one box, two boxes, more than two boxes.
    A file with exactly one box is kept only when that box is named
    ``Person``; other single-box files are left out of the report. The counts
    of the one/two/many groups are printed, and all four groups are written
    to ``output_path`` as UTF-8 text.

    Args:
        path: Directory containing the annotation XML files.
        output_path: Destination of the report. Defaults to the file name the
            script has always used, relative to the working directory.

    Raises:
        KeyError: If a file's root element is not ``annotation``.
        OSError: If the directory or one of its entries cannot be read.
    """
    # Sorted so the report is reproducible; os.listdir order is arbitrary.
    all_xml_path = [os.path.join(path, t) for t in sorted(os.listdir(path))]

    z_obj = []  # images without any box
    o_obj = []  # images with exactly one box
    t_obj = []  # images with two boxes
    m_obj = []  # images with more than two boxes

    for xml_path in all_xml_path:
        data = _load_annotation(xml_path)
        # A childless <annotation> parses to its text (str or None), not a dict.
        objects = data.get('object') if isinstance(data, dict) else None

        if not objects:
            z_obj.append(xml_path)
        elif len(objects) == 1:
            # Drop images whose only box is not a person (e.g. only a helmet).
            only_box = objects[0]
            if isinstance(only_box, dict) and only_box.get('name') == 'Person':
                o_obj.append(xml_path)
        elif len(objects) == 2:
            t_obj.append(xml_path)
        else:
            m_obj.append(xml_path)

    print(f'一个框的图片个数:{len(o_obj)}, 两个框的图片个数:{len(t_obj)},多个框的图片个数:{len(m_obj)}')

    sections = (
        ('无框', z_obj),
        ('一框', o_obj),
        ('二框', t_obj),
        ('多框', m_obj),
    )
    with open(output_path, 'w', encoding='utf-8') as file:
        for title, xml_paths in sections:
            file.write('%s\n' % title)
            file.writelines('%s\n' % item for item in xml_paths)
            file.write('\n')  # blank line between sections


if __name__ == '__main__':
    path = 'E:/haotian/YOLO安全帽手套检测数据集(含1000张图片)+对应voc、coco和yolo三种格式标签+划分脚本+训练教程/datasets/Annotaions_PH'
    sal_dataset(path)