yolo_standard_libray/003统计数据集项目.py
2025-03-07 11:35:40 +08:00

84 lines
2.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os.path
from lxml import etree
def parse_xml_to_dict(xml):
    """Recursively convert an XML element into nested dictionaries.

    Args:
        xml: An element object supporting ``len()``, iteration over its
            children, and the ``.tag`` / ``.text`` attributes (for example
            an ``lxml.etree`` element).

    Returns:
        A single-key dict ``{xml.tag: value}``. For an element without
        children, ``value`` is ``xml.text``. Otherwise ``value`` is a dict
        keyed by child tag; children tagged ``'object'`` are collected into
        a list under the ``'object'`` key, while any other repeated tag
        keeps only its last occurrence.
    """
    if len(xml) == 0:  # leaf element: return the text stored under its tag
        return {xml.tag: xml.text}

    result = {}
    for child in xml:
        child_value = parse_xml_to_dict(child)[child.tag]  # recurse into the child
        if child.tag == 'object':
            # An annotation may hold several objects, so they go into a list.
            result.setdefault(child.tag, []).append(child_value)
        else:
            result[child.tag] = child_value
    return {xml.tag: result}
def _write_section(file, title, items):
    """Write one report section: the title line, one item per line, then a blank line."""
    file.write(f'{title}\n')
    file.writelines(f'{item}\n' for item in items)
    file.write('\n')


def sal_dataset(path, output_path='sal_dat_安全帽手套数据集a.txt'):
    """Group the annotation files in ``path`` by how many boxes they contain.

    Every entry of ``path`` is read as an XML annotation whose root tag is
    ``annotation`` and sorted into one of four groups: no box, one box, two
    boxes, or more than two boxes. A one-line summary is printed and the
    grouped file paths are written to ``output_path``.

    A file with exactly one box is recorded only when that box is named
    ``'Person'``; other single-box files are left out of every group (the
    original note says this drops images that only contain a helmet).

    Args:
        path: Directory holding the XML annotation files. Every directory
            entry is opened, so it should contain nothing else.
        output_path: Destination of the text report. Defaults to the file
            name this script has always written to.

    Returns:
        None.
    """
    all_xml_path = [os.path.join(path, t) for t in os.listdir(path)]
    z_obj = []  # annotation files with no box
    o_obj = []  # annotation files with exactly one box
    t_obj = []  # annotation files with exactly two boxes
    m_obj = []  # annotation files with more than two boxes

    for xml_path in all_xml_path:
        # errors='ignore' skips bytes that cannot be decoded instead of raising.
        with open(xml_path, encoding='gb18030', errors='ignore') as fid:
            xml_str = fid.read()
        # NOTE(review): lxml rejects str input that carries an XML encoding
        # declaration -- confirm the annotation files do not have one.
        xml = etree.fromstring(xml_str)
        data = parse_xml_to_dict(xml)["annotation"]  # parsed annotation content
        try:
            objects = data['object']
            if len(objects) == 1:
                # Keep single-box files only when the box is a person.
                if objects[0]['name'] == 'Person':
                    o_obj.append(xml_path)
            elif len(objects) == 2:
                t_obj.append(xml_path)
            else:
                m_obj.append(xml_path)
        except (KeyError, TypeError):
            # Missing 'object' entry (or malformed object data): count the
            # file as having no box. Unrelated errors are no longer swallowed.
            z_obj.append(xml_path)

    print(f'一个框的图片个数:{len(o_obj)}, 两个框的图片个数:{len(t_obj)},多个框的图片个数:{len(m_obj)}')

    with open(output_path, 'w', encoding='utf-8') as file:
        _write_section(file, '无框', z_obj)
        _write_section(file, '一框', o_obj)
        _write_section(file, '二框', t_obj)
        _write_section(file, '多框', m_obj)
if __name__ == '__main__':
    import sys

    # Directory of annotation files analysed when no argument is given.
    default_path = 'E:/haotian/YOLO安全帽手套检测数据集(含1000张图片)+对应voc、coco和yolo三种格式标签+划分脚本+训练教程/datasets/Annotaions_PH'
    # An optional first command-line argument overrides the default directory.
    path = sys.argv[1] if len(sys.argv) > 1 else default_path
    sal_dataset(path)