84 lines
2.7 KiB
Python
84 lines
2.7 KiB
Python
import os.path
|
||
|
||
from lxml import etree
|
||
|
||
|
||
def parse_xml_to_dict(xml):
    """Recursively convert an XML element into nested dictionaries.

    Args:
        xml: An element object (e.g. from ``lxml.etree``) that exposes
            ``tag``, ``text`` and iteration over its children.

    Returns:
        A single-key dict ``{xml.tag: value}``. For an element without child
        elements ``value`` is ``xml.text`` (which may be ``None``). Otherwise
        ``value`` is a dict mapping each child tag to its converted content.
        Children tagged ``'object'`` are gathered into a list because an
        annotation can hold several of them; for any other repeated tag the
        last occurrence wins.
    """
    # Comments and processing instructions are iterated like children but
    # carry a non-string ``tag``; they are not data, so leave them out.
    children = [child for child in xml if isinstance(child.tag, str)]

    if not children:
        # Reached a leaf: hand back the text stored under this tag.
        return {xml.tag: xml.text}

    result = {}
    for child in children:
        child_value = parse_xml_to_dict(child)[child.tag]
        if child.tag != 'object':
            result[child.tag] = child_value
        else:
            # 'object' may occur many times, so accumulate into a list.
            result.setdefault(child.tag, []).append(child_value)
    return {xml.tag: result}
|
||
|
||
|
||
def sal_dataset(path, output_file='sal_dat_安全帽手套数据集a.txt'):
    """Group the annotation files in *path* by how many boxes they contain.

    Every regular file directly inside *path* is read, parsed as XML and
    converted with ``parse_xml_to_dict``. Its path is then filed under one of
    four groups (no box, one box, two boxes, many boxes). A summary of the
    one/two/many counts is printed and all four groups are written to
    *output_file*.

    Args:
        path: Directory holding the XML annotation files.
        output_file: Text file that receives the grouped paths. Defaults to
            the name the script has always written to.

    Returns:
        None.

    Raises:
        OSError: If *path* cannot be listed or a file cannot be opened.
        KeyError: If a file's root element is not ``annotation``.
        Errors raised by ``etree.fromstring`` for unparsable content are
        propagated unchanged.
    """
    # Sorted so the report is reproducible (os.listdir order is arbitrary);
    # sub-directories are skipped because open() cannot read them.
    all_xml_path = [
        full_path
        for full_path in (os.path.join(path, t) for t in sorted(os.listdir(path)))
        if os.path.isfile(full_path)
    ]

    z_obj = []  # annotations without any box
    o_obj = []  # annotations with exactly one box
    t_obj = []  # annotations with two boxes
    m_obj = []  # annotations with more than two boxes

    for xml_path in all_xml_path:
        # errors='ignore' keeps illegal characters from aborting the read.
        with open(xml_path, encoding='gb18030', errors='ignore') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = parse_xml_to_dict(xml)["annotation"]  # parsed annotation content

        try:
            objects = data['object']
            # Only a single-box annotation needs its label inspected.
            sole_name = objects[0]['name'] if len(objects) == 1 else None
        except (KeyError, TypeError):
            # No 'object' entry at all, or a lone object without a readable
            # name: file it under "no box", as the script always has.
            z_obj.append(xml_path)
            continue

        if len(objects) == 1:
            # Drop images whose only box is a hat: keep 'Person' boxes only.
            if sole_name == 'Person':
                o_obj.append(xml_path)
        elif len(objects) == 2:
            t_obj.append(xml_path)
        else:
            m_obj.append(xml_path)

    print(f'一个框的图片个数:{len(o_obj)}, 两个框的图片个数:{len(t_obj)},多个框的图片个数:{len(m_obj)}')

    # Section heading -> paths, in the order they appear in the report.
    sections = (
        ('无框', z_obj),
        ('一框', o_obj),
        ('二框', t_obj),
        ('多框', m_obj),
    )
    with open(output_file, 'w', encoding='utf-8') as file:
        for heading, items in sections:
            file.write(f'{heading}\n')
            file.writelines(f'{item}\n' for item in items)
            file.write('\n')
|
||
|
||
|
||
|
||
if __name__ == '__main__':
    # Directory with the XML annotation files to be grouped by box count.
    annotation_dir = 'E:/haotian/YOLO安全帽手套检测数据集(含1000张图片)+对应voc、coco和yolo三种格式标签+划分脚本+训练教程/datasets/Annotaions_PH'
    sal_dataset(annotation_dir)
|
||
|