python：VOC格式数据集转换为YOLO数据集格式

常有 · 发表于 2024-9-10 07:00:21

作者：CSDN@_养乐多_

本文将介绍如何将目标检测中常用的VOC格式数据集转换为YOLO数据集，并进行数据集比例划分，从而方便地进行YOLO目标检测。如果不想分两步，可以直接看第三节代码。

文章目录

- 一、将VOC格式数据集转换为YOLO格式数据集
- 二、YOLO格式数据集划分（训练、验证、测试）
- 三、一步到位

## 一、将VOC格式数据集转换为YOLO格式数据集

执行以下脚本将VOC格式数据集转换为YOLO格式数据集。但是需要注意的是：转换之后的数据集只有images和labels两个文件夹，还需要执行第二节中的脚本进行数据集划分，将总的数据集划分为训练、验证、测试数据集。使用时，需要修改class_mapping中的类别名和对应标签，以及VOC数据集路径、YOLO数据集路径。

```python
import os
import shutil
import xml.etree.ElementTree as ET

# VOC格式数据集路径
voc_data_path = 'E:\\DataSet\\helmet-VOC'
voc_annotations_path = os.path.join(voc_data_path, 'Annotations')
voc_images_path = os.path.join(voc_data_path, 'JPEGImages')

# YOLO格式数据集保存路径
yolo_data_path = 'E:\\DataSet\\helmet-YOLO'
yolo_images_path = os.path.join(yolo_data_path, 'images')
yolo_labels_path = os.path.join(yolo_data_path, 'labels')

# 创建YOLO格式数据集目录
os.makedirs(yolo_images_path, exist_ok=True)
os.makedirs(yolo_labels_path, exist_ok=True)

# 类别映射 (可以根据自己的数据集进行调整)
class_mapping = {
    'head': 0,
    'helmet': 1,
    'person': 2,
    # 添加更多类别...
}


def convert_voc_to_yolo(voc_annotation_file, yolo_label_file):
    tree = ET.parse(voc_annotation_file)
    root = tree.getroot()

    size = root.find('size')
    width = float(size.find('width').text)
    height = float(size.find('height').text)

    with open(yolo_label_file, 'w') as f:
        for obj in root.findall('object'):
            cls = obj.find('name').text
            if cls not in class_mapping:
                continue
            cls_id = class_mapping[cls]
            xmlbox = obj.find('bndbox')
            xmin = float(xmlbox.find('xmin').text)
            ymin = float(xmlbox.find('ymin').text)
            xmax = float(xmlbox.find('xmax').text)
            ymax = float(xmlbox.find('ymax').text)

            x_center = (xmin + xmax) / 2.0 / width
            y_center = (ymin + ymax) / 2.0 / height
            w = (xmax - xmin) / width
            h = (ymax - ymin) / height

            f.write(f"{cls_id} {x_center} {y_center} {w} {h}\n")


# 遍历VOC数据集的Annotations目录，进行转换
for voc_annotation in os.listdir(voc_annotations_path):
    if voc_annotation.endswith('.xml'):
        voc_annotation_file = os.path.join(voc_annotations_path, voc_annotation)
        image_id = os.path.splitext(voc_annotation)[0]
        voc_image_file = os.path.join(voc_images_path, f"{image_id}.jpg")
        yolo_label_file = os.path.join(yolo_labels_path, f"{image_id}.txt")
        yolo_image_file = os.path.join(yolo_images_path, f"{image_id}.jpg")

        convert_voc_to_yolo(voc_annotation_file, yolo_label_file)
        if os.path.exists(voc_image_file):
            shutil.copy(voc_image_file, yolo_image_file)

print("转换完成！")
```

## 二、YOLO格式数据集划分（训练、验证、测试）

随机将数据集按照0.7-0.2-0.1比例划分为训练、验证、测试数据集。

注意，修改代码中图片的后缀，如果是.jpg，就把.png修改为.jpg。

最终结果：在YOLO数据集目录下生成train、val、test三个文件夹，每个文件夹中各包含images和labels两个子文件夹。

```python
import os
import shutil
import random

# YOLO格式数据集保存路径
yolo_images_path1 = 'E:\\DataSet\\helmet-VOC'
yolo_labels_path1 = 'E:\\DataSet\\helmet-YOLO'
yolo_data_path = yolo_labels_path1

yolo_images_path = os.path.join(yolo_images_path1, 'JPEGImages')
yolo_labels_path = os.path.join(yolo_labels_path1, 'labels')

# 创建划分后的目录结构
train_images_path = os.path.join(yolo_data_path, 'train', 'images')
train_labels_path = os.path.join(yolo_data_path, 'train', 'labels')
val_images_path = os.path.join(yolo_data_path, 'val', 'images')
val_labels_path = os.path.join(yolo_data_path, 'val', 'labels')
test_images_path = os.path.join(yolo_data_path, 'test', 'images')
test_labels_path = os.path.join(yolo_data_path, 'test', 'labels')

os.makedirs(train_images_path, exist_ok=True)
os.makedirs(train_labels_path, exist_ok=True)
os.makedirs(val_images_path, exist_ok=True)
os.makedirs(val_labels_path, exist_ok=True)
os.makedirs(test_images_path, exist_ok=True)
os.makedirs(test_labels_path, exist_ok=True)

# 获取所有图片文件名（不包含扩展名）
image_files = [f[:-4] for f in os.listdir(yolo_images_path) if f.endswith('.png')]

# 随机打乱文件顺序
random.shuffle(image_files)

# 划分数据集比例
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

train_count = int(train_ratio * len(image_files))
val_count = int(val_ratio * len(image_files))
test_count = len(image_files) - train_count - val_count

train_files = image_files[:train_count]
val_files = image_files[train_count:train_count + val_count]
test_files = image_files[train_count + val_count:]


# 移动文件到相应的目录
def move_files(files, src_images_path, src_labels_path, dst_images_path, dst_labels_path):
    for file in files:
        src_image_file = os.path.join(src_images_path, f"{file}.png")
        src_label_file = os.path.join(src_labels_path, f"{file}.txt")
        dst_image_file = os.path.join(dst_images_path, f"{file}.png")
        dst_label_file = os.path.join(dst_labels_path, f"{file}.txt")

        if os.path.exists(src_image_file) and os.path.exists(src_label_file):
            shutil.move(src_image_file, dst_image_file)
            shutil.move(src_label_file, dst_label_file)


# 移动训练集文件
move_files(train_files, yolo_images_path, yolo_labels_path, train_images_path, train_labels_path)
# 移动验证集文件
move_files(val_files, yolo_images_path, yolo_labels_path, val_images_path, val_labels_path)
# 移动测试集文件
move_files(test_files, yolo_images_path, yolo_labels_path, test_images_path, test_labels_path)

print("数据集划分完成！")
```

## 三、一步到位

如果不想分两步进行格式转换，那么以下脚本结合了以上两步，直接得到最后按比例划分训练、验证、测试的数据集结果。

注意：需要修改voc_data_path、yolo_data_path、class_mapping以及'.png'后缀。

```python
import os
import shutil
import random
import xml.etree.ElementTree as ET
from tqdm import tqdm

# VOC格式数据集路径
voc_data_path = 'E:\\DataSet-VOC'
voc_annotations_path = os.path.join(voc_data_path, 'Annotations')
voc_images_path = os.path.join(voc_data_path, 'JPEGImages')

# YOLO格式数据集保存路径
yolo_data_path = 'E:\\DataSet-YOLO'
yolo_images_path = os.path.join(yolo_data_path, 'images')
yolo_labels_path = os.path.join(yolo_data_path, 'labels')

# 创建YOLO格式数据集目录
os.makedirs(yolo_images_path, exist_ok=True)
os.makedirs(yolo_labels_path, exist_ok=True)

# 类别映射 (可以根据自己的数据集进行调整)
class_mapping = {
    'head': 0,
    'helmet': 1,
    'person': 2,
    # 添加更多类别...
}


def convert_voc_to_yolo(voc_annotation_file, yolo_label_file):
    tree = ET.parse(voc_annotation_file)
    root = tree.getroot()

    size = root.find('size')
    width = float(size.find('width').text)
    height = float(size.find('height').text)

    with open(yolo_label_file, 'w') as f:
        for obj in root.findall('object'):
            cls = obj.find('name').text
            if cls not in class_mapping:
                continue
            cls_id = class_mapping[cls]
            xmlbox = obj.find('bndbox')
            xmin = float(xmlbox.find('xmin').text)
            ymin = float(xmlbox.find('ymin').text)
            xmax = float(xmlbox.find('xmax').text)
            ymax = float(xmlbox.find('ymax').text)

            x_center = (xmin + xmax) / 2.0 / width
            y_center = (ymin + ymax) / 2.0 / height
            w = (xmax - xmin) / width
            h = (ymax - ymin) / height

            f.write(f"{cls_id} {x_center} {y_center} {w} {h}\n")


# 遍历VOC数据集的Annotations目录，进行转换
print("开始VOC到YOLO格式转换...")
for voc_annotation in tqdm(os.listdir(voc_annotations_path)):
    if voc_annotation.endswith('.xml'):
        voc_annotation_file = os.path.join(voc_annotations_path, voc_annotation)
        image_id = os.path.splitext(voc_annotation)[0]
        voc_image_file = os.path.join(voc_images_path, f"{image_id}.png")
        yolo_label_file = os.path.join(yolo_labels_path, f"{image_id}.txt")
        yolo_image_file = os.path.join(yolo_images_path, f"{image_id}.png")

        convert_voc_to_yolo(voc_annotation_file, yolo_label_file)
        if os.path.exists(voc_image_file):
            shutil.copy(voc_image_file, yolo_image_file)

print("VOC到YOLO格式转换完成！")

# 划分数据集
train_images_path = os.path.join(yolo_data_path, 'train', 'images')
train_labels_path = os.path.join(yolo_data_path, 'train', 'labels')
val_images_path = os.path.join(yolo_data_path, 'val', 'images')
val_labels_path = os.path.join(yolo_data_path, 'val', 'labels')
test_images_path = os.path.join(yolo_data_path, 'test', 'images')
test_labels_path = os.path.join(yolo_data_path, 'test', 'labels')

os.makedirs(train_images_path, exist_ok=True)
os.makedirs(train_labels_path, exist_ok=True)
os.makedirs(val_images_path, exist_ok=True)
os.makedirs(val_labels_path, exist_ok=True)
os.makedirs(test_images_path, exist_ok=True)
os.makedirs(test_labels_path, exist_ok=True)

# 获取所有图片文件名（不包含扩展名）
image_files = [f[:-4] for f in os.listdir(yolo_images_path) if f.endswith('.png')]

# 随机打乱文件顺序
random.shuffle(image_files)

# 划分数据集比例
train_ratio = 0.7
val_ratio = 0.2
test_ratio = 0.1

train_count = int(train_ratio * len(image_files))
val_count = int(val_ratio * len(image_files))
test_count = len(image_files) - train_count - val_count

train_files = image_files[:train_count]
val_files = image_files[train_count:train_count + val_count]
test_files = image_files[train_count + val_count:]


# 移动文件到相应的目录
def move_files(files, src_images_path, src_labels_path, dst_images_path, dst_labels_path):
    for file in tqdm(files):
        src_image_file = os.path.join(src_images_path, f"{file}.png")
        src_label_file = os.path.join(src_labels_path, f"{file}.txt")
        dst_image_file = os.path.join(dst_images_path, f"{file}.png")
        dst_label_file = os.path.join(dst_labels_path, f"{file}.txt")

        if os.path.exists(src_image_file) and os.path.exists(src_label_file):
            shutil.move(src_image_file, dst_image_file)
            shutil.move(src_label_file, dst_label_file)


# 移动训练集文件
print("移动训练集文件...")
move_files(train_files, yolo_images_path, yolo_labels_path, train_images_path, train_labels_path)
# 移动验证集文件
print("移动验证集文件...")
move_files(val_files, yolo_images_path, yolo_labels_path, val_images_path, val_labels_path)
# 移动测试集文件
print("移动测试集文件...")
move_files(test_files, yolo_images_path, yolo_labels_path, test_images_path, test_labels_path)

print("数据集划分完成！")

# 删除原始的images和labels文件夹
shutil.rmtree(yolo_images_path)
shutil.rmtree(yolo_labels_path)

print("原始images和labels文件夹删除完成！")
```

		自动登录	找回密码
密码			会员注册