Python数据增强图像数据集扩充

见贤思齐 · 发表于 2024-9-8 13:11:58

前言：该脚本用于图像数据增强，特别是目标检测任务中的图像和标签数据增强。通过应用一系列数据增强技术（如旋转、平移、裁剪、加噪声、改变亮度、cutout、翻转等），生成多样化的图像数据集，以提高目标检测模型的鲁棒性和准确性。

效果：img 中存放原始图像 168 张，img2 中扩充后的图像为 5040 张。

目录
1. 环境准备
2. 显示图片函数
3. 数据增强类
   3.1 类初始化
   3.2 数据增强方法
   3.3 数据增强主方法
4. XML解析工具类
   4.1 解析XML
   4.2 保存图片
   4.3 保存XML
5. 主函数
完整程序

1. 环境准备

这段代码导入了脚本所需的库，用于图像处理（cv2、numpy）、随机操作（random）、文件操作（os）、XML解析（etree）等。

# -*- coding=utf-8 -*-
import time
import random
import copy
import cv2
import os
import math
import numpy as np
from skimage.util import random_noise
from lxml import etree, objectify
import xml.etree.ElementTree as ET
import argparse

2. 显示图片函数

该函数用于显示图片，并在图片上绘制边界框（bounding box）。

def show_pic(img, bboxes=None):
    '''
    输入:
        img: 图像array
        bboxes: 图像的所有bounding box list, 格式为[[x_min, y_min, x_max, y_max]....]
    '''
    for i in range(len(bboxes)):
        bbox = bboxes[i]
        x_min = bbox[0]
        y_min = bbox[1]
        x_max = bbox[2]
        y_max = bbox[3]
        cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 3)
    cv2.namedWindow('pic', 0)
    cv2.moveWindow('pic', 0, 0)
    cv2.resizeWindow('pic', 1200, 800)
    cv2.imshow('pic', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

3. 数据增强类

3.1 类初始化

该类初始化函数设置了数据增强的各种参数和是否启用某种增强方式的标志。

class DataAugmentForObjectDetection():
    def __init__(self, rotation_rate=0.5, max_rotation_angle=5,
                 crop_rate=0.5, shift_rate=0.5, change_light_rate=0.5,
                 add_noise_rate=0.5, flip_rate=0.5,
                 cutout_rate=0.5, cut_out_length=50, cut_out_holes=1, cut_out_threshold=0.5,
                 is_addNoise=True, is_changeLight=True, is_cutout=True, is_rotate_img_bbox=True,
                 is_crop_img_bboxes=True, is_shift_pic_bboxes=True, is_filp_pic_bboxes=True):
        self.rotation_rate = rotation_rate
        self.max_rotation_angle = max_rotation_angle
        self.crop_rate = crop_rate
        self.shift_rate = shift_rate
        self.change_light_rate = change_light_rate
        self.add_noise_rate = add_noise_rate
        self.flip_rate = flip_rate
        self.cutout_rate = cutout_rate
        self.cut_out_length = cut_out_length
        self.cut_out_holes = cut_out_holes
        self.cut_out_threshold = cut_out_threshold
        self.is_addNoise = is_addNoise
        self.is_changeLight = is_changeLight
        self.is_cutout = is_cutout
        self.is_rotate_img_bbox = is_rotate_img_bbox
        self.is_crop_img_bboxes = is_crop_img_bboxes
        self.is_shift_pic_bboxes = is_shift_pic_bboxes
        self.i
s_filp_pic_bboxes=is_filp_pic_bboxes3.2数据增强方法加噪声。为图像添加高斯噪声。def_addNoise(self,img):returnrandom_noise(img,mode='gaussian',clip=True)*255改变亮度。随机改变图像亮度。def_changeLight(self,img):alpha=random.uniform(0.35,1)blank=np.zeros(img.shape,img.dtype)returncv2.addWeighted(img,alpha,blank,1-alpha,0)cutout。随机在图像中遮挡某些部分（cutout），避免遮挡太多目标。def_cutout(self,img,bboxes,length=100,n_holes=1,threshold=0.5):defcal_iou(boxA,boxB):xA=max(boxA[0],boxB[0])yA=max(boxA[1],boxB[1])xB=min(boxA[2],boxB[2])yB=min(boxA[3],boxB[3])ifxBthreshold:chongdie=Truebreakmask[y1:y2,x1:x2,:]=0.img=img*maskreturnimg旋转。旋转图像和对应的边界框。def_rotate_img_bbox(self,img,bboxes,angle=5,scale=1.):w,h=img.shape[1],img.shape[0]rangle=np.deg2rad(angle)nw=(abs(np.sin(rangle)*h)+abs(np.cos(rangle)*w))*scalenh=(abs(np.cos(rangle)*h)+abs(np.sin(rangle)*w))*scalerot_mat=cv2.getRotationMatrix2D((nw*0.5,nh*0.5),angle,scale)rot_move=np.dot(rot_mat,np.array([(nw-w)*0.5,(nh-h)*0.5,0]))rot_mat[0,2]+=rot_move[0]rot_mat[1,2]+=rot_move[1]rot_img=cv2.warpAffine(img,rot_mat,(int(math.ceil(nw)),int(math.ceil(nh))),flags=cv2.INTER_LANCZOS4)rot_bboxes=[]forbboxinbboxes:points=np.array([[bbox[0],bbox[1]],[bbox[2],bbox[1]],[bbox[2],bbox[3]],[bbox[0],bbox[3]]])new_points=cv2.transform(points[None,:,:],rot_mat)[0]rx,ry,rw,rh=cv2.boundingRect(new_points)corrected_bbox=[max(0,rx),max(0,ry),min(nw,rx+rw),min(nh,ry+rh)]corrected_bbox=[int(val)forvalincorrected_bbox]rot_bboxes.append(corrected_bbox)returnrot_img,rot_bboxes裁剪。随机裁剪图像，同时裁剪对应的边界框。def_crop_img_bboxes(self,img,bboxes):w=img.shape[1]h=img.shape[0]x_min=wx_max=0y_min=hy_max=0forbboxinbboxes:x_min=min(x_min,bbox[0])y_min=min(y_min,bbox[1])x_max=max(x_max,bbox[2])y_max=max(y_max,bbox[3])d_to_left=x_mind_to_right=w-x_maxd_to_top=y_mind_to_bottom=h-y_maxcrop_x_min=int(x_min-random.uniform(0,d_to_left))crop_y_min=int(y_min-random.uniform(0,d_to_top))crop_x_max=int(x_max+random.uniform(0,d_to_right))crop_y_max=int(y_max+random.uniform(0,d_to_bottom))crop_x_min=max(0,crop_x_min)crop_y_min=max(0,crop_y_min
)crop_x_max=min(w,crop_x_max)crop_y_max=min(h,crop_y_max)crop_img=img[crop_y_min:crop_y_max,crop_x_min:crop_x_max]crop_bboxes=list()forbboxinbboxes:crop_bboxes.append([bbox[0]-crop_x_min,bbox[1]-crop_y_min,bbox[2]-crop_x_min,bbox[3]-crop_y_min])returncrop_img,crop_bboxes平移。随机平移图像和对应的边界框。def_shift_pic_bboxes(self,img,bboxes):h,w=img.shape[:2]x=random.uniform(-w*0.2,w*0.2)y=random.uniform(-h*0.2,h*0.2)M=np.float32([[1,0,x],[0,1,y]])shift_img=cv2.warpAffine(img,M,(w,h))shift_bboxes=[]forbboxinbboxes:new_bbox=[bbox[0]+x,bbox[1]+y,bbox[2]+x,bbox[3]+y]corrected_bbox=[max(0,new_bbox[0]),max(0,new_bbox[1]),min(w,new_bbox[2]),min(h,new_bbox[3])]corrected_bbox=[int(val)forvalincorrected_bbox]shift_bboxes.append(corrected_bbox)returnshift_img,shift_bboxes 翻转。随机翻转图像和对应的边界框。def_filp_pic_bboxes(self,img,bboxes):flipCode=random.choice([-1,0,1])flip_img=cv2.flip(img,flipCode)h,w,_=img.shapeflip_bboxes=[]forbboxinbboxes:x_min,y_min,x_max,y_max=bboxifflipCode==0:new_bbox=[x_min,h-y_max,x_max,h-y_min]elifflipCode==1:new_bbox=[w-x_max,y_min,w-x_min,y_max]else:new_bbox=[w-x_max,h-y_max,w-x_min,h-y_min]flip_bboxes.append(new_bbox)returnflip_img,flip_bboxes3.3数据增强主方法综合应用各种数据增强方法，对输入图像和边界框进行增强。defdataAugment(self,img,bboxes):change_num=0whilechange_numself.rotation_rate:change_num+=1angle=random.uniform(-self.max_rotation_angle,self.max_rotation_angle)scale=random.uniform(0.7,0.8)img,bboxes=self._rotate_img_bbox(img,bboxes,angle,scale)ifself.is_shift_pic_bboxes:ifrandom.random()self.change_light_rate:change_num+=1img=self._changeLight(img)ifself.is_addNoise:ifrandom.random()threshold:chongdie=Truebreakmask[y1:y2,x1:x2,:]=0.img=img*maskreturnimg#---4.旋转---#def_rotate_img_bbox(self,img,bboxes,angle=5,scale=1.):w,h=img.shape[1],img.shape[0]rangle=np.deg2rad(angle)#angleinradiansnw=(abs(np.sin(rangle)*h)+abs(np.cos(rangle)*w))*scalenh=(abs(np.cos(rangle)*h)+abs(np.sin(rangle)*w))*scalerot_mat=cv2.getRotationMatrix2D((nw*0.5,nh*0.5),angle,scale)rot_move=np.dot(rot_mat,np.array([(nw-w)*0.5,(nh-h
)*0.5,0]))rot_mat[0,2]+=rot_move[0]rot_mat[1,2]+=rot_move[1]rot_img=cv2.warpAffine(img,rot_mat,(int(math.ceil(nw)),int(math.ceil(nh))),flags=cv2.INTER_LANCZOS4)rot_bboxes=[]forbboxinbboxes:points=np.array([[bbox[0],bbox[1]],[bbox[2],bbox[1]],[bbox[2],bbox[3]],[bbox[0],bbox[3]]])new_points=cv2.transform(points[None,:,:],rot_mat)[0]rx,ry,rw,rh=cv2.boundingRect(new_points)corrected_bbox=[max(0,rx),max(0,ry),min(nw,rx+rw),min(nh,ry+rh)]corrected_bbox=[int(val)forvalincorrected_bbox]#Converttointandcorrectorderifnecessaryrot_bboxes.append(corrected_bbox)returnrot_img,rot_bboxes#---5.裁剪---#def_crop_img_bboxes(self,img,bboxes):'''裁剪后的图片要包含所有的框输入:img:图像arraybboxes:该图像包含的所有boundingboxs,一个list,每个元素为[x_min,y_min,x_max,y_max],要确保是数值输出:crop_img:裁剪后的图像arraycrop_bboxes:裁剪后的boundingbox的坐标list'''#裁剪图像w=img.shape[1]h=img.shape[0]x_min=w#裁剪后的包含所有目标框的最小的框x_max=0y_min=hy_max=0forbboxinbboxes:x_min=min(x_min,bbox[0])y_min=min(y_min,bbox[1])x_max=max(x_max,bbox[2])y_max=max(y_max,bbox[3])d_to_left=x_min#包含所有目标框的最小框到左边的距离d_to_right=w-x_max#包含所有目标框的最小框到右边的距离d_to_top=y_min#包含所有目标框的最小框到顶端的距离d_to_bottom=h-y_max#包含所有目标框的最小框到底部的距离#随机扩展这个最小框crop_x_min=int(x_min-random.uniform(0,d_to_left))crop_y_min=int(y_min-random.uniform(0,d_to_top))crop_x_max=int(x_max+random.uniform(0,d_to_right))crop_y_max=int(y_max+random.uniform(0,d_to_bottom))#随机扩展这个最小框,防止别裁的太小#crop_x_min=int(x_min-random.uniform(d_to_left//2,d_to_left))#crop_y_min=int(y_min-random.uniform(d_to_top//2,d_to_top))#crop_x_max=int(x_max+random.uniform(d_to_right//2,d_to_right))#crop_y_max=int(y_max+random.uniform(d_to_bottom//2,d_to_bottom))#确保不要越界crop_x_min=max(0,crop_x_min)crop_y_min=max(0,crop_y_min)crop_x_max=min(w,crop_x_max)crop_y_max=min(h,crop_y_max)crop_img=img[crop_y_min:crop_y_max,crop_x_min:crop_x_max]#裁剪boundingbox#裁剪后的boundingbox坐标计算crop_bboxes=list()forbboxinbboxes:crop_bboxes.append([bbox[0]-crop_x_min,bbox[1]-crop_y_min,bbox[2]-crop_x_min,bbox[3]-crop_y_min])returncrop_img,crop_bboxes#---6.平移---#def_shift_pic_bboxes(self,img
,bboxes):h,w=img.shape[:2]x=random.uniform(-w*0.2,w*0.2)y=random.uniform(-h*0.2,h*0.2)M=np.float32([[1,0,x],[0,1,y]])shift_img=cv2.warpAffine(img,M,(w,h))shift_bboxes=[]forbboxinbboxes:new_bbox=[bbox[0]+x,bbox[1]+y,bbox[2]+x,bbox[3]+y]corrected_bbox=[max(0,new_bbox[0]),max(0,new_bbox[1]),min(w,new_bbox[2]),min(h,new_bbox[3])]corrected_bbox=[int(val)forvalincorrected_bbox]#Converttointandcorrectorderifnecessaryshift_bboxes.append(corrected_bbox)returnshift_img,shift_bboxes#---7.镜像---#def_filp_pic_bboxes(self,img,bboxes):#RandomlydecidetheflipmethodflipCode=random.choice([-1,0,1])#-1:both;0:vertical;1:horizontalflip_img=cv2.flip(img,flipCode)#Applythefliph,w,_=img.shapeflip_bboxes=[]forbboxinbboxes:x_min,y_min,x_max,y_max=bboxifflipCode==0:#Verticalflipnew_bbox=[x_min,h-y_max,x_max,h-y_min]elifflipCode==1:#Horizontalflipnew_bbox=[w-x_max,y_min,w-x_min,y_max]else:#Bothflipsnew_bbox=[w-x_max,h-y_max,w-x_min,h-y_min]flip_bboxes.append(new_bbox)returnflip_img,flip_bboxes#图像增强方法defdataAugment(self,img,bboxes):'''图像增强输入:img:图像arraybboxes:该图像的所有框坐标输出:img:增强后的图像bboxes:增强后图片对应的box'''change_num=0#改变的次数#print('------')whilechange_numself.rotation_rate:#旋转change_num+=1angle=random.uniform(-self.max_rotation_angle,self.max_rotation_angle)scale=random.uniform(0.7,0.8)img,bboxes=self._rotate_img_bbox(img,bboxes,angle,scale)ifself.is_shift_pic_bboxes:ifrandom.random()self.change_light_rate:#改变亮度change_num+=1img=self._changeLight(img)ifself.is_addNoise:ifrandom.random()

		自动登录	找回密码
密码			会员注册