常用scripts整理--2020.05.13
平时做数据处理时,经常会用到一些脚本来提高效率。这里把自己常用的脚本整理总结一下,希望对大家有所帮助。
1.批量修改图片名字
#coding=utf-8
'''
code by zzg 2020-04-07
'''
import os
import re
import sys
path = r"***自己的图片文件夹"  # folder whose images will be renamed

# Whole-name match "<stem>.<ext>" for the supported extensions; group 1
# captures the extension so it can be kept on the new name.  The dot is
# escaped and the match is anchored so names such as "myjpg" or
# "a.jpg.bak" are not treated as images.
_IMAGE_NAME = re.compile(r".+\.(jpg|jpeg|JPG)")


def rename_images(folder, prefix='1'):
    """Rename every image in ``folder`` to ``<prefix>_<index>.<ext>``.

    Only names ending in ``.jpg``, ``.jpeg`` or ``.JPG`` are touched; any
    other entry is left alone instead of aborting the run.  Files are
    numbered from 0 in sorted-name order and keep their extension.

    The rename happens in two passes (first to a temporary name, then to the
    final one) so that a file already called e.g. ``1_0.jpg`` is never
    overwritten by another file that is assigned that name.

    Args:
        folder: Directory containing the images.  The process working
            directory is not changed, so relative paths keep working.
        prefix: Text placed before the running number.

    Returns:
        A list of ``(old_name, new_name)`` tuples in processing order.
    """
    images = []
    for fileName in sorted(os.listdir(folder)):
        found = _IMAGE_NAME.fullmatch(fileName)
        if found:
            images.append((fileName, found.group(1)))

    # Pass 1: park every image under a temporary name.
    staged = []
    for num, (fileName, ext) in enumerate(images):
        temp_name = '.renaming_' + str(num) + '_' + fileName
        os.rename(os.path.join(folder, fileName),
                  os.path.join(folder, temp_name))
        staged.append((fileName, temp_name, prefix + '_' + str(num) + '.' + ext))

    # Pass 2: no final name can be occupied any more, so this cannot clobber.
    renamed = []
    for num, (fileName, temp_name, new_name) in enumerate(staged):
        print('num:', num, 'filename:', fileName)
        os.rename(os.path.join(folder, temp_name),
                  os.path.join(folder, new_name))
        renamed.append((fileName, new_name))
    return renamed


if __name__ == "__main__":
    print("修改前:" + str(os.listdir(path)))  # folder content before renaming
    rename_images(path)
    print("---------------------------------------------------")
    sys.stdout.flush()  # flush the output stream (stdin cannot be flushed)
    print("修改后:" + str(os.listdir(path)))  # folder content after renaming
2.批量修改图片大小
#coding=utf-8
'''
code by zzg 2020-04-07
'''
#提取目录下所有图片,更改尺寸后保存到另一目录
from PIL import Image
import os.path
import glob


def convertjpg(jpgfile, outdir, width=128, height=128):
    """Resize one image and save it under ``outdir`` with the same file name.

    Args:
        jpgfile: Path of the image to read.
        outdir: Folder that receives the resized copy; created if missing.
        width: Target width in pixels.
        height: Target height in pixels.

    A failure while resizing or saving is printed and swallowed so that a
    batch run continues with the next file.
    """
    # Image.save() does not create missing folders, so without this every
    # file of a first run would fail.
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    # The context manager releases the underlying file handle, which matters
    # when thousands of images are converted in one process.
    with Image.open(jpgfile) as img:
        try:
            new_img = img.resize((width, height), Image.BILINEAR)
            new_img.save(os.path.join(outdir, os.path.basename(jpgfile)))
        except Exception as e:
            # Deliberately best effort: report and carry on.
            print(e)


if __name__ == "__main__":
    for jpgfile in glob.glob("train/*.jpg"):
        convertjpg(jpgfile, "train01/")
3.批量打空标签
# coding:utf-8
'''
code by zzg 2020-04-04'''
## Generate empty (object-free) label files in bulk for negative-sample images
import os,sys
import glob
from PIL import Image
import pdb# the direction/path of Image,Label
src_img_dir = "images/"
src_xml_dir = "xml/"


def empty_annotation(img, width, height):
    """Return the text of a VOC annotation that contains no objects.

    Args:
        img: Image name without extension; ``.jpg`` is appended.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        The annotation as one string, one XML element per line.
    """
    from xml.sax.saxutils import escape  # keeps names containing & or < valid

    return ''.join([
        '<annotation>\n',
        '<folder>VOC2007</folder>\n',
        '<filename>' + escape(str(img)) + '.jpg' + '</filename>\n',
        '<source>\n',
        '<database>' + 'Unknown' + '</database>\n',
        '</source>\n',
        '<size>\n',
        '<width>' + str(width) + '</width>\n',
        '<height>' + str(height) + '</height>\n',
        '<depth>3</depth>\n',
        '</size>\n',
        '</annotation>',
    ])


def write_empty_labels(img_dir=src_img_dir, xml_dir=src_xml_dir):
    """Write one empty annotation per ``*.jpg`` found in ``img_dir``.

    Args:
        img_dir: Folder that holds the negative-sample images.
        xml_dir: Folder that receives ``<image name>.xml``; created if missing.

    Returns:
        The number of annotation files written.
    """
    os.makedirs(xml_dir, exist_ok=True)
    written = 0
    for img_path in glob.glob(os.path.join(img_dir, '*.jpg')):
        img = os.path.splitext(os.path.basename(img_path))[0]
        # Only the size is needed, so the image is closed again right away.
        with Image.open(img_path) as im:
            width, height = im.size
        with open(os.path.join(xml_dir, img + '.xml'), 'w', encoding='utf-8') as xml_file:
            xml_file.write(empty_annotation(img, width, height))
        written += 1
    return written


if __name__ == "__main__":
    write_empty_labels()
    print("finshed convert!!")
4.批量xml转json(一个文件夹下所有xml转到对应的json)
'''
code by zzg 2020-05-13
'''
# Convert every XML file in one folder to JSON and store the results in another folder
import glob
import xmltodict
import json
import os

path = 'xml/'
path2 = 'json/'


def json_name_for(xml_path):
    """Return the output file name (``<stem>.json``) for an XML path.

    Only the final extension is replaced, so dotted names (``a.b.xml``) and
    paths such as ``./xml/a.xml`` or Windows-style separators are handled.
    """
    return os.path.splitext(os.path.basename(xml_path))[0] + '.json'


def pythonXmlToJson(path, out_dir=None):
    """Convert every ``*.xml`` file in ``path`` to a JSON file.

    Args:
        path: Folder that holds the XML files.
        out_dir: Folder that receives the JSON files; defaults to the
            module-level ``path2``.  Created if missing.

    Returns:
        The number of XML files converted.
    """
    if out_dir is None:
        out_dir = path2
    os.makedirs(out_dir, exist_ok=True)

    xml_dir = glob.glob(os.path.join(path, '*.xml'))
    for x in xml_dir:
        # Binary mode lets the XML parser honour the file's own encoding
        # declaration instead of the platform's locale encoding.
        with open(x, 'rb') as fd:
            convertedDict = xmltodict.parse(fd)
        jsonStr = json.dumps(convertedDict, indent=1)
        print("jsonStr=", jsonStr)
        print(os.path.splitext(x)[0])
        with open(os.path.join(out_dir, json_name_for(x)), 'w', encoding='utf-8') as json_file:
            json_file.write(jsonStr)
    print("xml_json finished!")
    print(len(xml_dir))
    return len(xml_dir)


if __name__ == "__main__":
    pythonXmlToJson(path)
5.批量修改图片后缀(jpg-->JPG)
(Windows)先新建一个 txt 文件,写入命令 ren *.jpg *.JPG(如果要反过来把 JPG 改成 jpg,则写 ren *.JPG *.jpg),保存后把文件后缀 .txt 改为 .bat,放到图片所在的文件夹中双击运行即可。
6.批量移动文件夹下的指定文件(比如xml和jpg混合,移出jpg)
'''
code by zzg 2020-05-12
'''
#复制或者移动一个文件夹下的所有图片或者其他指定文件到另一个文件夹
import os
import shutil
path = 'train/'
new_path = 'image/'
new_path1 = 'xml/'


def collect_files(src, dst, extensions=('.xml',), move=False):
    """Copy (or move) every file with a given extension from ``src`` to ``dst``.

    ``src`` is walked recursively and all matches end up directly in ``dst``;
    two files with the same name in different sub-folders therefore overwrite
    each other.

    Args:
        src: Folder to search.
        dst: Folder that receives the files; created if missing.
        extensions: One suffix or a tuple of suffixes, including the dot,
            matched case-sensitively, e.g. ``('.jpg', '.JPG')``.
        move: Move the files instead of copying them.

    Returns:
        The number of files transferred.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    transfer = shutil.move if move else shutil.copy
    os.makedirs(dst, exist_ok=True)

    count = 0
    for root, dirs, files in os.walk(src):
        for name in files:
            # Comparing the full suffix (dot included) keeps a file that is
            # merely named "fooxml" from being picked up.
            if name.endswith(tuple(extensions)):
                transfer(os.path.join(root, name), os.path.join(dst, name))
                count += 1
    return count


if __name__ == "__main__":
    # To pull out the images instead, pass new_path and ('.jpg', '.JPG').
    count = collect_files(path, new_path1, extensions=('.xml',))
    print(count)
    print("move finished!!")
7.为图片添加m*n的矩形格网
"""
code by zzg --2020-05-27
"""
##为图片添加m*n的矩形格网
from PIL import Image
from PIL import ImageDraw
import os

input_img = r'resize1.jpg'


def grid_step(length, cells):
    """Return the spacing in pixels between grid lines along one axis.

    The spacing is ``length // cells`` but never less than 1, so an image
    with fewer pixels than cells cannot produce a zero step.

    Raises:
        ValueError: If ``cells`` is not positive.
    """
    if cells <= 0:
        raise ValueError("cells must be a positive integer")
    return max(1, int(length) // cells)


def grid_offsets(length, cells):
    """Return the pixel offsets of the grid lines along one axis."""
    return list(range(0, int(length), grid_step(length, cells)))


def draw_grid(input_img, cols=13, rows=13):
    """Draw a ``cols`` x ``rows`` grid on an image, then number the cells.

    Two files are written next to the input image:
    ``grid1_<cols>_<name>`` (grid only) and ``grid_geocoding_<name>``
    (grid plus red numbers along the bottom row and the left column,
    counted from the bottom-left corner).

    Args:
        input_img: Path of the image to annotate.
        cols: Number of cells along the x axis.
        rows: Number of cells along the y axis.

    Returns:
        A tuple ``(grid_path, geocoding_path)`` with the two saved paths.
    """
    (filepath, filename) = os.path.split(input_img)
    with Image.open(input_img) as img:
        img_d = ImageDraw.Draw(img)
        x_len, y_len = img.size
        print(x_len)
        print(y_len)
        x_marks = grid_offsets(x_len, cols)
        y_marks = grid_offsets(y_len, rows)
        y_step = grid_step(y_len, rows)

        # Generate the grid: vertical lines first, then horizontal lines
        # measured upwards from the bottom edge.
        for x in x_marks:
            img_d.line(((x, 0), (x, y_len)), (0, 0, 0))
        for y in y_marks:
            j = y_len - y - 1
            img_d.line(((0, j), (x_len, j)), (0, 0, 0))
        grid_path = os.path.join(filepath, "grid1_" + str(cols) + "_" + filename)
        img.save(grid_path)

        # Number the grid starting from the bottom-left corner.
        for cnt, i in enumerate(x_marks, 1):
            img_d.text((i, y_len - y_step), str(cnt), fill=(255, 0, 0))
        # The first row is skipped here, as in the column loop's start value.
        for cnt, j in enumerate(y_marks[1:], 1):
            img_d.text((0, y_len - j), str(cnt), fill=(255, 0, 0))
        geocoding_path = os.path.join(filepath, "grid_geocoding_" + filename)
        img.save(geocoding_path)
    return grid_path, geocoding_path


if __name__ == "__main__":
    draw_grid(input_img)
示例:
8.批量截取图片指定内容并保存以及重写对应的xml(或者用作批量修改图片名字以及对应的xml标签)
'''
code by zzg 2020-05-30
'''
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# get annotation object bndbox location
try:
    import xml.etree.cElementTree as ET
except ImportError:
    # cElementTree no longer exists on Python 3.9+; fall back to the
    # regular module.
    import xml.etree.ElementTree as ET
import os
import sys
import glob
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import pdb#the direction/path of Image,Label
src_img_dir = "image"
src_xml_dir = "xml"
dst_img_dir = "image-crop"
dst_xml_dir = "xml-crop"


def read_bndboxes(xml_path):
    """Read every ``<object>`` of a VOC annotation file.

    Returns:
        A list of ``[name, xmin, ymin, xmax, ymax]`` entries in file order,
        with the coordinates converted to ``int``.
    """
    root = ET.ElementTree(file=xml_path).getroot()
    boxes = []
    for obj in root.findall('object'):
        bndbox = obj.find('bndbox')
        corners = [int(bndbox.find(tag).text)
                   for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        boxes.append([obj.find('name').text] + corners)
    return boxes


def cropped_annotation(img, boxes):
    """Build the annotation text for an image cropped to ``boxes[0]``.

    The first box defines the crop window and is not written as an object.
    Every other box is shifted into the crop's coordinate system and clipped
    to the crop; boxes left with no area inside the crop are dropped.

    Args:
        img: Image name without extension; ``.jpeg`` is appended.
        boxes: ``[name, xmin, ymin, xmax, ymax]`` entries as returned by
            ``read_bndboxes``.

    Returns:
        The annotation as one string.

    Raises:
        ValueError: If ``boxes`` is empty.
    """
    from xml.sax.saxutils import escape  # keeps names containing & or < valid

    if not boxes:
        raise ValueError("at least one box is needed to define the crop")
    _, x01, y01, x02, y02 = boxes[0]
    width = x02 - x01
    height = y02 - y01

    parts = [
        '<annotation>\n',
        '<folder>VOC2007</folder>\n',
        '<filename>' + escape(str(img)) + '.jpeg' + '</filename>\n',
        '<size>\n',
        '<width>' + str(width) + '</width>\n',
        '<height>' + str(height) + '</height>\n',
        '<depth>3</depth>\n',
        '</size>\n',
    ]
    for classname, x1, y1, x2, y2 in boxes[1:]:
        # Shift into crop coordinates, then clip so no coordinate is negative
        # or beyond the crop's size.
        x1 = min(max(x1 - x01, 0), width)
        y1 = min(max(y1 - y01, 0), height)
        x2 = min(max(x2 - x01, 0), width)
        y2 = min(max(y2 - y01, 0), height)
        if x2 <= x1 or y2 <= y1:
            continue
        parts.extend([
            '<object>\n',
            '<name>' + escape(classname) + '</name>\n',
            '<pose>Unspecified</pose>\n',
            '<truncated>0</truncated>\n',
            '<difficult>0</difficult>\n',
            '<bndbox>\n',
            '<xmin>' + str(x1) + '</xmin>\n',
            '<ymin>' + str(y1) + '</ymin>\n',
            '<xmax>' + str(x2) + '</xmax>\n',
            '<ymax>' + str(y2) + '</ymax>\n',
            '</bndbox>\n',
            '</object>\n',
        ])
    parts.append('</annotation>')
    return ''.join(parts)


def crop_dataset(img_dir=src_img_dir, xml_dir=src_xml_dir,
                 out_img_dir=dst_img_dir, out_xml_dir=dst_xml_dir):
    """Crop every ``*.jpeg`` in ``img_dir`` to its first annotated box.

    For each image the matching ``<name>.xml`` is read from ``xml_dir``, the
    region of the first object is saved to ``out_img_dir`` and a rewritten
    annotation for the remaining objects is saved to ``out_xml_dir``.
    Unreadable images and annotations without objects are reported and
    skipped.

    Returns:
        The number of images cropped.
    """
    os.makedirs(out_img_dir, exist_ok=True)
    os.makedirs(out_xml_dir, exist_ok=True)

    cnt = 0
    for img_path in glob.glob(os.path.join(img_dir, '*.jpeg')):
        img = os.path.splitext(os.path.basename(img_path))[0]
        im = cv2.imread(img_path)
        if im is None:
            # imread signals failure with None rather than an exception.
            print('skip (unreadable image):', img_path)
            continue

        boxes = read_bndboxes(os.path.join(xml_dir, img + '.xml'))
        print(boxes)
        if not boxes:
            print('skip (no object in annotation):', img)
            continue

        # Crop from the untouched pixels: drawing the box outline first would
        # leave a coloured border inside the saved crop.
        # NOTE(review): assumes the first box lies inside the image -- confirm.
        _, x01, y01, x02, y02 = boxes[0]
        cv2.imwrite(os.path.join(out_img_dir, img + '.jpeg'), im[y01:y02, x01:x02])

        print("===========start rewrite bndbox==============")
        with open(os.path.join(out_xml_dir, img + '.xml'), 'w', encoding='utf-8') as xml_file:
            xml_file.write(cropped_annotation(img, boxes))
        cnt += 1
        print(cnt)
    return cnt


if __name__ == "__main__":
    crop_dataset()
    print("=======================finished!===================")
9.转csv为voc_xml格式,用于目标检测
'''
code by zzg-2020-06-02
'''
import os
import numpy as np
import codecs
import pandas as pd
import json
from glob import glob
import cv2
import shutil
from sklearn.model_selection import train_test_split
from IPython import embed#1.标签路径
csv_file = "train.csv"
saved_path = "VOC2007/"  # output root
image_save_path = "./JPEGImages/"
image_raw_parh = "train/"


def merge_bboxes(data):
    """Collapse the rows of a label table into one bbox string per image.

    Args:
        data: DataFrame with at least the columns ``image_id`` and ``bbox``.
            A row whose ``image_id`` is the literal text ``image_id`` (the
            CSV's own header line, read as data) is ignored.

    Returns:
        A dict mapping each image id to its bbox strings joined by commas.
        Works the same whether an image id occurs once or many times.
    """
    rows = data[data['image_id'] != 'image_id']
    merged = rows.groupby('image_id')['bbox'].agg(
        lambda col: ','.join(col.dropna().astype(str)))
    return merged.to_dict()


def parse_boxes(label):
    """Turn a merged ``[x, y, w, h],[x, y, w, h],...`` string into corner boxes.

    Returns:
        A list of ``(xmin, ymin, xmax, ymax)`` integer tuples.  Boxes with no
        width or height are dropped; a non-string label (e.g. NaN for an
        image without boxes) yields an empty list.
    """
    if not isinstance(label, str):
        return []
    fields = label.replace('[', '').replace(']', '').replace(' ', '').split(',')
    boxes = []
    for i in range(len(fields) // 4):
        xmin, ymin, box_w, box_h = (int(float(v)) for v in fields[i * 4:i * 4 + 4])
        xmax = xmin + box_w
        ymax = ymin + box_h
        if xmax <= xmin or ymax <= ymin:
            continue
        boxes.append((xmin, ymin, xmax, ymax))
    return boxes


def voc_annotation(filename, width, height, channels, boxes, classname='wheat'):
    """Return the text of a VOC annotation for one image.

    Args:
        filename: Image name without extension; ``.jpg`` is appended.
        width: Image width in pixels.
        height: Image height in pixels.
        channels: Number of colour channels, written as ``<depth>``.
        boxes: ``(xmin, ymin, xmax, ymax)`` tuples, one per object.
        classname: Class name written for every object.
    """
    from xml.sax.saxutils import escape  # keeps names containing & or < valid

    parts = [
        '<annotation>\n',
        '\t<folder>' + 'VOC2007' + '</folder>\n',
        '\t<filename>' + escape(str(filename)) + '.jpg' + '</filename>\n',
        '\t<source>\n',
        '\t\t<database>Unknown</database>\n',
        '\t</source>\n',
        '\t<size>\n',
        '\t\t<width>' + str(width) + '</width>\n',
        '\t\t<height>' + str(height) + '</height>\n',
        '\t\t<depth>' + str(channels) + '</depth>\n',
        '\t</size>\n',
        '\t\t<segmented>0</segmented>\n',
    ]
    for xmin, ymin, xmax, ymax in boxes:
        parts.extend([
            '\t<object>\n',
            '\t\t<name>' + escape(classname) + '</name>\n',
            '\t\t<pose>Unspecified</pose>\n',
            '\t\t<truncated>1</truncated>\n',
            '\t\t<difficult>0</difficult>\n',
            '\t\t<bndbox>\n',
            '\t\t\t<xmin>' + str(xmin) + '</xmin>\n',
            '\t\t\t<ymin>' + str(ymin) + '</ymin>\n',
            '\t\t\t<xmax>' + str(xmax) + '</xmax>\n',
            '\t\t\t<ymax>' + str(ymax) + '</ymax>\n',
            '\t\t</bndbox>\n',
            '\t</object>\n',
        ])
    parts.append('</annotation>')
    return ''.join(parts)


def convert_csv_to_voc():
    """Convert ``csv_file`` plus the raw images into a VOC2007 folder layout.

    Steps: create the VOC folders, merge the CSV rows per image, write one
    annotation per image, copy the images into ``JPEGImages`` and write the
    ``trainval`` / ``train`` / ``val`` name lists (85 % / 15 % split).
    """
    # 2. create the required folders
    for sub in ("Annotations", "JPEGImages/", "ImageSets/Main/"):
        os.makedirs(saved_path + sub, exist_ok=True)

    # 3. load the labels and merge rows that share an image name
    data = pd.read_csv(csv_file, header=None, index_col=False,
                       names=['image_id', 'width', 'height', 'bbox', 'source'])
    total_csv_annotations = merge_bboxes(data)

    # 4. read the label of every image and write its xml
    count = 0
    for filename, label in total_csv_annotations.items():
        count += 1
        print(count)
        image = cv2.imread(image_raw_parh + filename + '.jpg')
        if image is None:
            # imread signals failure with None rather than an exception.
            print('skip (unreadable image):', filename)
            continue
        height, width, channels = image.shape
        boxes = parse_boxes(label)
        with codecs.open(saved_path + "Annotations/" + filename + '.xml', "w", "utf-8") as xml:
            xml.write(voc_annotation(filename, width, height, channels, boxes))
        for xmin, ymin, xmax, ymax in boxes:
            print(filename, xmin, ymin, xmax, ymax)

    # 5. split files for txt
    txtsavepath = saved_path + "ImageSets/Main/"
    # Sorted so the seeded split below is reproducible; glob order is not.
    total_files = sorted(os.path.splitext(os.path.basename(i))[0]
                         for i in glob(saved_path + "./Annotations/*.xml"))

    # 6. copy the images into the voc JPEGImages folder
    for image in glob(image_raw_parh + "/*.jpg"):
        shutil.copy(image, saved_path + image_save_path)

    train_files, val_files = train_test_split(total_files, test_size=0.15, random_state=42)
    for list_name, names in (('trainval.txt', total_files),
                             ('train.txt', train_files),
                             ('val.txt', val_files)):
        with open(os.path.join(txtsavepath, list_name), 'w') as handle:
            handle.writelines(file + "\n" for file in names)


if __name__ == "__main__":
    convert_csv_to_voc()