一、准备数据集
kagglecatsanddogs网上一搜一大堆,这里我就不上传了,需要的话可以私信
导包
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
猫和狗的照片各12500张
print(len(os.listdir('./temp/cats/')))
print(len(os.listdir('./temp/dogs/')))
"""
12500
12500
"""
生成训练数据文件夹和测试数据文件夹
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfiledef create_dir(file_dir):if os.path.exists(file_dir):print("True")shutil.rmtree(file_dir)#删除再创建os.makedirs(file_dir)else:os.makedirs(file_dir)cat_source_dir = "./temp/cats/"
train_cats_dir = "./temp/train/cats/"
test_cats_dir = "./temp/test/cats/"dot_source_dir = "./temp/dogs/"
train_dogs_dir = "./temp/train/dogs/"
test_dogs_dir = "./temp/test/dogs/"create_dir(train_cats_dir)#创建猫的训练集文件夹
create_dir(test_cats_dir)#创建猫的测试集文件夹
create_dir(train_dogs_dir)#创建狗的训练集文件夹
create_dir(test_dogs_dir)#创建狗的测试集文件夹"""
True
True
True
True
"""
将总的猫狗图像按9:1分成训练集和测试集,猫和狗各12500张
最终temp/train/cats
和temp/train/dogs
两个文件夹下各12500 * 0.9=11250张
temp/test/cats
和temp/test/dogs
这两个文件夹下各12500 * 0.1=1250张
cats和dogs为总共的猫狗图像
test和train为准备的数据集文件
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfiledef split_data(source,train,test,split_size):files = []for filename in os.listdir(source):file = source + filenameif os.path.getsize(file)>0:files.append(filename)else:print(filename + "is zero file,please ignoring")train_length = int(len(files)*split_size)test_length = int(len(files)-train_length)shuffled_set = random.sample(files,len(files))train_set = shuffled_set[0:train_length]test_set = shuffled_set[-test_length:]for filename in train_set:this_file = source + filenamedestination = train + filenamecopyfile(this_file,destination)for filename in test_set:this_file = source + filenamedestination = test + filenamecopyfile(this_file,destination)cat_source_dir = "./temp/cats/"
train_cats_dir = "./temp/train/cats/"
test_cats_dir = "./temp/test/cats/"dot_source_dir = "./temp/dogs/"
train_dogs_dir = "./temp/train/dogs/"
test_dogs_dir = "./temp/test/dogs/"split_size = 0.9
split_data(cat_source_dir,train_cats_dir,test_cats_dir,split_size)
split_data(dog_source_dir,train_dogs_dir,test_dogs_dir,split_size)
二、模型的搭建和训练
先对数据进行归一化操作,预处理进行优化一下
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfiletrain_dir = "./temp/train/"
train_datagen = ImageDataGenerator(rescale=1.0/255.0)#优化网络,先进行归一化操作
train_generator = train_datagen.flow_from_directory(train_dir,batch_size=100,class_mode='binary',target_size=(150,150))#二分类,训练样本的输入的要一致validation_dir = "./temp/test/"
validation_datagen = ImageDataGenerator(rescale=1.0/255.0)
validation_generator = validation_datagen.flow_from_directory(validation_dir,batch_size=100,class_mode='binary',target_size=(150,150))
"""
Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.
"""
搭建模型架构
model = tf.keras.models.Sequential([tf.keras.layers.Conv2D(16,(3,3),activation='relu',input_shape=(150,150,3)),tf.keras.layers.MaxPooling2D(2,2),tf.keras.layers.Conv2D(32,(3,3),activation='relu'),tf.keras.layers.MaxPooling2D(2,2),tf.keras.layers.Conv2D(64,(3,3),activation='relu'),tf.keras.layers.MaxPooling2D(2,2),tf.keras.layers.Flatten(),tf.keras.layers.Dense(512,activation='relu'),tf.keras.layers.Dense(1,activation='sigmoid')
])
model.compile(optimizer=RMSprop(lr=0.001),loss='binary_crossentropy',metrics=['acc'])
训练模型
225:因为数据一共22500张,猫和狗各12500张,其对于训练集个11250张,故训练集共22500张,在预处理第一段代码中,batch_size=100设置了一批100个,故总共应该有225批
epochs=2:两轮,也就是所有的样本全部训练一次
每轮包含225批,每一批有100张样本
history = model.fit_generator(train_generator,epochs=2,#进行2轮训练,每轮255批verbose=1,#要不记录每次训练的日志,1表示记录validation_data=validation_generator)"""
Instructions for updating:
Use tf.cast instead.
Epoch 1/2
131/225 [================>.............] - ETA: 2:03 - loss: 0.7204 - acc: 0.6093
"""
history是模型运行过程的结果
三、分析训练结果
import matplotlib.image as mpimg
import matplotlib.pyplot as pltacc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']epochs = range(len(acc))
epoch太少了,导致是直线,多训练几轮实际应该是折线图
准确率
plt.plot(epochs,acc,'r',"training accuracy")
plt.plot(epochs,val_acc,'b',"validation accuracy")
plt.title("training and validation accuracy")
plt.figure()
损失值
plt.plot(epochs,loss,'r',"training loss")
plt.plot(epochs,val_loss,'b',"validation loss")
plt.figure()
四、模型的使用验证
import numpy as np
from google.colab import files
from tensorflow.keras.preprocessing import imageuploaded = files.upload()
for fn in uploaded.keys():path = 'G:/Juptyer_workspace/Tensorflow_mooc/sjj/test/' + fn#该路径为要用模型测试的路径img = image.load_img(path,target_size=(150,150))x = image.img_to_array(img)#多维数组x = np.expand_dims(x,axis=0)#拉伸images = np.vstack([x])#水平方向拉直classes = model.predict(images,batch_size=10)print(classes[0])if classes[0]>0.5:print(fn + "it is a dog")else:print(fn + "it is a cat")