首先看数据集路径:
cats和dogs存放的就是各种大小的猫狗图片。
读取数据集代码:
import os
import matplotlib.pyplot as plt
"""
读取数据 返回数据的文件夹名字,和具体的猫狗的路径
"""
def read_data():
    """Locate the dataset folders and list every training image path.

    The dataset is looked up at ``./data/cats_and_dogs_filtered`` relative to
    the current working directory. The first ten (sorted) file names of each
    class are printed as a quick sanity check.

    Returns:
        tuple: ``(train_dir, validation_dir, next_cat_pix, next_dog_pix)``
        where the first two are directory paths and the last two are lists
        of paths to the training cat and dog images, sorted by file name.

    Raises:
        OSError: propagated from ``os.listdir`` when ``train/cats`` or
            ``train/dogs`` cannot be listed (e.g. the dataset is missing).
    """
    base_dir = './data/cats_and_dogs_filtered'
    train_dir = os.path.join(base_dir, 'train')
    validation_dir = os.path.join(base_dir, 'validation')
    train_cats_dir = os.path.join(train_dir, 'cats')
    train_dogs_dir = os.path.join(train_dir, 'dogs')

    # Sort the names so the returned order does not depend on the filesystem.
    train_cat_fnames = sorted(os.listdir(train_cats_dir))
    print(train_cat_fnames[:10])
    train_dog_fnames = sorted(os.listdir(train_dogs_dir))
    print(train_dog_fnames[:10])

    next_cat_pix = [os.path.join(train_cats_dir, fname)
                    for fname in train_cat_fnames]
    next_dog_pix = [os.path.join(train_dogs_dir, fname)
                    for fname in train_dog_fnames]
    return train_dir, validation_dir, next_cat_pix, next_dog_pix


def test():
    """Print the dataset locations and preview the first 16 images in a 4x4 grid."""
    train_dir, validation_dir, next_cat_pix, next_dog_pix = read_data()
    print(train_dir)
    print(validation_dir)
    print(next_dog_pix)

    nrows = 4
    ncols = 4
    # Only the images that fit in the grid are needed, so slice up front
    # instead of walking the whole list and ignoring everything past 16.
    preview_paths = (next_cat_pix + next_dog_pix)[:nrows * ncols]
    for i, img_path in enumerate(preview_paths):
        sp = plt.subplot(nrows, ncols, i + 1)
        sp.axis('off')  # hide the axes
        img = plt.imread(img_path)
        plt.imshow(img)
    plt.show()


if __name__ == '__main__':
    test()
打印结果:打印16张照片看看
模型代码:
import numpy as np
import matplotlib.pyplot as plt
import random
import data_read
import tensorflow as tf
from keras.models import Model
from keras import layers,optimizers
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator,img_to_array,load_img
"""
获得所需求的图片--未进行图像增强
"""
def data_deal():
    """Build the training and validation image generators (no augmentation).

    Both generators rescale pixel values by 1/255, resize every image to
    150x150, yield batches of 20 and use ``class_mode='binary'``, so the
    labels come from the sub-folders of each directory.

    Returns:
        tuple: ``(train_generator, test_generator)``; the second one reads
        from the validation directory.
    """
    # Only the two directory paths are needed here; the per-image path
    # lists returned by read_data() are ignored.
    train_dir, validation_dir, _, _ = data_read.read_data()

    # Scale pixel values down to the 0~1 range.
    train_datagen = ImageDataGenerator(rescale=1./255)
    test_datagen = ImageDataGenerator(rescale=1./255)

    # flow_from_directory is pointed at the folder that holds the two
    # class sub-folders.
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')
    test_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')
    return train_generator, test_generator
"""
定义模型
"""
def define_model():
    """Build the cat/dog classifier: three conv + max-pool stages and two dense layers.

    The tensor shape after every convolution and pooling step is printed
    while the graph is being built.

    Returns:
        tuple: ``(img_input, model)`` - the ``(150, 150, 3)`` input tensor
        and the (uncompiled) Keras ``Model`` named ``'CAT_DOG_Model'`` whose
        output is a single sigmoid unit.
    """
    img_input = layers.Input(shape=(150, 150, 3))

    # Three identical stages that differ only in the number of filters.
    x = img_input
    for ordinal, filters in (('第一次', 16), ('第二次', 32), ('第三次', 64)):
        x = layers.Conv2D(filters=filters, kernel_size=(3, 3),
                          activation='relu')(x)
        print('{}卷积尺寸={}'.format(ordinal, x.shape))
        x = layers.MaxPooling2D(strides=(2, 2))(x)
        print('{}池化尺寸={}'.format(ordinal, x.shape))

    # Fully connected head.
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu')(x)
    output = layers.Dense(1, activation='sigmoid')(x)

    model = Model(inputs=img_input, outputs=output, name='CAT_DOG_Model')
    return img_input, model
"""
训练模型
"""
def train_model():
    """Train the CNN for 10 epochs, then print and plot accuracy and loss.

    Builds the model with ``define_model()``, compiles it with RMSprop and
    binary cross-entropy, fits it on the generators from ``data_deal()``,
    prints the model summary and the per-epoch metrics, and finally shows
    two figures (accuracy and loss, each with training and validation curves).
    """
    _, model = define_model()
    model.compile(optimizer=optimizers.RMSprop(lr=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    train_generator, test_generator = data_deal()

    print('=============开始训练模型==================')
    # verbose: 0 = silent, 1 = progress bar, 2 = one line per epoch.
    history = model.fit_generator(
        train_generator,
        steps_per_epoch=100,  # 2000 images = batch_size (20) * 100 steps
        epochs=10,
        validation_data=test_generator,
        validation_steps=50,  # 1000 images = 20 * 50
        verbose=2)

    # summary() only prints the parameter table, so its return value is
    # not kept (rebinding ``model`` to it would discard the model).
    model.summary()

    metrics = history.history
    # Depending on the Keras version the 'accuracy' metric is recorded
    # under either 'acc' or 'accuracy'; use whichever key is present.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    acc = metrics[acc_key]
    val_acc = metrics['val_' + acc_key]
    print('训练集精度={}'.format(acc))
    print('验证集精度={}'.format(val_acc))

    loss = metrics['loss']
    val_loss = metrics['val_loss']
    print('训练集损失值={}'.format(loss))
    print('验证集损失值={}'.format(val_loss))

    # One x-axis point per completed epoch.
    epochs = range(len(acc))
    plt.plot(epochs, acc)
    plt.plot(epochs, val_acc)
    plt.title('training and validation accuracy')
    plt.figure()
    plt.plot(epochs, loss)
    plt.plot(epochs, val_loss)
    plt.title('training and validation loss')
    plt.show()


"""
查看卷积层生成的图
"""
def visualize_model():
    """Plot and save the feature maps each layer produces for one random training image.

    A model is built with ``define_model()`` and wrapped so that it returns
    the output of every layer. For each layer whose output is 4-D, the
    channels are tiled 16 per row into one grid, which is drawn and saved as
    ``'<layer_name>+<last_row_index>.jpg'``. Layers with fewer than 16
    channels (such as the 3-channel input) have no complete row and are
    skipped.

    NOTE: the model is freshly built inside this function and no weights are
    trained or loaded here, so the maps reflect the initial weights.
    """
    img_input, model = define_model()

    # Expose the output tensor of every layer as a model output.
    successive_outputs = [layer.output for layer in model.layers]
    print('查看输出={}'.format(successive_outputs))
    visualization_model = Model(img_input, successive_outputs)

    # Pick one random image from the training set.
    _, _, cat_img_files, dog_img_files = data_read.read_data()
    img_path = random.choice(cat_img_files + dog_img_files)
    img = load_img(img_path, target_size=(150, 150))
    batch = img_to_array(img)
    batch = batch.reshape((1,) + batch.shape)  # add the batch axis: (1, 150, 150, 3)
    batch /= 255

    successive_feature_maps = visualization_model.predict(batch)
    print('该模型结构层数={}'.format(len(successive_feature_maps)))
    for i, feature_map in enumerate(successive_feature_maps):
        print('第{}层shape={}'.format(i, feature_map.shape))

    layer_names = [layer.name for layer in model.layers]
    images_per_row = 16
    for layer_name, feature_map in zip(layer_names, successive_feature_maps):
        # Only 4-D outputs (batch, height, width, channels) can be tiled.
        if len(feature_map.shape) != 4:
            continue
        n_features = feature_map.shape[-1]  # channel count
        size = feature_map.shape[1]  # spatial size of one map
        n_cols = n_features // images_per_row  # number of complete rows
        if n_cols == 0:
            # Fewer channels than one row holds: the grid would be empty
            # and the figure would have zero height, so skip this layer.
            continue

        display_grid = np.zeros((size * n_cols, size * images_per_row))
        for col in range(n_cols):
            for row in range(images_per_row):
                # Work on a copy so the predicted feature map is left intact.
                channel = feature_map[0, :, :, col * images_per_row + row].copy()
                # Standardise, then stretch around mid-grey for display.
                channel -= channel.mean()
                channel /= (channel.std() + 0.001)
                channel *= 64
                channel += 128
                # Clamp to the valid 0..255 pixel range.
                channel = np.clip(channel, 0, 255).astype('uint8')
                display_grid[col * size:(col + 1) * size,
                             row * size:(row + 1) * size] = channel

        scale = 1. / size
        plt.figure(figsize=(scale * display_grid.shape[1],
                            scale * display_grid.shape[0]))
        plt.title(layer_name)
        plt.grid(False)
        plt.imshow(display_grid, aspect='auto', cmap='viridis')
        # n_cols - 1 is the index of the last tiled row, which is what the
        # file name has always carried.
        plt.savefig('{}+{}.jpg'.format(layer_name, n_cols - 1))
    plt.show()
if __name__ == '__main__':
    # Entry point: train the model. Call visualize_model() or data_deal()
    # here instead to run those steps on their own.
    train_model()
训练10个epoch打印结果:
可以看出:训练集精度一直上升、训练集损失值一直下降;而验证集精度上升到一定程度后就趋于稳定,验证集损失值反而持续上升。这说明模型发生了过拟合,下一步就要解决过拟合问题。
调用卷积层可视化函数,打印
第一次卷积尺寸=(?, 148, 148, 16)
第一次池化尺寸=(?, 74, 74, 16)
第二次卷积尺寸=(?, 72, 72, 32)
第二次池化尺寸=(?, 36, 36, 32)
第三次卷积尺寸=(?, 34, 34, 64)
第三次池化尺寸=(?, 17, 17, 64)
的卷积图,如下:
可以发现,越靠后的卷积层输出的特征图越模糊、越抽象,因为深层卷积提取的是更高级的特征,这类特征更具泛化能力。