数据集的载入,随机产生mini-batch放在tf_utils.py,代码如下
import h5py
import numpy as np
import tensorflow as tf
import math


def load_dataset():
    """Load the train/test arrays from the HDF5 files under ``datasets/``.

    Returns:
        Tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)``. Both label arrays are reshaped to
        ``(1, number of examples)``.
    """
    # The context managers close the HDF5 handles even when a key lookup
    # fails. ``np.array(ds[:])`` copies the data into memory, so the returned
    # arrays remain valid after the files are closed.
    with h5py.File('datasets/train_signs.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('datasets/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def random_mini_batches(X, Y, mini_batch_size, seed = 0):
    """Create a list of random mini-batches from (X, Y).

    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- label array whose columns line up with the columns of X,
         of shape (label size, number of examples)
    mini_batch_size -- size of the mini-batches, positive integer
    seed -- value passed to np.random.seed so the shuffle is reproducible;
            note that this resets NumPy's global random state

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y) tuples;
                    the last one is smaller when the number of examples is
                    not a multiple of mini_batch_size

    Raises:
    ValueError -- if mini_batch_size is smaller than 1
    """
    if mini_batch_size < 1:
        raise ValueError("mini_batch_size must be a positive integer")

    m = X.shape[1]  # number of training examples
    mini_batches = []
    np.random.seed(seed)

    # Step 1: Shuffle (X, Y) with one shared permutation so that every column
    # of X keeps its matching column of Y.
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation]

    # Step 2: Partition (shuffled_X, shuffled_Y). Slicing clips at m, so the
    # final, shorter batch needs no special case.
    for start in range(0, m, mini_batch_size):
        end = start + mini_batch_size
        mini_batches.append((shuffled_X[:, start:end], shuffled_Y[:, start:end]))

    return mini_batches


def convert_to_one_hot(Y, C):
    """Return the one-hot encoding of the integer labels in Y.

    Arguments:
    Y -- integer label array; it is flattened before encoding
    C -- number of classes

    Returns:
    Array of shape (C, number of labels) with one column per label.
    """
    # Y.reshape(-1) flattens the labels into a single row of indices.
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y


def predict(X, parameters):
    """Run the trained three-layer network on X and return the argmax class.

    Arguments:
    X -- input data of shape (input size, number of examples)
    parameters -- dictionary with the trained values for
                  "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    prediction -- result of evaluating tf.argmax over the first axis of the
                  final linear output, i.e. one class index per column of X
    """
    W1 = tf.convert_to_tensor(parameters["W1"])
    b1 = tf.convert_to_tensor(parameters["b1"])
    W2 = tf.convert_to_tensor(parameters["W2"])
    b2 = tf.convert_to_tensor(parameters["b2"])
    W3 = tf.convert_to_tensor(parameters["W3"])
    b3 = tf.convert_to_tensor(parameters["b3"])

    params = {"W1": W1,
              "b1": b1,
              "W2": W2,
              "b2": b2,
              "W3": W3,
              "b3": b3}

    # Size the placeholder from W1 instead of hard-coding [12288, 1]; the
    # open second dimension still accepts the single-column input used before.
    n_x = W1.get_shape().as_list()[1]
    x = tf.placeholder("float", [n_x, None])

    z3 = forward_propagation_for_predict(x, params)
    p = tf.argmax(z3)

    # Close the session when done so its resources are released.
    with tf.Session() as sess:
        prediction = sess.run(p, feed_dict = {x: X})

    return prediction


def forward_propagation_for_predict(X, parameters):
    """Implement the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Arguments:
    X -- input dataset placeholder, of shape (input size, number of examples)
    parameters -- python dictionary containing the parameters
                  "W1", "b1", "W2", "b2", "W3", "b3"

    Returns:
    Z3 -- the output of the last LINEAR unit (softmax is not applied here)
    """
    # Retrieve the parameters from the dictionary "parameters"
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    # Numpy equivalents:
    Z1 = tf.add(tf.matmul(W1, X), b1)   # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.relu(Z1)                 # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)  # Z2 = np.dot(W2, A1) + b2
    A2 = tf.nn.relu(Z2)                 # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)  # Z3 = np.dot(W3, A2) + b3

    return Z3
首先看数据集:
import tf_utils
import cv2
# Load the dataset arrays and report the shape of each split.
(train_set_x_orig, train_set_Y,
 test_set_x_orig, test_set_Y, classes) = tf_utils.load_dataset()

shape_reports = (
    ('训练样本={}', train_set_x_orig),
    ('训练样本标签={}', train_set_Y),
    ('测试样本={}', test_set_x_orig),
    ('测试样本标签={}', test_set_Y),
)
for template, array in shape_reports:
    print(template.format(array.shape))

# Print the label stored at column index 5, then display the image at the
# same index and wait for a key press.
sample_index = 5
print('第五个样本={}'.format(train_set_Y[0, sample_index]))
cv2.imshow('1.jpg', train_set_x_orig[sample_index])
cv2.waitKey()
打印结果:可看出有1080个训练样本,每个样本的size为(64,64,3),其中手势数字用相应的数字代表,故后面要处理成one-hot编码(代码中形状为(6,samples),计算损失时再转置为(samples,6))
利用三层神经网络,W1=(25,64*64*3),W2=(12,25),W3=(6,12),输入X=(64*64*3,samples),最终y_pred=(6,samples),做一个转置后与给定的真实y计算损失,代码如下:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tf_utils
import cv2
"""
创建 placeholder
"""
def create_placeholder(n_x,n_y):
    """Create the float32 placeholders for the inputs and the labels.

    Arguments:
    n_x -- number of input features (rows of X)
    n_y -- number of classes (rows of Y)

    Returns:
    X -- placeholder named 'X' of shape [n_x, None]
    Y -- placeholder named 'Y' of shape [n_y, None]
    """
    # The second dimension is left open so any number of examples can be fed.
    X = tf.placeholder(tf.float32, shape=[n_x, None], name='X')
    Y = tf.placeholder(tf.float32, shape=[n_y, None], name='Y')
    return X, Y
"""
初始化权重
"""
def initialize_parameters():
    """Create the weight and bias variables of the three-layer network.

    The layer sizes are fixed at 12288 -> 25 -> 12 -> 6. Weights use the
    Xavier initializer with seed 1; biases start at zero.

    Returns:
    parameters -- dictionary mapping 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
                  to the created variables
    """
    tf.set_random_seed(1)

    layer_dims = [12288, 25, 12, 6]
    parameters = {}
    # Variables are created in the order W1, b1, W2, b2, W3, b3.
    for layer in range(1, len(layer_dims)):
        n_in = layer_dims[layer - 1]
        n_out = layer_dims[layer]
        w_name = 'W{}'.format(layer)
        b_name = 'b{}'.format(layer)
        parameters[w_name] = tf.get_variable(
            name=w_name, shape=[n_out, n_in], dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(seed=1))
        parameters[b_name] = tf.get_variable(
            name=b_name, shape=[n_out, 1], dtype=tf.float32,
            initializer=tf.zeros_initializer())

    return parameters
"""
one-hot编码
"""
def convert_one_hot(Y,C):
    """Return the one-hot encoding of the integer labels in Y.

    Arguments:
    Y -- integer class labels (array, list or tuple); flattened before encoding
    C -- number of classes

    Returns:
    one_hot -- array of shape (C, number of labels), one column per label
    """
    # np.asarray lets plain Python sequences be passed as well as arrays.
    labels = np.asarray(Y).reshape(-1)
    # Row i of the identity matrix is the one-hot vector of class i; the
    # transpose puts one label per column.
    one_hot = np.eye(C)[labels].T
    return one_hot
"""
前向传播
"""
def forward_propagation(X,parameters):
    """Build the forward pass LINEAR -> RELU -> LINEAR -> RELU -> LINEAR.

    Arguments:
    X -- input tensor with one example per column
    parameters -- dictionary holding 'W1', 'b1', 'W2', 'b2', 'W3', 'b3'

    Returns:
    Z3 -- output of the last linear layer; no softmax is applied here
    """
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']

    Z1 = tf.add(tf.matmul(W1, X), b1)
    A1 = tf.nn.relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)
    A2 = tf.nn.relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)
    return Z3
"""
计算损失值
"""
def compute_cost(Z3,Y):
    """Return the mean softmax cross-entropy between logits Z3 and labels Y.

    Arguments:
    Z3 -- output of the last linear layer, one example per column
    Y -- one-hot labels, same layout as Z3

    Returns:
    cost -- scalar tensor with the cross-entropy averaged over the examples
    """
    # tf.nn.softmax_cross_entropy_with_logits_v2 expects inputs of shape
    # (number of examples, num_classes), so both tensors are transposed.
    Z_input = tf.transpose(Z3)
    Y = tf.transpose(Y)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=Z_input, labels=Y))
    return cost
"""
构建模型
"""
def model(train_X,train_Y,test_X,test_Y,learning_rate,num_pochs,minibatch_size):
    """Train the three-layer network with Adam and report its accuracy.

    Arguments:
    train_X -- training inputs, shape (number of features, number of examples)
    train_Y -- one-hot training labels, shape (number of classes, number of examples)
    test_X -- test inputs, same layout as train_X
    test_Y -- one-hot test labels, same layout as train_Y
    learning_rate -- learning rate passed to the Adam optimizer
    num_pochs -- number of passes over the training set
    minibatch_size -- number of examples per mini-batch

    Returns:
    parameters -- dictionary with the trained values of
                  'W1', 'b1', 'W2', 'b2', 'W3', 'b3'
    """
    tf.set_random_seed(1)
    seed = 3
    (n_x, m) = train_X.shape  # e.g. (12288, 1080)
    costs = []
    n_y = train_Y.shape[0]  # e.g. 6 for labels of shape (6, 1080)

    X, Y = create_placeholder(n_x, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # The context manager closes the session once training and evaluation
    # are finished.
    with tf.Session() as sess:
        sess.run(init)

        for i in range(num_pochs):
            epoch_cost = 0
            # Use a new seed every epoch; with a constant seed each epoch
            # would replay exactly the same shuffle.
            seed += 1
            mini_batches = tf_utils.random_mini_batches(train_X, train_Y, minibatch_size, seed)
            # Average over the batches actually produced, including the
            # final partial one; this is also non-zero when m < minibatch_size.
            num_minibatches = len(mini_batches)

            for mini_batche_X, mini_batche_Y in mini_batches:
                _, temp_cost = sess.run(
                    [optimizer, cost],
                    feed_dict={X: mini_batche_X, Y: mini_batche_Y})
                epoch_cost += temp_cost / num_minibatches

            # NOTE(review): the cost is recorded together with the progress
            # message every 100 epochs -- confirm this sampling is intended.
            if i % 100 == 0:
                print('after {} iterations minibatch_cost={}'.format(i,epoch_cost))
                costs.append(epoch_cost)

        plt.plot(costs)
        plt.xlabel('iterations')
        plt.ylabel('cost')
        plt.title('learning_rate={}'.format(learning_rate))
        plt.show()

        # Replace the variables by their trained values.
        parameters = sess.run(parameters)

        # Axis 0 takes the arg-max down each column (one column per example);
        # axis 1 would take it along each row.
        correct_prediction = tf.equal(tf.argmax(Z3, 0), tf.argmax(Y, 0))
        accuarcy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        print('train accuarcy is',sess.run(accuarcy,feed_dict={X: train_X,Y: train_Y}))
        print('test accuarcy is ',sess.run(accuarcy,feed_dict={X: test_X, Y: test_Y}))

    return parameters
"""
测试模型
"""
def test_model():
    """Train the network on the dataset and classify 'thumbs_up.jpg'.

    Loads the data through tf_utils.load_dataset, flattens and scales the
    images, one-hot encodes the labels, trains with model(), then prints the
    prediction for a single image read from disk.

    Raises:
    FileNotFoundError -- if 'thumbs_up.jpg' cannot be read
    """
    train_set_x_orig, train_set_Y, test_set_x_orig, test_set_Y, classes = tf_utils.load_dataset()

    # Flatten every image into one column: (examples, h, w, c) -> (h*w*c, examples).
    train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
    test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

    # Scale pixel values into [0, 1].
    train_X = train_set_x_flatten / 255  # e.g. (12288, 1080)
    test_X = test_set_x_flatten / 255

    train_Y = convert_one_hot(train_set_Y, 6)  # e.g. (6, 1080)
    test_Y = convert_one_hot(test_set_Y, 6)

    parameters = model(train_X, train_Y, test_X, test_Y,
                       learning_rate=0.0001, num_pochs=1000, minibatch_size=32)

    img = cv2.imread('thumbs_up.jpg')
    # cv2.imread signals an unreadable file by returning None.
    if img is None:
        raise FileNotFoundError("could not read image 'thumbs_up.jpg'")

    resized = cv2.resize(img, (64, 64), interpolation=cv2.INTER_CUBIC)
    # Show the 64x64 image itself rather than its flattened column vector.
    cv2.imshow('imgsize', resized)

    # Apply the same /255 scaling as the training inputs before predicting.
    # NOTE(review): cv2.imread yields BGR channel order -- confirm it matches
    # the channel order of the training data.
    imgsize = resized.reshape(1, 64 * 64 * 3).T / 255
    image_predict = tf_utils.predict(imgsize, parameters)
    print(image_predict)
# Run the train / evaluate / predict pipeline only when executed as a script.
if __name__ == '__main__':
    test_model()
打印结果:
下图的预测结果是1 符合