这是学习《Neural Networks and Deep Learning》的笔记。刚开始学习时,电脑安装的是 Python 3,而作者用的是 Python 2。我想着反正要熟悉代码,大学期间 MATLAB 用得比较多,也懒得去折腾 Python 2 或者把代码重写成 Python 3,于是用 MATLAB 完成了里面的实例和例题。这本书很适合新手,特别是像我这种非科班的人:它通过启发式的思想引导读者由浅入深,自己动手操作代码,成就感很强。这份笔记主要记录书中例子的实现,还有习题的解答,难免会有错误,欢迎指正。
下面就是前两章的主要代码,后面的内容都是在这基础上修改的。
function NW(a)
% NW  Initialise the global network with Gaussian random parameters.
%   a : vector of layer sizes, e.g. NW([784,30,10]) builds a 784-30-10 net.
%   Fills the global struct NetWork with:
%     NetWork.length    - number of layers
%     NetWork.bias{k}   - a(k+1)-by-1 bias column for layer k+1
%     NetWork.weight{k} - a(k+1)-by-a(k) weight matrix into layer k+1
global NetWork;
NetWork.length = length(a);
% One weight matrix / bias vector per non-input layer.
for layer = 2:NetWork.length
    NetWork.bias{layer-1}   = randn(a(layer), 1);
    NetWork.weight{layer-1} = randn(a(layer), a(layer-1));
end
end
function update_mini_batch(mini_batch, eta, mini_batch_size)
% Apply one stochastic-gradient-descent step to the global network.
%   mini_batch      - 1x2 cell: {1,1} holds input columns, {1,2} holds labels
%   eta             - learning rate
%   mini_batch_size - number of examples in this batch
global NetWork;
L = NetWork.length - 1;   % number of weight/bias layers
% Zero-initialised accumulators for the gradients summed over the batch.
for j = 1:L
    grad_b{j} = zeros(size(NetWork.bias{j}));
    grad_w{j} = zeros(size(NetWork.weight{j}));
end
% Accumulate the per-example gradients from backpropagation.
for sample = 1:mini_batch_size
    [db, dw] = backprop(mini_batch{1,1}(:,sample), mini_batch{1,2}(sample));
    for j = 1:L
        grad_b{j} = grad_b{j} + db{j};
        grad_w{j} = grad_w{j} + dw{j};
    end
end
% Move parameters against the averaged gradient.
step = eta / mini_batch_size;
for j = 1:L
    NetWork.weight{j} = NetWork.weight{j} - step*grad_w{j};
    NetWork.bias{j}   = NetWork.bias{j}   - step*grad_b{j};
end
end
function [nabla_b,nabla_w] = backprop(x,y)
% BACKPROP  Gradient of the quadratic cost for a single training example.
%   x - input column vector (scaled by 1/256 below; presumably raw pixel
%       values 0..255 — TODO confirm against the data loader)
%   y - integer class label, used by cost_derivative for one-hot encoding
%   Returns cell arrays nabla_b / nabla_w matching NetWork.bias / NetWork.weight.
global NetWork;
L = NetWork.length - 1;   % number of weight/bias layers
for i = 1:L
    nabla_b{i} = zeros(size(NetWork.bias{i}));
    nabla_w{i} = zeros(size(NetWork.weight{i}));
end
% Forward pass: record every weighted input z and activation.
activation = x./256;
activations{1} = activation;
for i = 1:L
    z = NetWork.weight{i}*activation + NetWork.bias{i};
    zs{i} = z;
    activation = sigmoid(z);
    activations{i+1} = activation;
end
% Backward pass.
% Output-layer error: delta = dC/da .* sigma'(z).
% (The original computed these two factors into unused locals first;
%  the dead code is removed here.)
delta = cost_derivative(activations{NetWork.length}, y) .* sigmoid_prime(zs{L});
nabla_b{L} = delta;
nabla_w{L} = delta*activations{L}';
% Propagate the error back through the hidden layers.
for i = L-1:-1:1
    sp = sigmoid_prime(zs{i});
    delta = (NetWork.weight{i+1}'*delta).*sp;
    nabla_b{i} = delta;
    nabla_w{i} = delta*activations{i}';
end
end
function c = cost_derivative(output_activations,y)
% COST_DERIVATIVE  Partial derivatives dC/da of the quadratic cost.
%   output_activations - network output column vector
%   y                  - integer class label, 0-based (so y=0 marks entry 1)
%   Returns output_activations minus the one-hot encoding of y.
% Generalized from a hard-coded 10-class vector to any output size.
target = zeros(size(output_activations));
target(y+1) = 1;   % one-hot encode the 0-based label
c = output_activations - target;
end
function s = sigmoid(z)
% SIGMOID  Logistic function 1/(1+e^-z), applied element-wise.
ez = exp(-z);
s = 1 ./ (1 + ez);
end
function sp = sigmoid_prime(z)
% SIGMOID_PRIME  Derivative of the logistic sigmoid, element-wise.
%   sigma'(z) = sigma(z) * (1 - sigma(z))
s = sigmoid(z);   % evaluate once instead of twice
sp = s .* (1 - s);
end
```