import torch
from torch import nn
import numpy as np
import matplotlib.pyplot as plt

# torch.manual_seed(1)    # reproducible

# Hyper Parameters
INPUT_SIZE = 1           # rnn input size
LR = 0.02                # learning rate


class RNN(nn.Module):
    def __init__(self):
        super(RNN, self).__init__()

        self.rnn = nn.RNN(
            input_size=INPUT_SIZE,
            hidden_size=32,      # rnn hidden units
            num_layers=1,        # number of rnn layers
            batch_first=True,    # input & output tensors have batch size as the 1st dimension, e.g. (batch, time_step, input_size)
        )
        self.out = nn.Linear(32, 1)

    def forward(self, x, h_state):
        # x (batch, time_step, input_size)
        # h_state (n_layers, batch, hidden_size)
        # r_out (batch, time_step, hidden_size)
        r_out, h_state = self.rnn(x, h_state)

        outs = []    # this is where you can see that torch is dynamic
        for time_step in range(r_out.size(1)):    # calculate output for each time step
            outs.append(self.out(r_out[:, time_step, :]))
        return torch.stack(outs, dim=1), h_state


rnn = RNN()
print(rnn)

optimizer = torch.optim.Adam(rnn.parameters(), lr=LR)    # optimize all rnn parameters
loss_func = nn.MSELoss()

h_state = None    # for initial hidden state

plt.figure(1, figsize=(12, 5))
plt.ion()         # continuously plot

######################## Below is different #########################

################ static time steps ##########
# for step in range(60):
#     start, end = step * np.pi, (step + 1) * np.pi    # time steps
#     # use sin predicts cos
#     steps = np.linspace(start, end, 10, dtype=np.float32)

################ dynamic time steps #########
step = 0
for i in range(60):
    dynamic_steps = np.random.randint(1, 4)    # random number of time steps
    start, end = step * np.pi, (step + dynamic_steps) * np.pi    # different time step lengths
    step += dynamic_steps

    # use sin predicts cos
    steps = np.linspace(start, end, 10 * dynamic_steps, dtype=np.float32)

    ####################### Above is different ###########################

    print(len(steps))       # print how many time steps are fed to the RNN

    x_np = np.sin(steps)    # float32 for converting to torch FloatTensor
    y_np = np.cos(steps)

    x = torch.from_numpy(x_np[np.newaxis, :, np.newaxis])    # shape (batch, time_step, input_size)
    y = torch.from_numpy(y_np[np.newaxis, :, np.newaxis])

    prediction, h_state = rnn(x, h_state)    # rnn output
    # !! next step is important !!
    h_state = h_state.data    # repack the hidden state, break the connection from the last iteration

    loss = loss_func(prediction, y)    # MSE loss
    optimizer.zero_grad()              # clear gradients for this training step
    loss.backward()                    # backpropagation, compute gradients
    optimizer.step()                   # apply gradients

    # plotting
    plt.plot(steps, y_np.flatten(), 'r-')
    plt.plot(steps, prediction.data.numpy().flatten(), 'b-')
    plt.draw()
    plt.pause(0.05)

plt.ioff()
plt.show()
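The line `h_state = h_state.data` above is what keeps backpropagation limited to the current iteration: it repacks the hidden state as a plain tensor, so the next `loss.backward()` does not try to reach back through graphs that have already been freed. Below is a minimal, self-contained sketch of the same idea using the more explicit `detach()`; the tiny network and variable names are illustrative, not part of the tutorial code.

import torch
from torch import nn

# Toy example (assumed setup, not the tutorial's): carry a hidden state
# across iterations without letting gradients flow between them.
rnn = nn.RNN(input_size=1, hidden_size=4, batch_first=True)
head = nn.Linear(4, 1)
opt = torch.optim.Adam(list(rnn.parameters()) + list(head.parameters()), lr=0.01)

h = None
for _ in range(3):
    x = torch.randn(1, 5, 1)            # (batch, time_step, input_size)
    target = torch.randn(1, 5, 1)
    out, h = rnn(x, h)
    loss = ((head(out) - target) ** 2).mean()
    opt.zero_grad()
    loss.backward()                     # would raise on the 2nd pass if h were not detached below
    opt.step()
    h = h.detach()                      # same effect as h_state = h_state.data in the code above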
5.3 Overfitting and Dropout
import torch
import matplotlib.pyplot as plt

# torch.manual_seed(1)    # reproducible

N_SAMPLES = 20
N_HIDDEN = 300

# training data
x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
y = x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# test data
test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
test_y = test_x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# show data
plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train')
plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test')
plt.legend(loc='upper left')
plt.ylim((-2.5, 2.5))
plt.show()

net_overfitting = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)

net_dropped = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.Dropout(0.5),    # drop 50% of the neurons
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.Dropout(0.5),    # drop 50% of the neurons
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)

print(net_overfitting)    # net architecture
print(net_dropped)

optimizer_ofit = torch.optim.Adam(net_overfitting.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(net_dropped.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()

plt.ion()    # interactive plotting

for t in range(500):
    pred_ofit = net_overfitting(x)
    pred_drop = net_dropped(x)
    loss_ofit = loss_func(pred_ofit, y)
    loss_drop = loss_func(pred_drop, y)

    optimizer_ofit.zero_grad()
    optimizer_drop.zero_grad()
    loss_ofit.backward()
    loss_drop.backward()
    optimizer_ofit.step()
    optimizer_drop.step()

    if t % 10 == 0:
        # change to eval mode in order to fix the dropout effect;
        # whenever we evaluate on the test set, net.eval() must be called to switch dropout off
        net_overfitting.eval()
        net_dropped.eval()    # dropout behaves differently than in train mode

        # plotting
        plt.cla()
        test_pred_ofit = net_overfitting(test_x)
        test_pred_drop = net_dropped(test_x)
        plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.3, label='train')
        plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.3, label='test')
        plt.plot(test_x.data.numpy(), test_pred_ofit.data.numpy(), 'r-', lw=3, label='overfitting')
        plt.plot(test_x.data.numpy(), test_pred_drop.data.numpy(), 'b--', lw=3, label='dropout(50%)')
        plt.text(0, -1.2, 'overfitting loss=%.4f' % loss_func(test_pred_ofit, test_y).data.numpy(), fontdict={'size': 20, 'color': 'red'})
        plt.text(0, -1.5, 'dropout loss=%.4f' % loss_func(test_pred_drop, test_y).data.numpy(), fontdict={'size': 20, 'color': 'blue'})
        plt.legend(loc='upper left')
        plt.ylim((-2.5, 2.5))
        plt.pause(0.1)

        # change back to train mode
        net_overfitting.train()
        net_dropped.train()

plt.ioff()
plt.show()
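As noted in the code, switching between train() and eval() is what actually changes the behaviour of `nn.Dropout`: in training mode activations are zeroed at random and the survivors are scaled up by 1/(1-p), while in eval mode the layer is a no-op. A minimal sketch, using nothing beyond `torch.nn.Dropout` itself:

import torch

drop = torch.nn.Dropout(0.5)
x = torch.ones(1, 10)

drop.train()       # training mode: ~50% of values zeroed, the rest scaled by 1/(1-0.5) = 2
print(drop(x))     # e.g. tensor([[2., 0., 2., 2., 0., ...]])

drop.eval()        # eval mode: dropout does nothing
print(drop(x))     # tensor([[1., 1., ..., 1.]])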
5.4 Batch Normalization
import torch
from torch import nn
from torch.nn import init
import torch.utils.data as Data
import matplotlib.pyplot as plt
import numpy as np

# torch.manual_seed(1)    # reproducible
# np.random.seed(1)

# Hyper parameters
N_SAMPLES = 2000
BATCH_SIZE = 64
EPOCH = 12
LR = 0.03
N_HIDDEN = 8
ACTIVATION = torch.tanh
B_INIT = -0.2    # use a bad bias constant initializer

# training data
x = np.linspace(-7, 10, N_SAMPLES)[:, np.newaxis]
noise = np.random.normal(0, 2, x.shape)
y = np.square(x) - 5 + noise

# test data
test_x = np.linspace(-7, 10, 200)[:, np.newaxis]
noise = np.random.normal(0, 2, test_x.shape)
test_y = np.square(test_x) - 5 + noise

train_x, train_y = torch.from_numpy(x).float(), torch.from_numpy(y).float()
test_x = torch.from_numpy(test_x).float()
test_y = torch.from_numpy(test_y).float()

train_dataset = Data.TensorDataset(train_x, train_y)
train_loader = Data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

# show data
plt.scatter(train_x.numpy(), train_y.numpy(), c='#FF9359', s=50, alpha=0.2, label='train')
plt.legend(loc='upper left')


class Net(nn.Module):
    def __init__(self, batch_normalization=False):
        super(Net, self).__init__()
        self.do_bn = batch_normalization
        self.fcs = []
        self.bns = []
        self.bn_input = nn.BatchNorm1d(1, momentum=0.5)    # for input data

        for i in range(N_HIDDEN):                # build hidden layers and BN layers
            input_size = 1 if i == 0 else 10
            fc = nn.Linear(input_size, 10)
            setattr(self, 'fc%i' % i, fc)        # IMPORTANT: register the layer on the Module
            self._set_init(fc)                   # parameter initialization
            self.fcs.append(fc)
            if self.do_bn:
                bn = nn.BatchNorm1d(10, momentum=0.5)
                setattr(self, 'bn%i' % i, bn)    # IMPORTANT: register the layer on the Module
                self.bns.append(bn)

        self.predict = nn.Linear(10, 1)          # output layer
        self._set_init(self.predict)             # parameter initialization

    def _set_init(self, layer):
        init.normal_(layer.weight, mean=0., std=.1)
        init.constant_(layer.bias, B_INIT)

    def forward(self, x):
        pre_activation = [x]
        if self.do_bn: x = self.bn_input(x)      # input batch normalization
        layer_input = [x]
        for i in range(N_HIDDEN):
            x = self.fcs[i](x)
            pre_activation.append(x)
            if self.do_bn: x = self.bns[i](x)    # batch normalization
            x = ACTIVATION(x)
            layer_input.append(x)
        out = self.predict(x)
        return out, layer_input, pre_activation


nets = [Net(batch_normalization=False), Net(batch_normalization=True)]
# print(*nets)    # print net architecture

opts = [torch.optim.Adam(net.parameters(), lr=LR) for net in nets]
loss_func = torch.nn.MSELoss()


def plot_histogram(l_in, l_in_bn, pre_ac, pre_ac_bn):
    for i, (ax_pa, ax_pa_bn, ax, ax_bn) in enumerate(zip(axs[0, :], axs[1, :], axs[2, :], axs[3, :])):
        [a.clear() for a in [ax_pa, ax_pa_bn, ax, ax_bn]]
        if i == 0:
            p_range = (-7, 10); the_range = (-7, 10)
        else:
            p_range = (-4, 4); the_range = (-1, 1)
        ax_pa.set_title('L' + str(i))
        ax_pa.hist(pre_ac[i].data.numpy().ravel(), bins=10, range=p_range, color='#FF9359', alpha=0.5)
        ax_pa_bn.hist(pre_ac_bn[i].data.numpy().ravel(), bins=10, range=p_range, color='#74BCFF', alpha=0.5)
        ax.hist(l_in[i].data.numpy().ravel(), bins=10, range=the_range, color='#FF9359')
        ax_bn.hist(l_in_bn[i].data.numpy().ravel(), bins=10, range=the_range, color='#74BCFF')
        for a in [ax_pa, ax, ax_pa_bn, ax_bn]:
            a.set_yticks(())
            a.set_xticks(())
        ax_pa_bn.set_xticks(p_range)
        ax_bn.set_xticks(the_range)
    axs[0, 0].set_ylabel('PreAct')
    axs[1, 0].set_ylabel('BN PreAct')
    axs[2, 0].set_ylabel('Act')
    axs[3, 0].set_ylabel('BN Act')
    plt.pause(0.01)


if __name__ == "__main__":
    f, axs = plt.subplots(4, N_HIDDEN + 1, figsize=(10, 5))
    plt.ion()    # interactive plotting
    plt.show()

    # training
    losses = [[], []]    # record loss for the two networks

    for epoch in range(EPOCH):
        print('Epoch: ', epoch)
        layer_inputs, pre_acts = [], []
        for net, l in zip(nets, losses):
            net.eval()               # set eval mode to fix moving_mean and moving_var
            pred, layer_input, pre_act = net(test_x)
            l.append(loss_func(pred, test_y).data.item())
            layer_inputs.append(layer_input)
            pre_acts.append(pre_act)
            net.train()              # free moving_mean and moving_var again
        plot_histogram(*layer_inputs, *pre_acts)    # plot histograms

        for step, (b_x, b_y) in enumerate(train_loader):
            for net, opt in zip(nets, opts):    # train each network
                pred, _, _ = net(b_x)
                loss = loss_func(pred, b_y)
                opt.zero_grad()
                loss.backward()
                opt.step()    # this also learns the parameters in the Batch Normalization layers

    plt.ioff()

    # plot training loss
    plt.figure(2)
    plt.plot(losses[0], c='#FF9359', lw=3, label='Original')
    plt.plot(losses[1], c='#74BCFF', lw=3, label='Batch Normalization')
    plt.xlabel('step')
    plt.ylabel('test loss')
    plt.ylim((0, 2000))
    plt.legend(loc='best')

    # evaluation
    # set nets to eval mode to freeze the parameters in the batch normalization layers
    [net.eval() for net in nets]    # eval mode fixes moving_mean and moving_var
    preds = [net(test_x)[0] for net in nets]

    plt.figure(3)
    plt.plot(test_x.data.numpy(), preds[0].data.numpy(), c='#FF9359', lw=4, label='Original')
    plt.plot(test_x.data.numpy(), preds[1].data.numpy(), c='#74BCFF', lw=4, label='Batch Normalization')
    plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='r', s=50, alpha=0.2, label='test')
    plt.legend(loc='best')
    plt.show()
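The net.eval() / net.train() calls in the training loop above matter for the same reason they do with dropout: in training mode `nn.BatchNorm1d` normalizes each batch with that batch's own mean and variance and updates its running estimates, while in eval mode it normalizes with the stored running statistics (the moving_mean and moving_var the comments refer to). A minimal sketch of that difference, independent of the tutorial code:

import torch

bn = torch.nn.BatchNorm1d(1, momentum=0.5)
batch = torch.tensor([[1.0], [2.0], [3.0]])

bn.train()
out_train = bn(batch)                      # uses this batch's mean/var; running estimates are updated
print(out_train.mean().item())             # ~0, because batch statistics were used

bn.eval()
out_eval = bn(batch)                       # uses the stored running_mean / running_var instead
print(bn.running_mean, bn.running_var)     # the estimates accumulated while in training mode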