Table of Contents
- 1. The nn module
- 2. The torch.optim optimizer
- 3. Custom nn modules
- 4. Weight sharing
Reference: http://pytorch123.com/
1. The nn module
import torch
N, D_in, Hidden_size, D_out = 64, 1000, 100, 10
Build the model with torch.nn.Sequential; the style is very similar to Keras.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, Hidden_size),
    torch.nn.ReLU(),
    torch.nn.Linear(Hidden_size, D_out)
)

# loss function
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-4
loss_list = []
for t in range(500):
    y_pred = model(x)          # forward pass
    loss = loss_fn(y_pred, y)  # loss
    loss_list.append(loss.item())
    print(t, loss.item())
    model.zero_grad()          # zero the gradients
    loss.backward()            # backward pass, compute gradients
    with torch.no_grad():      # parameter updates must not be recorded in the autograd graph
        for param in model.parameters():
            param -= learning_rate * param.grad  # update parameters
# plot the loss curve
import pandas as pd
loss_curve = pd.DataFrame(loss_list, columns=['loss'])
loss_curve.plot()
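As a small aside (a sketch added here, not part of the original post), the Sequential container can be printed and its parameters inspected, which is a handy sanity check on the layer stack just built:

print(model)  # prints the Linear -> ReLU -> Linear stack
for name, param in model.named_parameters():
    print(name, param.shape)  # e.g. '0.weight', '0.bias', '2.weight', '2.bias' (the ReLU has no parameters)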
2. The torch.optim optimizer
Use torch.optim.Adam as the optimizer. Inside the training loop, optimizer.zero_grad() clears the gradients and optimizer.step() updates the parameters.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

loss_list = []
for t in range(500):
    y_pred = model(x)          # forward pass
    loss = loss_fn(y_pred, y)  # loss
    loss_list.append(loss.item())
    print(t, loss.item())
    optimizer.zero_grad()      # zero the gradients
    loss.backward()            # backward pass, compute gradients
    optimizer.step()           # update parameters
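For plain SGD the two approaches coincide: optimizer.step() performs essentially the same in-place update that the manual loop in section 1 wrote by hand. A minimal sketch (added here, assuming the same model, loss_fn, x, and y as above):

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

y_pred = model(x)
loss = loss_fn(y_pred, y)
optimizer.zero_grad()   # replaces model.zero_grad()
loss.backward()
optimizer.step()        # for vanilla SGD this is essentially param -= learning_rate * param.grad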
3. Custom nn modules
- Subclass torch.nn.Module and define forward(), the forward-pass function.
import torch
class myModel(torch.nn.Module):
    def __init__(self, D_in, Hidden_size, D_out):
        super(myModel, self).__init__()
        self.fc1 = torch.nn.Linear(D_in, Hidden_size)
        self.fc2 = torch.nn.Linear(Hidden_size, D_out)

    def forward(self, x):
        x = self.fc1(x).clamp(min=0)  # clamp clips values into [min, max]; with min=0 it acts as ReLU
        x = self.fc2(x)
        return x
N, D_in, Hidden_size, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = myModel(D_in, Hidden_size, D_out)  # custom model
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

loss_val = []
for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss_val.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# plot the loss curve
import pandas as pd
loss_val = pd.DataFrame(loss_val, columns=['loss'])
loss_val.plot()
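As the comment in forward() notes, clamp(min=0) is just ReLU. An equivalent sketch (the class name myReluModel is made up here, not from the original) uses torch.nn.functional.relu instead:

import torch.nn.functional as F

class myReluModel(torch.nn.Module):
    def __init__(self, D_in, Hidden_size, D_out):
        super(myReluModel, self).__init__()
        self.fc1 = torch.nn.Linear(D_in, Hidden_size)
        self.fc2 = torch.nn.Linear(Hidden_size, D_out)

    def forward(self, x):
        x = F.relu(self.fc1(x))  # same effect as self.fc1(x).clamp(min=0)
        return self.fc2(x)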
4. Weight sharing
- Build a toy model with three kinds of fully connected layers. The middle shareFC layer is applied 0-3 times (chosen at random) in a for loop, and all of these repeated applications share the same parameters.
import random
import torch

class shareParamsModel(torch.nn.Module):
    def __init__(self, D_in, Hidden_size, D_out):
        super(shareParamsModel, self).__init__()
        self.inputFC = torch.nn.Linear(D_in, Hidden_size)
        self.shareFC = torch.nn.Linear(Hidden_size, Hidden_size)
        self.outputFC = torch.nn.Linear(Hidden_size, D_out)
        self.sharelayers = 0  # records how many shared layers were sampled

    def forward(self, x):
        x = self.inputFC(x).clamp(min=0)
        self.sharelayers = 0
        for _ in range(random.randint(0, 3)):
            x = self.shareFC(x).clamp(min=0)
            self.sharelayers += 1
        x = self.outputFC(x)
        return x
N, D_in, Hidden_size, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = shareParamsModel(D_in, Hidden_size, D_out)
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

loss_val = []
for t in range(500):
    y_pred = model(x)
    print('share layers: ', model.sharelayers)
    loss = loss_fn(y_pred, y)
    loss_val.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

for p in model.parameters():
    print(p.size())

# plot the loss curve
import pandas as pd
loss_val = pd.DataFrame(loss_val, columns=['loss'])
loss_val.plot()
Output:
share layers: 1
share layers: 0
share layers: 2
share layers: 1
share layers: 2
share layers: 1
share layers: 0
share layers: 1
share layers: 0
share layers: 0
share layers: 3
share layers: 3
... (remaining output omitted)
Parameter shapes (identical across multiple runs): each of the three Linear layers contributes exactly one weight and one bias, no matter how many times shareFC is applied in forward():
torch.Size([100, 1000])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])
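A quick check (an added sketch, not part of the original output): because the repeated applications reuse the single shareFC layer, the total parameter count is fixed regardless of how many times the loop runs:

n_params = sum(p.numel() for p in model.parameters())
print(n_params)  # 100*1000 + 100 + 100*100 + 100 + 10*100 + 10 = 111210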