一、先了解一下深度学习是如何优化参数的
import torch
import math


class Polynomial3(torch.nn.Module):
    """Cubic polynomial y = a + b*x + c*x^2 + d*x^3 with learnable coefficients."""

    def __init__(self):
        super().__init__()
        # Four scalar coefficients, randomly initialized.
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """Evaluate the polynomial at x (elementwise for tensor input)."""
        return self.a + self.b * x + self.c * x**2 + self.d * x**3

    def string(self):
        """Human-readable formula with the current coefficient values."""
        return f'y = {self.a.item()} + {self.b.item()}*x + {self.c.item()}*x^2 + {self.d.item()}*x^3'


if __name__ == '__main__':
    # Fit the cubic to one period of sin(x) with plain SGD.
    # Guarded so importing this module does not kick off training
    # (consistent with the second example in this post).
    x = torch.linspace(-math.pi, math.pi, 2000)
    y = torch.sin(x)
    model = Polynomial3()
    loss_fn = torch.nn.MSELoss(reduction='sum')
    optimizer_fn = torch.optim.SGD(model.parameters(), lr=1e-6)
    for t in range(1, 2001):
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        if t % 100 == 0:
            print(f't = {t}, loss_value={loss.item()}')
        # Zero out gradients accumulated in the previous step.
        optimizer_fn.zero_grad()
        # Backpropagate to compute parameter gradients.
        loss.backward()
        # Apply the gradient update.
        optimizer_fn.step()
    print(f'result: {model.string()}')
二、你已经看了一个利用torch进行参数优化的例子了,可以学会自己写参数优化的代码了
假如现在有一个函数 $f(x)=a+bx+cx^2$,需要求出参数 $a,b,c$。经过标注人员测试,发现一组对应的 $(x, y)$ 数据如下:
x=0, y=-1; x=1, y=4; x=2, y=13; x=3, y=26; x=4, y=43; x=5, y=64; x=6, y=89; x=7, y=118; x=8, y=151; x=9, y=188;
x=10, y=229; x=11, y=274; x=12, y=323; x=13, y=376; x=14, y=433; x=15, y=494; x=16, y=559; x=17, y=628; x=18, y=701; x=19, y=778;
x=20, y=859; x=21, y=944; x=22, y=1033; x=23, y=1126; x=24, y=1223; x=25, y=1324; x=26, y=1429; x=27, y=1538; x=28, y=1651; x=29, y=1768;
x=30, y=1889; x=31, y=2014; x=32, y=2143; x=33, y=2276; x=34, y=2413; x=35, y=2554; x=36, y=2699; x=37, y=2848; x=38, y=3001; x=39, y=3158;
x=40, y=3319; x=41, y=3484; x=42, y=3653; x=43, y=3826; x=44, y=4003; x=45, y=4184; x=46, y=4369; x=47, y=4558; x=48, y=4751; x=49, y=4948;
x=50, y=5149; x=51, y=5354; x=52, y=5563; x=53, y=5776; x=54, y=5993; x=55, y=6214; x=56, y=6439; x=57, y=6668; x=58, y=6901; x=59, y=7138;
x=60, y=7379; x=61, y=7624; x=62, y=7873; x=63, y=8126; x=64, y=8383; x=65, y=8644; x=66, y=8909; x=67, y=9178; x=68, y=9451; x=69, y=9728;
x=70, y=10009; x=71, y=10294; x=72, y=10583; x=73, y=10876; x=74, y=11173; x=75, y=11474; x=76, y=11779; x=77, y=12088; x=78, y=12401; x=79, y=12718;
x=80, y=13039; x=81, y=13364; x=82, y=13693; x=83, y=14026; x=84, y=14363; x=85, y=14704; x=86, y=15049; x=87, y=15398; x=88, y=15751; x=89, y=16108;
x=90, y=16469; x=91, y=16834; x=92, y=17203; x=93, y=17576; x=94, y=17953; x=95, y=18334; x=96, y=18719; x=97, y=19108; x=98, y=19501; x=99, y=19898
2.1 写代码
import torch
import math
import numpy as np


class customModel(torch.nn.Module):
    """Quadratic model y = a + b*x + c*x^2 with three learnable scalar parameters."""

    def __init__(self):
        super().__init__()
        # Scalar coefficients, randomly initialized.
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """Evaluate the quadratic at x (elementwise for tensor input)."""
        return self.a + self.b * x + self.c * x**2

    def string(self):
        """Readable formula showing the fitted coefficient values."""
        return f'y = {self.a.item()} + {self.b.item()}*x + {self.c.item()}*x^2'


if __name__ == '__main__':
    model = customModel()
    # Synthetic data from the ground-truth quadratic y = -1 + 3x + 2x^2.
    xs = np.arange(100)
    x = torch.tensor(xs, dtype=torch.float32)
    y = torch.tensor(-1 + 3 * xs + 2 * xs**2, dtype=torch.float32)
    loss_fn = torch.nn.MSELoss(reduction='sum')
    optimizer_fn = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)
    for t in range(1, 80001):
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        if t % 100 == 0:
            print(f't = {t}, loss_value = {loss.item()}')
        # Reset gradients, backpropagate, then step the optimizer.
        optimizer_fn.zero_grad()
        loss.backward()
        optimizer_fn.step()
    print(f'result: {model.string()}')
2.2 结果
t = 79500, loss_value = 0.3282114565372467
t = 79600, loss_value = 0.021682187914848328
t = 79700, loss_value = 0.34126701951026917
t = 79800, loss_value = 5.102657794952393
t = 79900, loss_value = 0.6290067434310913
t = 80000, loss_value = 0.4637832045555115
result: y = -0.9952965378761292 + 2.999793767929077*x + 2.0000174045562744*x^2
可以看到 self.a=-0.9952965378761292, self.b=2.999793767929077, self.c=2.0000174045562744,跟我们用来生成数据的函数参数 $a=-1, b=3, c=2$ 是非常接近的。