【人工智能基础】RNN实验

一、RNN特性

权重共享

word_i · weight + bias

持久记忆单元

word_i · weight_word + bias_word + h_i · weight_h + bias_h

二、公式化表达

h_t = f(h_{t - 1}, x_t)
h_t = \tanh(W_{hh} h_{t-1} + W_{xh} x_t)
y_t = W_{hy} h_t

三、RNN网络正弦波波形预测

环境准备

import numpy as np
import torch
from torch import nn,optim
from matplotlib import pyplot as plt

# Number of points sampled along the time axis for each generated sine segment.
num_time_steps = 50
# Feature size of a single input time step (used as the RNN's input_size).
input_size = 1
# Size of the RNN hidden state.
hidden_size = 16
# Feature size of a single output time step (output width of the linear layer).
output_size = 1
# Learning rate handed to the Adam optimizer.
lr = 0.01

RNN类

class Net(nn.Module):
    """Single-layer RNN followed by a linear read-out applied at every time step.

    Args:
        rnn_input_size: Feature size of each input time step. Defaults to the
            module-level ``input_size``.
        rnn_hidden_size: Size of the recurrent hidden state. Defaults to the
            module-level ``hidden_size``.
        linear_output_size: Feature size of each output time step. Defaults
            to the module-level ``output_size``.
    """

    def __init__(self, rnn_input_size=None, rnn_hidden_size=None,
                 linear_output_size=None):
        super().__init__()
        # Fall back to the module-level hyper-parameters so that a plain
        # ``Net()`` call builds the same network as before.
        if rnn_input_size is None:
            rnn_input_size = input_size
        if rnn_hidden_size is None:
            rnn_hidden_size = hidden_size
        if linear_output_size is None:
            linear_output_size = output_size

        self.rnn = nn.RNN(
            input_size=rnn_input_size,
            hidden_size=rnn_hidden_size,
            num_layers=1,
            # Tensors are laid out as [batch, seq, feature].
            batch_first=True,
        )
        # Start from very small recurrent weights and biases.
        for p in self.rnn.parameters():
            nn.init.normal_(p, mean=0.0, std=0.001)
        self.linear = nn.Linear(rnn_hidden_size, linear_output_size)

    def forward(self, x, hidden_prev):
        """Run the RNN over ``x`` and project every hidden state to the output.

        Args:
            x: Input tensor of shape [batch, seq, input_size].
            hidden_prev: Initial hidden state of shape [1, batch, hidden_size].

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            [batch, seq, output_size] (so it can be compared directly with the
            target in an MSE loss) and ``hidden`` is a copy of the final
            hidden state.
        """
        out, hidden_prev = self.rnn(x, hidden_prev)
        # nn.Linear acts on the last dimension only:
        # [batch, seq, h] => [batch, seq, out]. For batch == 1 this matches
        # the flatten / project / unsqueeze sequence, without hard-coding
        # the batch size or the hidden size.
        out = self.linear(out)
        return out, hidden_prev.clone()

正弦数据构建函数

def create_image(num_steps=None):
    """Build one (input, target) training pair sampled from a sine wave.

    A random integer start in {0, 1, 2} is drawn, the interval
    [start, start + 10] is sampled at ``num_steps`` evenly spaced points, and
    the sine of those points is split into an input sequence and a target
    sequence shifted one step ahead.

    Args:
        num_steps: Number of sampled time points. Defaults to the
            module-level ``num_time_steps``.

    Returns:
        A tuple ``(time_steps, x, y)``: ``time_steps`` is a NumPy array of
        length ``num_steps``; ``x`` and ``y`` are float32 tensors of shape
        [1, num_steps - 1, 1], with ``y`` being ``x`` shifted by one step.

    Raises:
        ValueError: If ``num_steps`` is smaller than 2, since at least two
            samples are needed to form an input/target pair.
    """
    if num_steps is None:
        num_steps = num_time_steps
    if num_steps < 2:
        raise ValueError("num_steps must be at least 2")

    start = np.random.randint(3, size=1)[0]
    time_steps = np.linspace(start, start + 10, num_steps)
    data = np.sin(time_steps).reshape(num_steps, 1)
    # Input is every sample but the last; target is every sample but the first.
    x = torch.tensor(data[:-1]).float().view(1, num_steps - 1, 1)
    y = torch.tensor(data[1:]).float().view(1, num_steps - 1, 1)
    return time_steps, x, y

训练模型


# Build the model, the loss and the optimizer.
model = Net()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr)

# Initial hidden state, shaped [num_layers, batch, hidden_size].
hidden_prev = torch.zeros(1, 1, hidden_size)

for step in range(6000):
    # Draw a fresh sine segment for every iteration.
    time_steps, x, y = create_image()
    output, hidden_prev = model(x, hidden_prev)
    # Carry the hidden state over to the next iteration, but cut it out of
    # the autograd graph so back-propagation stays within one segment.
    hidden_prev = hidden_prev.detach()

    loss = criterion(output, y)
    model.zero_grad()
    loss.backward()
    # Clip the global gradient norm over all parameters in a single call;
    # clipping each tensor separately would rescale them independently.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
    optimizer.step()

    if step % 1000 == 0:
        # Blue: input, red: target (input shifted one step), green: output.
        plt.plot(time_steps[:-1], x.ravel(), c='b')
        plt.plot(time_steps[:-1], y.ravel(), c='r')
        plt.plot(time_steps[:-1], output.detach().numpy().ravel(), c='g')
        plt.show()
        print('Iteration:{} loss {}'.format(step, loss.item()))

可以看到第二次绘制图像的时候，输出曲线基本拟合了目标曲线

未训练图像

训练后图像

图像预测

time_steps, x, y = create_image()
predictions = []

# One-step-ahead prediction: at every step the true sample x[:, i, :] is fed
# to the model and its output is recorded as the prediction for the next
# time point. The hidden state is threaded through the loop so each step
# sees the history of the sequence.
with torch.no_grad():
    for i in range(x.shape[1]):
        step_input = x[:, i, :].view(1, 1, 1)
        pred, hidden_prev = model(step_input, hidden_prev)
        predictions.append(pred.numpy().ravel()[0])

x = x.numpy().ravel()
y = y.numpy()

# Inputs are drawn at their own time points; predictions are drawn one step
# later, at the time points they are meant to forecast.
plt.scatter(time_steps[:-1], x.ravel(), s=90)
plt.plot(time_steps[:-1], x.ravel())
plt.scatter(time_steps[1:], predictions)
plt.show()