Table of Contents
- LSTM Basics
- Implementing LSTM from Scratch
- Concise LSTM Implementation
- References
LSTM Basics
The basic theory of LSTM is not repeated here; see the following references:
- RNN Neural Networks: LSTM Model Structure
- https://github.com/ShusenTang/Dive-into-DL-PyTorch/blob/master/docs/chapter06_RNN/6.8_lstm.md
Implementing LSTM from Scratch
- Import the required packages and set the device:
import torch
from torch import nn, optim
import numpy as np
import zipfile

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
- Define the data-loading code, using Jay Chou's lyrics dataset as an example:
def load_data_jay_lyrics():
    with zipfile.ZipFile('./data/jaychou_lyrics.txt.zip') as zin:
        with zin.open('jaychou_lyrics.txt') as f:
            corpus_chars = f.read().decode('utf-8')
    # corpus_chars[:40] # '想要有直升机\n想要和你飞到宇宙去\n想要和你融化在一起\n融化在宇宙里\n我每天每天每'
    # Replace newlines with spaces; use only the first 10,000 characters for training
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    corpus_chars = corpus_chars[0:10000]
    # Map each character to an integer index
    idx_to_char = list(set(corpus_chars))
    char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
    vocab_size = len(char_to_idx)  # 1027
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size
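As a quick sanity check of the returned structures (a minimal sketch, assuming the zip archive sits at ./data/jaychou_lyrics.txt.zip as in the code above):
corpus_indices, char_to_idx, idx_to_char, vocab_size = load_data_jay_lyrics()
print(vocab_size)          # number of distinct characters, e.g. 1027
print(corpus_indices[:5])  # the first 5 characters encoded as indices
print(''.join(idx_to_char[i] for i in corpus_indices[:5]))  # ...and decoded back to text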
- Define the parameter initialization function:
def get_params():
    def _one(shape):
        ts = torch.tensor(np.random.normal(0, 0.01, size=shape), device=device, dtype=torch.float32)
        return torch.nn.Parameter(ts, requires_grad=True)

    def _three():
        # Each gate needs an input-to-hidden weight, a hidden-to-hidden weight, and a bias
        return (_one((num_inputs, num_hiddens)),
                _one((num_hiddens, num_hiddens)),
                torch.nn.Parameter(torch.zeros(num_hiddens, device=device, dtype=torch.float32), requires_grad=True))

    W_xi, W_hi, b_i = _three()  # input gate parameters
    W_xf, W_hf, b_f = _three()  # forget gate parameters
    W_xo, W_ho, b_o = _three()  # output gate parameters
    W_xc, W_hc, b_c = _three()  # candidate memory cell parameters
    # Output layer parameters
    W_hq = _one((num_hiddens, num_outputs))
    b_q = torch.nn.Parameter(torch.zeros(num_outputs, device=device, dtype=torch.float32), requires_grad=True)
    return nn.ParameterList([W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hq, b_q])

# Load the Jay Chou lyrics data
corpus_indices, char_to_idx, idx_to_char, vocab_size = load_data_jay_lyrics()
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size

def init_lstm_state(batch_size, num_hiddens, device):
    # The LSTM state is a pair: hidden state H and memory cell C, both (batch_size, num_hiddens)
    return (torch.zeros((batch_size, num_hiddens), device=device),
            torch.zeros((batch_size, num_hiddens), device=device))
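As a quick check of the shapes involved (a minimal sketch; the batch size of 2 is arbitrary):
params = get_params()
H, C = init_lstm_state(2, num_hiddens, device)
print(params[0].shape)   # W_xi: (vocab_size, num_hiddens)
print(params[1].shape)   # W_hi: (num_hiddens, num_hiddens)
print(H.shape, C.shape)  # both (2, num_hiddens)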
- Define the LSTM model:
def lstm(inputs, state, params):
    [W_xi, W_hi, b_i, W_xf, W_hf, b_f, W_xo, W_ho, b_o, W_xc, W_hc, b_c, W_hq, b_q] = params
    (H, C) = state
    outputs = []
    for X in inputs:
        I = torch.sigmoid(torch.matmul(X, W_xi) + torch.matmul(H, W_hi) + b_i)      # input gate
        F = torch.sigmoid(torch.matmul(X, W_xf) + torch.matmul(H, W_hf) + b_f)      # forget gate
        O = torch.sigmoid(torch.matmul(X, W_xo) + torch.matmul(H, W_ho) + b_o)      # output gate
        C_tilda = torch.tanh(torch.matmul(X, W_xc) + torch.matmul(H, W_hc) + b_c)   # candidate memory cell
        C = F * C + I * C_tilda
        H = O * C.tanh()
        Y = torch.matmul(H, W_hq) + b_q
        outputs.append(Y)
    return outputs, (H, C)
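For reference, the loop above implements the standard LSTM updates (notation follows the code; $\odot$ denotes elementwise multiplication):
$$
\begin{aligned}
I_t &= \sigma(X_t W_{xi} + H_{t-1} W_{hi} + b_i) \\
F_t &= \sigma(X_t W_{xf} + H_{t-1} W_{hf} + b_f) \\
O_t &= \sigma(X_t W_{xo} + H_{t-1} W_{ho} + b_o) \\
\tilde{C}_t &= \tanh(X_t W_{xc} + H_{t-1} W_{hc} + b_c) \\
C_t &= F_t \odot C_{t-1} + I_t \odot \tilde{C}_t \\
H_t &= O_t \odot \tanh(C_t) \\
Y_t &= H_t W_{hq} + b_q
\end{aligned}
$$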
- Train the model.
num_epochs, num_steps, batch_size, lr, clipping_theta = 160, 35, 32, 1e2, 1e-2
pred_period, pred_len, prefixes = 40, 50, ['分开', '不分开']

train_and_predict_rnn(lstm, get_params, init_lstm_state, num_hiddens,
                      vocab_size, device, corpus_indices, idx_to_char,
                      char_to_idx, False, num_epochs, num_steps, lr,
                      clipping_theta, batch_size, pred_period, pred_len,
                      prefixes)
Here train_and_predict_rnn is the training function; its implementation can be found in https://github.com/ShusenTang/Dive-into-DL-PyTorch/blob/master/docs/chapter06_RNN/6.5_rnn-pytorch.md. A sketch of the prediction step it runs is shown below.
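For context, the prediction step that train_and_predict_rnn performs every pred_period epochs looks roughly like the following sketch (adapted from the linked reference; it relies on the to_onehot helper defined in the next section):
def predict_rnn(prefix, num_chars, rnn, params, init_rnn_state,
                num_hiddens, vocab_size, device, idx_to_char, char_to_idx):
    # Generate num_chars characters that continue the given prefix
    state = init_rnn_state(1, num_hiddens, device)
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        # The input at each step is the most recently emitted character
        X = to_onehot(torch.tensor([[output[-1]]], device=device), vocab_size)
        (Y, state) = rnn(X, state, params)
        if t < len(prefix) - 1:
            # Still consuming the prefix: append the next prefix character
            output.append(char_to_idx[prefix[t + 1]])
        else:
            # Past the prefix: append the most likely next character
            output.append(int(Y[0].argmax(dim=1).item()))
    return ''.join([idx_to_char[i] for i in output])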
Concise LSTM Implementation
- Define the RNNModel class:
# This class is saved in the d2lzh_pytorch package for later use
class RNNModel(nn.Module):
    def __init__(self, rnn_layer, vocab_size):
        super(RNNModel, self).__init__()
        self.rnn = rnn_layer
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size, vocab_size)
        self.state = None

    def forward(self, inputs, state):  # inputs: (batch, seq_len)
        # Get the one-hot vector representation
        X = to_onehot(inputs, self.vocab_size)  # X is a list
        Y, self.state = self.rnn(torch.stack(X), state)
        # The fully connected layer first reshapes Y to (num_steps * batch_size, num_hiddens);
        # its output shape is (num_steps * batch_size, vocab_size)
        output = self.dense(Y.view(-1, Y.shape[-1]))
        return output, self.state
def to_onehot(X, n_class):
    # X shape: (batch, seq_len); output: a list of seq_len tensors, each of shape (batch, n_class)
    return [one_hot(X[:, i], n_class) for i in range(X.shape[1])]
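to_onehot depends on a one_hot helper that this post never shows; in the linked d2lzh_pytorch code it is defined essentially as follows:
def one_hot(x, n_class, dtype=torch.float32):
    # x: 1-D tensor of indices with shape (batch,); returns shape (batch, n_class)
    x = x.long()
    res = torch.zeros(x.shape[0], n_class, dtype=dtype, device=x.device)
    res.scatter_(1, x.view(-1, 1), 1)  # place a 1 at each index position
    return res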
- Define the LSTM model and train it:
lr = 1e-2  # note: the learning rate needs adjusting here
lstm_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens)
model = RNNModel(lstm_layer, vocab_size)
train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,
                              corpus_indices, idx_to_char, char_to_idx,
                              num_epochs, num_steps, lr, clipping_theta,
                              batch_size, pred_period, pred_len, prefixes)
Here train_and_predict_rnn_pytorch likewise comes from the reference linked above.
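A quick forward-pass shape check on the untrained model (a minimal sketch; the batch size of 2 and sequence length of 5 are arbitrary):
model = model.to(device)
X = torch.randint(0, vocab_size, (2, 5), device=device)  # (batch, seq_len)
output, state = model(X, None)
print(output.shape)  # (seq_len * batch, vocab_size), i.e. (10, vocab_size)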
References
- https://github.com/ShusenTang/Dive-into-DL-PyTorch/blob/master/docs/chapter06_RNN/6.8_lstm.md